Example 1
def get_topic(name):
    if has_kafka_api():
        pass  # Stub: implicitly returns None when the native Kafka API is available
    else:
        manager = ManagerApi()
        broker_host = manager.get_kafka_brokers().split(',')[0].split(':')[0]
        return manager.get_kafka_topics(broker_host)[name]
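The has_kafka_api() branch in Example 1 is only a stub, so the function silently returns None when the native Kafka API is enabled. A minimal hedged sketch of one way it could be completed, assuming KafkaApi().topics() returns an iterable of topic names as Example 4 suggests (an assumption for illustration, not the project's actual implementation):

def get_topic(name):
    if has_kafka_api():
        # Assumption: KafkaApi().topics() yields topic names (see Example 4);
        # return the requested name if it exists, otherwise None.
        return next((topic for topic in KafkaApi().topics() if topic == name), None)
    else:
        manager = ManagerApi()
        broker_host = manager.get_kafka_brokers().split(',')[0].split(':')[0]
        return manager.get_kafka_topics(broker_host)[name]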
Example 2
def get_hosts(request):
    response = {'status': 0}
    api = ManagerApi(request.user)

    if request.POST.get('service', '').lower() == 'flume':
        response['hosts'] = api.get_flume_agents()

    return JsonResponse(response)
Example 3
  def start(self, destination_name, file_format, destination):
    responses = {'status': 0}

    api = ManagerApi(self.user)

    for config_name, config_value in self.generate_config(file_format, destination):
      responses[config_name] = api.update_flume_config(cluster_name=None, config_name=config_name, config_value=config_value)

    responses['refresh_flume'] = api.refresh_flume(cluster_name=None, restart=True)

    if destination['ouputFormat'] == 'index':
      responses['pubSubUrl'] = 'assist.collections.refresh'
      responses['on_success_url'] = reverse('search:browse', kwargs={'name': destination_name})

    return responses
Example 4
def get_topics():
    if has_kafka_api():
        return KafkaApi().topics()
    else:
        try:
            manager = ManagerApi()
            broker_host = manager.get_kafka_brokers().split(',')[0].split(':')[0]
            return [
                name
                for name in list(manager.get_kafka_topics(broker_host).keys())
                if not name.startswith('__')
            ]
        except Exception as e:
            return ['user_behavior']
Example 5
def get_topics():
    if has_kafka_api():
        return KafkaApi().topics()
    else:
        try:
            manager = ManagerApi()
            broker_host = manager.get_kafka_brokers().split(',')[0].split(':')[0]
            return [
                name for name in manager.get_kafka_topics(broker_host).keys()
                if not name.startswith('__')
            ]
        except Exception as e:
            print(e)
            return ["traffic", "hueAccessLogs"]
Example 6
def get_spark_history_server_from_cm():
    from metadata.conf import MANAGER
    from metadata.manager_client import ManagerApi

    if MANAGER.API_URL.get():
        return ManagerApi().get_spark_history_server_url()
    return None
Example 7
def get_daemon_config(key):
  from metadata.conf import MANAGER
  from metadata.manager_client import ManagerApi

  if MANAGER.API_URL.get():
    return ManagerApi().get_impalad_config(key=key, impalad_host=SERVER_HOST.get())

  return None
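For illustration, a hedged usage sketch of get_daemon_config; the flag name below is hypothetical and only stands in for whichever impalad startup option a caller wants to read:

# Hypothetical usage: look up a single impalad flag through Cloudera Manager.
# 'idle_session_timeout' is an illustrative key, not one required by this helper.
timeout = get_daemon_config('idle_session_timeout')
if timeout is not None:
    print('impalad idle_session_timeout = %s' % timeout)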
Example 8
def get_spark_history_server_security_enabled():
    """
    Try to get Spark history server URL from Cloudera Manager API, otherwise give default URL
  """
    from metadata.conf import MANAGER
    from metadata.manager_client import ManagerApi
    if MANAGER.API_URL.get():
        return ManagerApi().get_spark_history_server_security_enabled()
    return False
Example 9
def update_flume_config(request):
    api = ManagerApi(request.user)

    flume_agent_config = '''tier1.sources = source1
tier1.channels = channel1
tier1.sinks = sink1

tier1.sources.source1.type = exec
tier1.sources.source1.command = tail -F /var/log/hue-httpd/access_log
tier1.sources.source1.channels = channel1

tier1.channels.channel1.type = memory
tier1.channels.channel1.capacity = 10000
tier1.channels.channel1.transactionCapacity = 1000

# Solr Sink configuration
tier1.sinks.sink1.type          = org.apache.flume.sink.solr.morphline.MorphlineSolrSink
tier1.sinks.sink1.morphlineFile = morphlines.conf
tier1.sinks.sink1.morphlineId = hue_accesslogs_no_geo
tier1.sinks.sink1.channel       = channel1'''

    morphline_file = os.path.join(config_morphline_path(),
                                  'hue_accesslogs_no_geo.morphline.conf')
    with open(morphline_file) as f:
        morphline_config = f.read()
    morphline_config = morphline_config.replace('${SOLR_COLLECTION}',
                                                'log_analytics_demo').replace(
                                                    '${ZOOKEEPER_ENSEMBLE}',
                                                    '%s/solr' % zkensemble())

    responses = {}

    responses['agent_config_file'] = api.update_flume_config(
        cluster_name=None,
        config_name='agent_config_file',
        config_value=flume_agent_config)
    responses['agent_morphlines_conf_file'] = api.update_flume_config(
        cluster_name=None,
        config_name='agent_morphlines_conf_file',
        config_value=morphline_config)

    responses['refresh_flume'] = api.refresh_flume(cluster_name=None,
                                                   restart=True)

    return JsonResponse(responses)
Example 10
  def generate_config(self, source, destination):
    configs = []

    if source['channelSourceType'] == 'directory':
      agent_source = '''
  tier1.sources.source1.type = exec
  tier1.sources.source1.command = tail -F %(directory)s
  tier1.sources.source1.channels = channel1
      ''' % {
       'directory': source['channelSourcePath']
    }
    elif source['channelSourceType'] == 'kafka':
      agent_source = '''
  tier1.sources.source1.type = org.apache.flume.source.kafka.KafkaSource
  tier1.sources.source1.channels = channel1
  tier1.sources.source1.batchSize = 5000
  tier1.sources.source1.batchDurationMillis = 2000
  tier1.sources.source1.kafka.bootstrap.servers = localhost:9092
  tier1.sources.source1.kafka.topics = test1, test2
  tier1.sources.source1.kafka.consumer.group.id = custom.g.id
      ''' % {
       'directory': source['channelSourcePath']
    }
    else:
      raise PopupException(_('Input format not recognized: %(channelSourceType)s') % source)

    if destination['ouputFormat'] == 'file':
      agent_sink = '''
  a1.channels = c1
  a1.sinks = k1
  a1.sinks.k1.type = hdfs
  a1.sinks.k1.channel = c1
  a1.sinks.k1.hdfs.path = /flume/events/%y-%m-%d/%H%M/%S
  a1.sinks.k1.hdfs.filePrefix = events-
  a1.sinks.k1.hdfs.round = true
  a1.sinks.k1.hdfs.roundValue = 10
  a1.sinks.k1.hdfs.roundUnit = minute'''
    elif destination['ouputFormat'] == 'table':
      agent_sink = '''
  a1.channels = c1
  a1.channels.c1.type = memory
  a1.sinks = k1
  a1.sinks.k1.type = hive
  a1.sinks.k1.channel = c1
  a1.sinks.k1.hive.metastore = thrift://127.0.0.1:9083
  a1.sinks.k1.hive.database = logsdb
  a1.sinks.k1.hive.table = weblogs
  a1.sinks.k1.hive.partition = asia,%{country},%y-%m-%d-%H-%M
  a1.sinks.k1.useLocalTimeStamp = false
  a1.sinks.k1.round = true
  a1.sinks.k1.roundValue = 10
  a1.sinks.k1.roundUnit = minute
  a1.sinks.k1.serializer = DELIMITED
  a1.sinks.k1.serializer.delimiter = "\t"
  a1.sinks.k1.serializer.serdeSeparator = '\t'
  a1.sinks.k1.serializer.fieldnames =id,,msg'''
    elif destination['ouputFormat'] == 'kafka':
      manager = ManagerApi()
      agent_sink = '''
  tier1.sinks.sink1.type = org.apache.flume.sink.kafka.KafkaSink
  tier1.sinks.sink1.topic = hueAccessLogs
  tier1.sinks.sink1.brokerList = %(brokers)s
  tier1.sinks.sink1.channel = channel1
  tier1.sinks.sink1.batchSize = 20''' % {
      'brokers': manager.get_kafka_brokers()
    }

    elif destination['ouputFormat'] == 'index':
      # Morphline file
      configs.append(self.generate_morphline_config(destination))
      # Flume config
      agent_sink = '''
  tier1.sinks.sink1.type          = org.apache.flume.sink.solr.morphline.MorphlineSolrSink
  tier1.sinks.sink1.morphlineFile = morphlines.conf
  tier1.sinks.sink1.morphlineId = hue_accesslogs_no_geo
  tier1.sinks.sink1.channel       = channel1'''
    else:
      raise PopupException(_('Output format not recognized: %(ouputFormat)s') % destination)

    # TODO: use agent id: input + output and do not override all the configs
    # TODO: use Kafka channel if possible
    flume_config = '''tier1.sources = source1
  tier1.channels = channel1
  tier1.sinks = sink1

  %(sources)s

  tier1.channels.channel1.type = memory
  tier1.channels.channel1.capacity = 10000
  tier1.channels.channel1.transactionCapacity = 1000

  %(sinks)s''' % {
    'sources': agent_source,
    'sinks': agent_sink,
  }

    configs.append(('agent_config_file', flume_config))

    return configs
Example 11
def hello(request):
    api = ManagerApi(request.user)

    response = api.tools_echo()

    return JsonResponse(response)
Example 12
def _envelope_job(request, file_format, destination, start_time=None, lib_path=None):
  collection_name = destination['name']
  indexer = EnvelopeIndexer(request.user, request.fs)

  lib_path = '/tmp/envelope-0.5.0.jar'
  input_path = None
  properties = {}  # Guard against a NameError when no branch below sets properties

  if file_format['inputFormat'] == 'table':
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
    input_path = table_metadata.path_location
  elif file_format['inputFormat'] == 'file':
    input_path = '${nameNode}%s' % file_format["path"]
    properties = {
      'format': 'json'
    }
  elif file_format['inputFormat'] == 'stream':
    if file_format['streamSelection'] == 'sfdc':
      properties = {
        'streamSelection': file_format['streamSelection'],
        'streamUsername': file_format['streamUsername'],
        'streamPassword': file_format['streamPassword'],
        'streamToken': file_format['streamToken'],
        'streamEndpointUrl': file_format['streamEndpointUrl'],
        'streamObject': file_format['streamObject'],
      }
    elif file_format['streamSelection'] == 'kafka':
      manager = ManagerApi()
      properties = {
        "brokers": manager.get_kafka_brokers(),
        "output_table": "impala::%s" % collection_name,
        "topics": file_format['kafkaSelectedTopics'],
        "kafkaFieldType": file_format['kafkaFieldType'],
        "kafkaFieldDelimiter": file_format['kafkaFieldDelimiter'],
        "kafkaFieldNames": file_format['kafkaFieldNames'],
        "kafkaFieldTypes": file_format['kafkaFieldTypes']
      }

    if destination['outputFormat'] == 'table':
      if destination['isTargetExisting']:
        # Todo: check if format matches
        pass
      else:
        sql = SQLIndexer(user=request.user, fs=request.fs).create_table_from_a_file(file_format, destination).get_str()
        print(sql)
      if destination['tableFormat'] == 'kudu':
        manager = ManagerApi()
        properties["output_table"] = "impala::%s" % collection_name
        properties["kudu_master"] = manager.get_kudu_master()
      else:
        properties['output_table'] = collection_name
    elif destination['outputFormat'] == 'file':
      properties['path'] = file_format["path"]
      properties['format'] = file_format['tableFormat'] # or csv
    elif destination['outputFormat'] == 'index':
      properties['collectionName'] = collection_name
      properties['connection'] = SOLR_URL.get()
      if destination['isTargetExisting']:
        # Todo: check if format matches
        pass
      else:
        client = SolrClient(request.user)
        kwargs = {}
        _create_solr_collection(request.user, request.fs, client, destination, collection_name, kwargs)

  properties["app_name"] = 'Data Ingest'
  properties["inputFormat"] = file_format['inputFormat']
  properties["ouputFormat"] = destination['ouputFormat']
  properties["streamSelection"] = file_format["streamSelection"]

  envelope = indexer.generate_config(properties)

  return indexer.run(request, collection_name, envelope, input_path, start_time=start_time, lib_path=lib_path)
Example 13
def _envelope_job(request,
                  file_format,
                  destination,
                  start_time=None,
                  lib_path=None):
    collection_name = destination['name']
    indexer = EnvelopeIndexer(request.user, request.fs)

    lib_path = None  # Todo optional input field
    input_path = None
    properties = {}  # Guard against a NameError when no branch below sets properties

    if file_format['inputFormat'] == 'table':
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'],
                                      table_name=file_format['tableName'])
        input_path = table_metadata.path_location
    elif file_format['inputFormat'] == 'file':
        input_path = file_format["path"]
        properties = {'input_path': input_path, 'format': 'csv'}
    elif file_format['inputFormat'] == 'stream' and file_format[
            'streamSelection'] == 'flume':
        pass
    elif file_format['inputFormat'] == 'stream':
        if file_format['streamSelection'] == 'kafka':
            manager = ManagerApi()
            properties = {
                "brokers": manager.get_kafka_brokers(),
                "topics": file_format['kafkaSelectedTopics'],
                "kafkaFieldType": file_format['kafkaFieldType'],
                "kafkaFieldDelimiter": file_format['kafkaFieldDelimiter'],
            }

            if file_format.get(
                    'kafkaSelectedTopics') == 'NavigatorAuditEvents':
                schema_fields = MorphlineIndexer.get_kept_field_list(
                    file_format['sampleCols'])
                properties.update({
                    "kafkaFieldNames":
                    ', '.join([_field['name'] for _field in schema_fields]),
                    "kafkaFieldTypes":
                    ', '.join([_field['type'] for _field in schema_fields])
                })
            else:
                properties.update({
                    "kafkaFieldNames":
                    file_format['kafkaFieldNames'],
                    "kafkaFieldTypes":
                    file_format['kafkaFieldTypes']
                })

            if True:
                properties['window'] = ''
            else:  # For "KafkaSQL"
                properties['window'] = '''
            window {
                enabled = true
                milliseconds = 60000
            }'''
    elif file_format['inputFormat'] == 'connector':
        if file_format['streamSelection'] == 'flume':
            properties = {
                'streamSelection':
                file_format['streamSelection'],
                'channelSourceHosts':
                file_format['channelSourceHosts'],
                'channelSourceSelectedHosts':
                file_format['channelSourceSelectedHosts'],
                'channelSourcePath':
                file_format['channelSourcePath'],
            }
        else:
            # sfdc
            properties = {
                'streamSelection': file_format['streamSelection'],
                'streamUsername': file_format['streamUsername'],
                'streamPassword': file_format['streamPassword'],
                'streamToken': file_format['streamToken'],
                'streamEndpointUrl': file_format['streamEndpointUrl'],
                'streamObject': file_format['streamObject'],
            }

    if destination['outputFormat'] == 'table':
        if destination['isTargetExisting']:  # Todo: check if format matches
            pass
        else:
            destination['importData'] = False  # Avoid LOAD DATA
            if destination['tableFormat'] == 'kudu':
                # Kudu column names should be all lowercase
                properties['kafkaFieldNames'] = properties['kafkaFieldNames'].lower()
            # Create table
            if not request.POST.get('show_command'):
                SQLIndexer(user=request.user,
                           fs=request.fs).create_table_from_a_file(
                               file_format, destination).execute(request)

        if destination['tableFormat'] == 'kudu':
            manager = ManagerApi()
            properties["output_table"] = "impala::%s" % collection_name
            properties["kudu_master"] = manager.get_kudu_master()
        else:
            properties['output_table'] = collection_name
    elif destination['outputFormat'] == 'stream':
        manager = ManagerApi()
        properties['brokers'] = manager.get_kafka_brokers()
        properties['topics'] = file_format['kafkaSelectedTopics']
        properties['kafkaFieldDelimiter'] = file_format['kafkaFieldDelimiter']
    elif destination['outputFormat'] == 'file':
        properties['path'] = file_format["path"]
        if file_format['inputFormat'] == 'stream':
            properties['format'] = 'csv'
        else:
            properties['format'] = file_format['tableFormat']  # or csv
    elif destination['outputFormat'] == 'index':
        properties['collectionName'] = collection_name
        properties['connection'] = SOLR_URL.get()

    properties["app_name"] = 'Data Ingest'
    properties["inputFormat"] = file_format['inputFormat']
    properties["ouputFormat"] = destination['ouputFormat']
    properties["streamSelection"] = file_format["streamSelection"]

    configs = indexer.generate_config(properties)

    if request.POST.get('show_command'):
        return {'status': 0, 'commands': configs['envelope.conf']}
    else:
        return indexer.run(request,
                           collection_name,
                           configs,
                           input_path,
                           start_time=start_time,
                           lib_path=lib_path)
Example 14
def _envelope_job(request,
                  file_format,
                  destination,
                  start_time=None,
                  lib_path=None):
    collection_name = destination['name']
    indexer = EnvelopeIndexer(request.user, request.fs)

    lib_path = None  # Todo optional input field
    input_path = None
    properties = {}  # Guard against a NameError when no branch below sets properties

    if file_format['inputFormat'] == 'table':
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'],
                                      table_name=file_format['tableName'])
        input_path = table_metadata.path_location
    elif file_format['inputFormat'] == 'file':
        input_path = file_format["path"]
        properties = {'input_path': input_path, 'format': 'csv'}
    elif file_format['inputFormat'] == 'stream' and file_format[
            'streamSelection'] == 'flume':
        pass
    elif file_format['inputFormat'] in ('stream', 'sfdc'):
        if file_format['inputFormat'] == 'sfdc':
            properties = {
                'streamSelection': file_format['streamSelection'],
                'streamUsername': file_format['streamUsername'],
                'streamPassword': file_format['streamPassword'],
                'streamToken': file_format['streamToken'],
                'streamEndpointUrl': file_format['streamEndpointUrl'],
                'streamObject': file_format['streamObject'],
            }
        elif file_format['streamSelection'] == 'kafka':
            manager = ManagerApi()
            properties = {
                "brokers": manager.get_kafka_brokers(),
                "topics": file_format['kafkaSelectedTopics'],
                "kafkaFieldType": file_format['kafkaFieldType'],
                "kafkaFieldDelimiter": file_format['kafkaFieldDelimiter'],
                "kafkaFieldNames": file_format['kafkaFieldNames'],
                "kafkaFieldTypes": file_format['kafkaFieldTypes']
            }

            if True:
                properties['window'] = ''
            else:  # For "KafkaSQL"
                properties['window'] = '''
            window {
                enabled = true
                milliseconds = 60000
            }'''

        if destination['outputFormat'] == 'table':
            if destination['isTargetExisting']:
                # Todo: check if format matches
                pass
            else:
                sql = SQLIndexer(user=request.user,
                                 fs=request.fs).create_table_from_a_file(
                                     file_format, destination).get_str()
                print(sql)
            if destination['tableFormat'] == 'kudu':
                manager = ManagerApi()
                properties["output_table"] = "impala::%s" % collection_name
                properties["kudu_master"] = manager.get_kudu_master()
            else:
                properties['output_table'] = collection_name
        elif destination['outputFormat'] == 'file':
            properties['path'] = file_format["path"]
            if file_format['inputFormat'] == 'stream':
                properties['format'] = 'csv'
            else:
                properties['format'] = file_format['tableFormat']  # or csv
        elif destination['outputFormat'] == 'index':
            properties['collectionName'] = collection_name
            properties['connection'] = SOLR_URL.get()


# Not needed anymore
#       if destination['isTargetExisting']:
#         # Todo: check if format matches
#         pass
#       else:
#         client = SolrClient(request.user)
#         kwargs = {}
#         _create_solr_collection(request.user, request.fs, client, destination, collection_name, kwargs)

    if destination['outputFormat'] == 'stream':
        manager = ManagerApi()
        properties['brokers'] = manager.get_kafka_brokers()
        properties['topics'] = file_format['kafkaSelectedTopics']
        properties['kafkaFieldDelimiter'] = file_format['kafkaFieldDelimiter']

    properties["app_name"] = 'Data Ingest'
    properties["inputFormat"] = file_format['inputFormat']
    properties["ouputFormat"] = destination['ouputFormat']
    properties["streamSelection"] = file_format["streamSelection"]

    envelope = indexer.generate_config(properties)

    return indexer.run(request,
                       collection_name,
                       envelope,
                       input_path,
                       start_time=start_time,
                       lib_path=lib_path)