def generate_morphline_config(self, destination):
    # TODO manage generic config, cf. MorphlineIndexer
    """Build the morphline config for a Flume -> Solr access-log pipeline.

    Reads the bundled `hue_accesslogs_no_geo.morphline.conf` template and
    substitutes the Solr collection name and ZooKeeper ensemble.

    Args:
      destination: dict with at least a 'name' key (the Solr collection).

    Returns:
      Tuple of (config property name, rendered morphline config text).
    """
    # Use a context manager so the template file handle is always closed
    # (the previous open(...).read() leaked the descriptor).
    template_path = os.path.join(config_morphline_path(), 'hue_accesslogs_no_geo.morphline.conf')
    with open(template_path) as template_file:
        morphline_config = template_file.read()

    morphline_config = morphline_config.replace(
        '${SOLR_COLLECTION}', destination['name']
    ).replace(
        '${ZOOKEEPER_ENSEMBLE}', '%s/solr' % zkensemble()
    )

    return ('agent_morphlines_conf_file', morphline_config)
def update_flume_config(request):
    """Push a demo Flume agent config + morphline to Cloudera Manager, then refresh Flume.

    Tails the Hue httpd access log via an exec source and ships events to Solr
    through a MorphlineSolrSink. The morphline template is rendered for the
    hard-coded 'log_analytics_demo' collection.

    Returns:
      JsonResponse with the ManagerApi response for each updated config and
      for the Flume refresh/restart.
    """
    api = ManagerApi(request.user)

    flume_agent_config = '''tier1.sources = source1
tier1.channels = channel1
tier1.sinks = sink1

tier1.sources.source1.type = exec
tier1.sources.source1.command = tail -F /var/log/hue-httpd/access_log
tier1.sources.source1.channels = channel1

tier1.channels.channel1.type = memory
tier1.channels.channel1.capacity = 10000
tier1.channels.channel1.transactionCapacity = 1000

# Solr Sink configuration
tier1.sinks.sink1.type = org.apache.flume.sink.solr.morphline.MorphlineSolrSink
tier1.sinks.sink1.morphlineFile = morphlines.conf
tier1.sinks.sink1.morphlineId = hue_accesslogs_no_geo
tier1.sinks.sink1.channel = channel1'''

    # Use a context manager so the template file handle is always closed
    # (the previous open(...).read() leaked the descriptor).
    template_path = os.path.join(config_morphline_path(), 'hue_accesslogs_no_geo.morphline.conf')
    with open(template_path) as template_file:
        morphline_config = template_file.read()
    morphline_config = morphline_config.replace(
        '${SOLR_COLLECTION}', 'log_analytics_demo'
    ).replace(
        '${ZOOKEEPER_ENSEMBLE}', '%s/solr' % zkensemble()
    )

    responses = {}
    responses['agent_config_file'] = api.update_flume_config(
        cluster_name=None, config_name='agent_config_file', config_value=flume_agent_config)
    responses['agent_morphlines_conf_file'] = api.update_flume_config(
        cluster_name=None, config_name='agent_morphlines_conf_file', config_value=morphline_config)
    responses['refresh_flume'] = api.refresh_flume(cluster_name=None, restart=True)

    return JsonResponse(responses)
def generate_config(self, properties):
    """Generate the Envelope (Spark) job configuration files for a pipeline.

    Builds the input step from properties['inputFormat'] ('stream' over Kafka,
    'connector' for SFDC, or 'file'), the output step from the — misspelled
    but caller-facing, so kept as-is — properties['ouputFormat'] key ('file',
    'table', 'index' or 'stream'), and assembles the final 'envelope.conf'.

    Returns:
      dict mapping config file names to contents. Always contains
      'envelope.conf'; also 'navigator_topic.morphline.conf' when a
      morphline translator is required.

    Raises:
      PopupException: when an input or output format is not recognized.
    """
    configs = {}

    # ---------------------------------------------------------------- input
    if properties['inputFormat'] == 'stream':
        if properties['streamSelection'] == 'kafka':
            if properties['topics'] == 'NavigatorAuditEvents':
                # Navigator audit events arrive as JSON and need a morphline
                # translator; ship the rendered morphline alongside the job.
                # Context manager closes the template file (previously leaked).
                morphline_path = os.path.join(config_morphline_path(), 'navigator_topic.morphline.conf')
                with open(morphline_path) as morphline_file:
                    morphline_config = morphline_file.read()
                configs['navigator_topic.morphline.conf'] = morphline_config.replace(
                    '${SOLR_COLLECTION}', 'empty'
                ).replace(
                    '${ZOOKEEPER_ENSEMBLE}', '%s/solr' % zkensemble()
                )
                input = """type = kafka
                brokers = "%(brokers)s"
                topics = [%(topics)s]
                //group.id = nav-envelope
                encoding = bytearray
                parameter.auto.offset.reset = earliest
                translator {
                  type = morphline
                  encoding.key = UTF8
                  encoding.message = UTF8
                  morphline.file = "navigator_topic.morphline.conf"
                  morphline.id = "nav-json-input"
                  field.names = [%(kafkaFieldNames)s]
                  field.types = [%(kafkaFieldTypes)s]
                }
                %(window)s
                """ % properties
            else:
                input = """type = kafka
                brokers = "%(brokers)s"
                topics = [%(topics)s]
                encoding = string
                translator {
                  type = %(kafkaFieldType)s
                  delimiter = "%(kafkaFieldDelimiter)s"
                  field.names = [%(kafkaFieldNames)s]
                  field.types = [%(kafkaFieldTypes)s]
                }
                %(window)s
                """ % properties
        else:
            raise PopupException(
                _('Stream format of %(inputFormat)s not recognized: %(streamSelection)s') % properties)
    elif properties['inputFormat'] == 'connector':
        # sfdc
        input = """type = sfdc
        mode = fetch-all
        sobject = %(streamObject)s
        sfdc: {
          partner: {
            username = "******"
            password = "******"
            token = "%(streamToken)s"
            auth-endpoint = "%(streamEndpointUrl)s"
          }
        }
        """ % properties
    elif properties['inputFormat'] == 'file':
        input = """type = filesystem
        path = %(input_path)s
        format = %(format)s
        """ % properties
    else:
        raise PopupException(_('Input format not recognized: %(inputFormat)s') % properties)

    # ---------------------------------------------------------------- output
    extra_step = ''
    properties['output_deriver'] = """
        deriver {
          type = sql
          query.literal = \"\"\"SELECT * from inputdata\"\"\"
        }"""

    if properties['inputFormat'] == 'stream' and properties['topics'] == 'NavigatorAuditEvents':
        # Kudu does not support upper case names: alias every mixed/upper-case
        # Navigator column to a lower-case name.
        properties['output_deriver'] = """
        deriver {
          type = sql
          query.literal = \"\"\"
            SELECT concat_ws('-', time, service, user) as id, -- timeDate todo
            additionalInfo as additionalinfo, allowed,
            collectionName as collectionname,
            databaseName as databasename, db,
            DELEGATION_TOKEN_ID as delegation_token_id, dst,
            entityId as entityid, time, family, impersonator, ip, name,
            objectType as objecttype,
            objType as objtype,
            objUsageType as objusagetype, op,
            operationParams as operationparams,
            operationText as operationtext,
            opText as optext, path, perms, privilege, qualifier,
            QUERY_ID as query_id,
            resourcePath as resourcepath, service,
            SESSION_ID as session_id,
            solrVersion as solrversion, src, status,
            subOperation as suboperation,
            tableName as tablename,
            `table` as `table`, type, url, user
            FROM inputdata
          \"\"\"
        }"""

    if properties['ouputFormat'] == 'file':
        output = """
        %(output_deriver)s

        planner = {
          type = overwrite
        }
        output = {
          type = filesystem
          path = %(path)s
          format = %(format)s
          header = true
        }""" % properties
    elif properties['ouputFormat'] == 'table':
        # TODO: look at table output type instead and merge
        if properties['inputFormat'] == 'stream' and properties['streamSelection'] == 'kafka':
            output = """
        %(output_deriver)s

        planner {
          type = upsert
        }
        output {
          type = kudu
          connection = "%(kudu_master)s"
          table.name = "%(output_table)s"
        }""" % properties
        else:
            output = """
        %(output_deriver)s

        planner {
          type = append
        }
        output {
          type = hive
          table.name = "%(output_table)s"
        }""" % properties
    elif properties['ouputFormat'] == 'index':
        if True:  # Workaround until envelope Solr output is official
            # Context manager closes the template file (previously leaked).
            morphline_path = os.path.join(config_morphline_path(), 'navigator_topic.morphline.conf')
            with open(morphline_path) as morphline_file:
                morphline_config = morphline_file.read()
            configs['navigator_topic.morphline.conf'] = morphline_config.replace(
                '${SOLR_COLLECTION}', properties['collectionName']
            ).replace(
                '${ZOOKEEPER_ENSEMBLE}', '%s/solr' % zkensemble()
            )
            output = """
        // Load events to a Solr index
        // TODO: Move this to a SolrOutput step, when this is available
        deriver {
          type = morphline
          step.name = kafkaInput
          morphline.file = ${vars.morphline.file}
          morphline.id = ${vars.morphline.solr.indexer}
          field.names = ${vars.json.field.names}
          field.types = ${vars.json.field.types}
        }
        """ % properties
            extra_step = """
        solrOutput {
          dependencies = [outputdata]

          deriver {
            type = sql
            query.literal = \"\"\"
              SELECT *
              FROM outputdata
              LIMIT 0
            \"\"\"
          }

          planner = {
            type = append
          }
          output = {
            type = log
            path = ${vars.hdfs.basedir}
            format = csv
          }
        }""" % properties
        else:
            # Fixed planner typo: was 'upstert', the valid Envelope planner
            # type is 'upsert'.
            output = """
        %(output_deriver)s

        planner {
          type = upsert
        }
        output {
          type = solr
          connection = "%(connection)s"
          collection.name = "%(collectionName)s"
        }""" % properties
    elif properties['ouputFormat'] == 'stream':
        output = """
        %(output_deriver)s

        planner {
          type = append
        }
        output {
          type = kafka
          brokers = "%(brokers)s"
          topic = %(topics)s
          serializer.type = delimited
          serializer.field.delimiter = ","
        }""" % properties
    else:
        raise PopupException(_('Output format not recognized: %(ouputFormat)s') % properties)

    # ------------------------------------------------------------ final conf
    configs['envelope.conf'] = """
application {
  name = %(app_name)s
  %(batch)s
  executors = 1
  executor.cores = 1
  executor.memory = 1G
}

steps {
  inputdata {
    input {
      %(input)s
    }
  }

  outputdata {
    dependencies = [inputdata]

    %(output)s
  }
  %(extra_step)s
}
""" % {
        'input': input,
        'output': output,
        'extra_step': extra_step,
        'app_name': properties['app_name'],
        # Streaming jobs run micro-batches; batch jobs omit the setting.
        'batch': 'batch.milliseconds = 5000' if properties['inputFormat'] == 'stream' else ''
    }

    return configs