def _envelope_job(request, file_format, destination, start_time=None, lib_path=None):
  collection_name = destination['name']
  indexer = EnvelopeIndexer(request.user, request.fs)

  lib_path = None  # TODO: optional input field
  input_path = None

  if file_format['inputFormat'] == 'table':
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
    input_path = table_metadata.path_location
  elif file_format['inputFormat'] == 'file':
    input_path = file_format["path"]
    properties = {
      'input_path': input_path,
      'format': 'csv'
    }
  elif file_format['inputFormat'] == 'stream' and file_format['streamSelection'] == 'flume':
    pass
  elif file_format['inputFormat'] == 'stream':
    if file_format['streamSelection'] == 'kafka':
      manager = ManagerApi()
      properties = {
        "brokers": manager.get_kafka_brokers(),
        "topics": file_format['kafkaSelectedTopics'],
        "kafkaFieldType": file_format['kafkaFieldType'],
        "kafkaFieldDelimiter": file_format['kafkaFieldDelimiter'],
      }

      if file_format.get('kafkaSelectedTopics') == 'NavigatorAuditEvents':
        schema_fields = MorphlineIndexer.get_kept_field_list(file_format['sampleCols'])
        properties.update({
          "kafkaFieldNames": ', '.join([_field['name'] for _field in schema_fields]),
          "kafkaFieldTypes": ', '.join([_field['type'] for _field in schema_fields])
        })
      else:
        properties.update({
          "kafkaFieldNames": file_format['kafkaFieldNames'],
          "kafkaFieldTypes": file_format['kafkaFieldTypes']
        })

      # Windowing is disabled for now; the "KafkaSQL" variant would use:
      # properties['window'] = '''window {
      #   enabled = true
      #   milliseconds = 60000
      # }'''
      properties['window'] = ''
  elif file_format['inputFormat'] == 'connector':
    if file_format['streamSelection'] == 'flume':
      properties = {
        'streamSelection': file_format['streamSelection'],
        'channelSourceHosts': file_format['channelSourceHosts'],
        'channelSourceSelectedHosts': file_format['channelSourceSelectedHosts'],
        'channelSourcePath': file_format['channelSourcePath'],
      }
    else:  # sfdc
      properties = {
        'streamSelection': file_format['streamSelection'],
        'streamUsername': file_format['streamUsername'],
        'streamPassword': file_format['streamPassword'],
        'streamToken': file_format['streamToken'],
        'streamEndpointUrl': file_format['streamEndpointUrl'],
        'streamObject': file_format['streamObject'],
      }

  if destination['outputFormat'] == 'table':
    if destination['isTargetExisting']:
      pass  # TODO: check if format matches
    else:
      destination['importData'] = False  # Avoid LOAD DATA
      if destination['tableFormat'] == 'kudu':
        properties['kafkaFieldNames'] = properties['kafkaFieldNames'].lower()  # Kudu column names should be all lowercase
      # Create the table
      if not request.POST.get('show_command'):
        SQLIndexer(user=request.user, fs=request.fs).create_table_from_a_file(file_format, destination).execute(request)

    if destination['tableFormat'] == 'kudu':
      manager = ManagerApi()
      properties["output_table"] = "impala::%s" % collection_name
      properties["kudu_master"] = manager.get_kudu_master()
    else:
      properties['output_table'] = collection_name
  elif destination['outputFormat'] == 'stream':
    manager = ManagerApi()
    properties['brokers'] = manager.get_kafka_brokers()
    properties['topics'] = file_format['kafkaSelectedTopics']
    properties['kafkaFieldDelimiter'] = file_format['kafkaFieldDelimiter']
  elif destination['outputFormat'] == 'file':
    properties['path'] = file_format["path"]
    if file_format['inputFormat'] == 'stream':
      properties['format'] = 'csv'
    else:
      properties['format'] = file_format['tableFormat']  # or csv
  elif destination['outputFormat'] == 'index':
    properties['collectionName'] = collection_name
    properties['connection'] = SOLR_URL.get()

  properties["app_name"] = 'Data Ingest'
  properties["inputFormat"] = file_format['inputFormat']
  properties["ouputFormat"] = destination['outputFormat']  # config generation reads the misspelled 'ouputFormat' (sic) key
  properties["streamSelection"] = file_format["streamSelection"]

  configs = indexer.generate_config(properties)

  if request.POST.get('show_command'):
    return {'status': 0, 'commands': configs['envelope.conf']}
  else:
    return indexer.run(request, collection_name, configs, input_path, start_time=start_time, lib_path=lib_path)
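# A minimal sketch (hypothetical values) of the payloads that exercise the
# Kafka stream -> Kudu table branch above. Only the keys _envelope_job reads
# are shown; real requests carry many more fields.
#
# file_format = {
#   'inputFormat': 'stream',
#   'streamSelection': 'kafka',
#   'kafkaSelectedTopics': 'web_logs',
#   'kafkaFieldType': 'delimited',
#   'kafkaFieldDelimiter': ',',
#   'kafkaFieldNames': 'id,msg',
#   'kafkaFieldTypes': 'int,string',
# }
# destination = {
#   'name': 'web_logs',
#   'outputFormat': 'table',
#   'tableFormat': 'kudu',
#   'isTargetExisting': True,
# }
#
# With these inputs, properties ends up with brokers from Cloudera Manager,
# output_table='impala::web_logs' and the kudu_master address, before
# generate_config() builds the envelope.conf and the job is submitted.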
def _envelope_job(request, file_format, destination, start_time=None, lib_path=None):
  collection_name = destination['name']
  indexer = EnvelopeIndexer(request.user, request.fs)

  lib_path = '/tmp/envelope-0.5.0.jar'
  input_path = None

  if file_format['inputFormat'] == 'table':
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
    input_path = table_metadata.path_location
  elif file_format['inputFormat'] == 'file':
    input_path = '${nameNode}%s' % file_format["path"]
    properties = {
      'format': 'json'
    }
  elif file_format['inputFormat'] == 'stream':
    if file_format['streamSelection'] == 'sfdc':
      properties = {
        'streamSelection': file_format['streamSelection'],
        'streamUsername': file_format['streamUsername'],
        'streamPassword': file_format['streamPassword'],
        'streamToken': file_format['streamToken'],
        'streamEndpointUrl': file_format['streamEndpointUrl'],
        'streamObject': file_format['streamObject'],
      }
    elif file_format['streamSelection'] == 'kafka':
      manager = ManagerApi()
      properties = {
        "brokers": manager.get_kafka_brokers(),
        "output_table": "impala::%s" % collection_name,
        "topics": file_format['kafkaSelectedTopics'],
        "kafkaFieldType": file_format['kafkaFieldType'],
        "kafkaFieldDelimiter": file_format['kafkaFieldDelimiter'],
        "kafkaFieldNames": file_format['kafkaFieldNames'],
        "kafkaFieldTypes": file_format['kafkaFieldTypes']
      }

  if destination['outputFormat'] == 'table':
    if destination['isTargetExisting']:
      pass  # TODO: check if format matches
    else:
      sql = SQLIndexer(user=request.user, fs=request.fs).create_table_from_a_file(file_format, destination).get_str()
      print(sql)
    if destination['tableFormat'] == 'kudu':
      manager = ManagerApi()
      properties["output_table"] = "impala::%s" % collection_name
      properties["kudu_master"] = manager.get_kudu_master()
    else:
      properties['output_table'] = collection_name
  elif destination['outputFormat'] == 'file':
    properties['path'] = file_format["path"]
    properties['format'] = file_format['tableFormat']  # or csv
  elif destination['outputFormat'] == 'index':
    properties['collectionName'] = collection_name
    properties['connection'] = SOLR_URL.get()
    if destination['isTargetExisting']:
      pass  # TODO: check if format matches
    else:
      client = SolrClient(request.user)
      kwargs = {}
      _create_solr_collection(request.user, request.fs, client, destination, collection_name, kwargs)

  properties["app_name"] = 'Data Ingest'
  properties["inputFormat"] = file_format['inputFormat']
  properties["ouputFormat"] = destination['outputFormat']  # config generation reads the misspelled 'ouputFormat' (sic) key
  properties["streamSelection"] = file_format["streamSelection"]

  envelope = indexer.generate_config(properties)

  return indexer.run(request, collection_name, envelope, input_path, start_time=start_time, lib_path=lib_path)
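# A sketch of a file -> Solr index run against the revision above, with
# hypothetical values. When the target collection does not yet exist, the
# 'index' branch creates it via _create_solr_collection() before the job is
# submitted with the hardcoded /tmp/envelope-0.5.0.jar.
#
# file_format = {
#   'inputFormat': 'file',
#   'path': '/user/demo/web_logs.json',
#   'streamSelection': '',  # still read unconditionally when building the trailer properties
# }
# destination = {
#   'name': 'web_logs_index',
#   'outputFormat': 'index',
#   'isTargetExisting': False,
# }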
def _envelope_job(request, file_format, destination, start_time=None, lib_path=None):
  collection_name = destination['name']
  indexer = EnvelopeIndexer(request.user, request.fs)

  lib_path = None  # TODO: optional input field
  input_path = None

  if file_format['inputFormat'] == 'table':
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
    input_path = table_metadata.path_location
  elif file_format['inputFormat'] == 'file':
    input_path = file_format["path"]
    properties = {
      'input_path': input_path,
      'format': 'csv'
    }
  elif file_format['inputFormat'] == 'stream' and file_format['streamSelection'] == 'flume':
    pass
  elif file_format['inputFormat'] in ('stream', 'sfdc'):
    if file_format['inputFormat'] == 'sfdc':
      properties = {
        'streamSelection': file_format['streamSelection'],
        'streamUsername': file_format['streamUsername'],
        'streamPassword': file_format['streamPassword'],
        'streamToken': file_format['streamToken'],
        'streamEndpointUrl': file_format['streamEndpointUrl'],
        'streamObject': file_format['streamObject'],
      }
    elif file_format['streamSelection'] == 'kafka':
      manager = ManagerApi()
      properties = {
        "brokers": manager.get_kafka_brokers(),
        "topics": file_format['kafkaSelectedTopics'],
        "kafkaFieldType": file_format['kafkaFieldType'],
        "kafkaFieldDelimiter": file_format['kafkaFieldDelimiter'],
        "kafkaFieldNames": file_format['kafkaFieldNames'],
        "kafkaFieldTypes": file_format['kafkaFieldTypes']
      }

      # Windowing is disabled for now; the "KafkaSQL" variant would use:
      # properties['window'] = '''window {
      #   enabled = true
      #   milliseconds = 60000
      # }'''
      properties['window'] = ''

  if destination['outputFormat'] == 'table':
    if destination['isTargetExisting']:
      pass  # TODO: check if format matches
    else:
      sql = SQLIndexer(user=request.user, fs=request.fs).create_table_from_a_file(file_format, destination).get_str()
      print(sql)
    if destination['tableFormat'] == 'kudu':
      manager = ManagerApi()
      properties["output_table"] = "impala::%s" % collection_name
      properties["kudu_master"] = manager.get_kudu_master()
    else:
      properties['output_table'] = collection_name
  elif destination['outputFormat'] == 'file':
    properties['path'] = file_format["path"]
    if file_format['inputFormat'] == 'stream':
      properties['format'] = 'csv'
    else:
      properties['format'] = file_format['tableFormat']  # or csv
  elif destination['outputFormat'] == 'index':
    properties['collectionName'] = collection_name
    properties['connection'] = SOLR_URL.get()
    # Not needed anymore:
    # if destination['isTargetExisting']:
    #   # TODO: check if format matches
    #   pass
    # else:
    #   client = SolrClient(request.user)
    #   kwargs = {}
    #   _create_solr_collection(request.user, request.fs, client, destination, collection_name, kwargs)
  elif destination['outputFormat'] == 'stream':
    manager = ManagerApi()
    properties['brokers'] = manager.get_kafka_brokers()
    properties['topics'] = file_format['kafkaSelectedTopics']
    properties['kafkaFieldDelimiter'] = file_format['kafkaFieldDelimiter']

  properties["app_name"] = 'Data Ingest'
  properties["inputFormat"] = file_format['inputFormat']
  properties["ouputFormat"] = destination['outputFormat']  # config generation reads the misspelled 'ouputFormat' (sic) key
  properties["streamSelection"] = file_format["streamSelection"]

  envelope = indexer.generate_config(properties)

  return indexer.run(request, collection_name, envelope, input_path, start_time=start_time, lib_path=lib_path)
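# A sketch of the Kafka -> Kafka passthrough this revision adds (the
# outputFormat == 'stream' branch); values are hypothetical.
#
# file_format = {
#   'inputFormat': 'stream',
#   'streamSelection': 'kafka',
#   'kafkaSelectedTopics': 'web_logs',
#   'kafkaFieldType': 'delimited',
#   'kafkaFieldDelimiter': ',',
#   'kafkaFieldNames': 'id,msg',
#   'kafkaFieldTypes': 'int,string',
# }
# destination = {
#   'name': 'web_logs_copy',
#   'outputFormat': 'stream',
# }
#
# Note that the output side reuses file_format['kafkaSelectedTopics'] and the
# broker list from Cloudera Manager, so the generated Envelope config reads
# from and writes back to the same Kafka cluster.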