Exemples Python de MorphlineIndexer.is_unique_generated

Langage de programmation: Python

Espace de noms/Paquet : indexer.indexers.morphline

Class/Type: MorphlineIndexer

Méthode/Fonction: is_unique_generated

Exemples sur hotexamples.com : 4

Python MorphlineIndexer.is_unique_generated - 4 exemples trouvés. Ce sont les exemples réels les mieux notés de indexer.indexers.morphline.MorphlineIndexer.is_unique_generated extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

MorphlineIndexer(16)

guess_field_types(9)

generate_morphline_config(7)

guess_format(7)

get_kept_field_list(6)

get_field_list(5)

get_unique_field(4)

is_unique_generated(4)

run_morphline(4)

generate_config(1)

start(1)

Méthodes fréquemment utilisées

MorphlineIndexer (16)

guess_field_types (9)

generate_morphline_config (7)

guess_format (7)

get_kept_field_list (6)

get_field_list (5)

get_unique_field (4)

is_unique_generated (4)

run_morphline (4)

generate_config (1)

Méthodes fréquemment utilisées

start (1)

Exemple #1

0

Afficher le fichier

def _index(request, file_format, collection_name, query=None, start_time=None, lib_path=None):
    """Index the data described by ``file_format`` into ``collection_name``.

    The index is created through ``SolrClient`` when ``exists`` reports it
    missing.  For ``hs2_handle`` input the work is handed to
    ``CollectionManagerController.update_data_from_hive``; for every other
    input format a morphline config is generated and run.

    Returns the value produced by ``update_data_from_hive`` or by
    ``run_morphline``, depending on the input format.
    """
    morphline_indexer = MorphlineIndexer(request.user, request.fs)

    key_field = morphline_indexer.get_unique_field(file_format)
    key_is_generated = morphline_indexer.is_unique_generated(file_format)

    fields = morphline_indexer.get_kept_field_list(file_format['columns'])
    if key_is_generated:
        # The indexer reports the unique field as generated: declare it in
        # the schema as a string field.
        fields += [{"name": key_field, "type": "string"}]

    solr = SolrClient(user=request.user)
    if not solr.exists(collection_name):
        solr.create_index(
            name=collection_name,
            fields=request.POST.get('fields', fields),
            unique_key_field=key_field,
        )

    source_kind = file_format['inputFormat']

    if source_kind == 'hs2_handle':
        # Handle-based input bypasses the morphline job entirely.
        controller = CollectionManagerController(request.user)
        column_names = ['_uuid'] + [column['name'] for column in file_format['columns']]
        return controller.update_data_from_hive(
            collection_name,
            column_names,
            fetch_handle=file_format['fetch_handle'],
        )

    if source_kind == 'table':
        table = dbms.get(request.user).get_table(
            database=file_format['databaseName'],
            table_name=file_format['tableName'],
        )
        source_path = table.path_location
    elif source_kind == 'file':
        # The path is prefixed with the literal ``${nameNode}`` placeholder.
        source_path = '${nameNode}%s' % file_format["path"]
    else:
        source_path = None

    config = morphline_indexer.generate_morphline_config(
        collection_name, file_format, key_field, lib_path=lib_path
    )

    return morphline_indexer.run_morphline(
        request,
        collection_name,
        config,
        source_path,
        query,
        start_time=start_time,
        lib_path=lib_path,
    )

Exemple #2

0

Afficher le fichier

def _large_indexing(request, file_format, collection_name, query=None, start_time=None, lib_path=None):
    """Run a morphline indexing job for ``file_format`` into ``collection_name``.

    The index is created through ``SolrClient`` when ``exists`` reports it
    missing.  The input path is taken from the table metadata for ``table``
    input, built from the URL-unquoted ``path`` for ``file`` input, and is
    ``None`` otherwise.

    Returns the value produced by ``run_morphline``.
    """
    morphline_indexer = MorphlineIndexer(request.user, request.fs)

    key_field = morphline_indexer.get_unique_field(file_format)
    key_is_generated = morphline_indexer.is_unique_generated(file_format)

    fields = morphline_indexer.get_kept_field_list(file_format['columns'])
    if key_is_generated:
        # The indexer reports the unique field as generated: declare it in
        # the schema as a string field.
        fields += [{"name": key_field, "type": "string"}]

    solr = SolrClient(user=request.user)
    if not solr.exists(collection_name):
        solr.create_index(
            name=collection_name,
            fields=request.POST.get('fields', fields),
            unique_key_field=key_field,
            # No df currently
        )

    source_kind = file_format['inputFormat']
    if source_kind == 'table':
        table = dbms.get(request.user).get_table(
            database=file_format['databaseName'],
            table_name=file_format['tableName'],
        )
        source_path = table.path_location
    elif source_kind == 'file':
        # The path is URL-unquoted, then prefixed with the literal
        # ``${nameNode}`` placeholder.
        source_path = '${nameNode}%s' % urllib.unquote(file_format["path"])
    else:
        source_path = None

    config = morphline_indexer.generate_morphline_config(
        collection_name, file_format, key_field, lib_path=lib_path
    )

    return morphline_indexer.run_morphline(
        request,
        collection_name,
        config,
        source_path,
        query,
        start_time=start_time,
        lib_path=lib_path,
    )

Exemple #3

0

Afficher le fichier

def _large_indexing(request, file_format, collection_name, query=None, start_time=None, lib_path=None,
                    destination=None):
    """Dispatch an indexing request for ``file_format`` into ``collection_name``.

    The index is created through ``SolrClient`` only when ``exists`` reports
    it missing and the request does not carry ``show_command``.  The input
    format then selects the path taken:

    * ``stream`` with ``streamSelection == 'flume'``: use ``FlumeIndexer``.
      With ``show_command`` set, return a dict holding the last generated
      config; otherwise return the result of ``start``.
    * any other ``stream``: return the result of ``_envelope_job``.
    * ``table`` / ``file`` / anything else: resolve an input path (``None``
      for unknown formats), generate a morphline config and return the
      result of ``run_morphline``.
    """
    morphline_indexer = MorphlineIndexer(request.user, request.fs)

    key_field = morphline_indexer.get_unique_field(file_format)
    key_is_generated = morphline_indexer.is_unique_generated(file_format)

    fields = morphline_indexer.get_kept_field_list(file_format['columns'])
    if key_is_generated:
        # The indexer reports the unique field as generated: declare it in
        # the schema as a string field.
        fields += [{"name": key_field, "type": "string"}]

    solr = SolrClient(user=request.user)

    # if destination['isTargetExisting']:
    if not (solr.exists(collection_name) or request.POST.get('show_command')):
        solr.create_index(
            name=collection_name,
            fields=request.POST.get('fields', fields),
            unique_key_field=key_field,
            # No df currently
        )
    # TODO: check if format matches (when the index already exists)

    source_kind = file_format['inputFormat']

    if source_kind == 'stream':
        # 'streamSelection' is only read for stream input.
        if file_format['streamSelection'] == 'flume':
            flume_indexer = FlumeIndexer(user=request.user)
            if request.POST.get('show_command'):
                generated = flume_indexer.generate_config(file_format, destination)
                return {'status': 0, 'commands': generated[-1]}
            return flume_indexer.start(collection_name, file_format, destination)
        return _envelope_job(request, file_format, destination, start_time=start_time, lib_path=lib_path)

    if source_kind == 'table':
        table = dbms.get(request.user).get_table(
            database=file_format['databaseName'],
            table_name=file_format['tableName'],
        )
        source_path = table.path_location
    elif source_kind == 'file':
        # The path is URL-unquoted, then prefixed with the literal
        # ``${nameNode}`` placeholder.
        source_path = '${nameNode}%s' % urllib_unquote(file_format["path"])
    else:
        source_path = None

    config = morphline_indexer.generate_morphline_config(
        collection_name, file_format, key_field, lib_path=lib_path
    )

    return morphline_indexer.run_morphline(
        request,
        collection_name,
        config,
        source_path,
        query,
        start_time=start_time,
        lib_path=lib_path,
    )

Exemple #4

0

Afficher le fichier

Fichier : morphline_tests.py Projet : zzzharpreet/hue

def test_end_to_end(self):
    """Upload a CSV, guess its format, create a collection and index the file.

    Raises ``SkipTest`` unless ``is_live_cluster()`` is true.
    """
    if not is_live_cluster():
        # The morphline libraries have to be set up on the cluster.
        raise SkipTest()

    live_cluster = shared_cluster()
    hdfs = live_cluster.fs

    # NOTE(review): the username literal is masked ("******") in this copy
    # of the source; confirm the intended account name.
    make_logged_in_client(username="******", groupname="default", recreate=True, is_superuser=False)
    test_user = User.objects.get(username="******")

    target_collection = "test_collection"
    morphline_indexer = MorphlineIndexer("test", fs=hdfs, jt=live_cluster.jt, solr_client=self.solr_client)
    csv_path = "/tmp/test.csv"

    # Put the sample CSV on the cluster file system, then open it for reading.
    hdfs.create(csv_path, data=TestIndexer.simpleCSVString, overwrite=True)
    csv_stream = hdfs.open(csv_path)

    # Guess the file format first, then the field types based on that format.
    guessed_format = morphline_indexer.guess_format(
        {'file': {"stream": csv_stream, "name": "test.csv"}}
    )
    guessed_fields = morphline_indexer.guess_field_types(
        {"file": {"stream": csv_stream, "name": "test.csv"}, "format": guessed_format}
    )

    full_format = guessed_fields.copy()
    full_format['format'] = guessed_format

    # Field used as the unique key for each record.
    key_field = morphline_indexer.get_unique_field(full_format)
    key_is_generated = morphline_indexer.is_unique_generated(full_format)

    # Build the morphline config.
    config = morphline_indexer.generate_morphline_config(target_collection, full_format, key_field)

    fields = morphline_indexer.get_kept_field_list(full_format['columns'])
    if key_is_generated:
        fields += [{"name": key_field, "type": "string"}]

    # Recreate the collection from scratch with the computed fields.
    controller = CollectionManagerController("test")
    if controller.collection_exists(target_collection):
        controller.delete_collection(target_collection, None)
    controller.create_collection(target_collection, fields, unique_key_field=key_field)

    # Index the uploaded file.
    mocked_request = MockedRequest(user=test_user, fs=live_cluster.fs, jt=live_cluster.jt)
    morphline_indexer.run_morphline(mocked_request, target_collection, config, csv_path)