def update_data_from_hive(self, db, collection_or_core_name, database, table, columns, indexing_strategy='upload'):
  """
  Add the results of a Hive query to the index
  """
  # Run a custom hive query and post data to collection
  from beeswax.server import dbms

  import tablib

  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  if indexing_strategy == 'upload':
    table = db.get_table(database, table)
    hql = "SELECT %s FROM `%s`.`%s` %s" % (','.join(columns), database, table.name, db._get_browse_limit_clause(table))
    query = dbms.hql_query(hql)

    try:
      handle = db.execute_and_wait(query)

      if handle:
        result = db.fetch(handle, rows=100)
        db.close(handle)

        dataset = tablib.Dataset()
        dataset.append(columns)

        for row in result.rows():
          dataset.append(row)

        if not api.update(collection_or_core_name, dataset.csv, content_type='csv'):
          raise PopupException(_('Could not update index. Check error logs for more info.'))
      else:
        raise PopupException(_('Could not update index. Could not fetch any data from Hive.'))
    except Exception, e:
      raise PopupException(_('Could not update index.'), detail=e)
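# Hypothetical usage sketch of the helper above -- the view name, collection,
# table and columns are illustrative assumptions, not part of the original
# code. It only shows how a caller would wire up the Hive client.
from beeswax.server import dbms

def _index_web_logs_sample(request, searcher):
  db = dbms.get(request.user)  # Hive client bound to the requesting user
  searcher.update_data_from_hive(
    db,
    collection_or_core_name='web_logs_demo',  # assumed target collection
    database='default',
    table='web_logs',
    columns=['id', 'time', 'message'],
    indexing_strategy='upload'
  )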
def update_data_from_hdfs(self, fs, collection_or_core_name, fields, path, data_type='separated', indexing_strategy='upload', **kwargs):
  """
  Add hdfs path contents to index
  """
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  if indexing_strategy == 'upload':
    stats = fs.stats(path)
    if stats.size > MAX_UPLOAD_SIZE:
      raise PopupException(_('File size is too large to handle!'))
    else:
      # Get fields for filtering
      unique_key, fields = self.get_fields(collection_or_core_name)
      fields = [{'name': field, 'type': fields[field]['type']} for field in fields]

      fh = fs.open(path)
      if data_type == 'log':
        # Transform to JSON then update
        data = json.dumps([value for value in field_values_from_log(fh, fields)])
        content_type = 'json'
      elif data_type == 'separated':
        data = json.dumps([value for value in field_values_from_separated_file(fh, kwargs.get('separator', ','), kwargs.get('quote_character', '"'), fields)], indent=2)
        content_type = 'json'
      else:
        raise PopupException(_('Could not update index. Unknown type %s') % data_type)
      fh.close()

      if not api.update(collection_or_core_name, data, content_type=content_type):
        raise PopupException(_('Could not update index. Check error logs for more info.'))
  else:
    raise PopupException(_('Could not update index. Indexing strategy %s not supported.') % indexing_strategy)
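# For illustration only: the JSON payload built above for data_type='separated'
# is a list of one dict per row, keyed by the collection's field names. A
# hypothetical two-line CSV with fields id,msg would be posted roughly as:
#
#   [
#     {"id": "1", "msg": "first row"},
#     {"id": "2", "msg": "second row"}
#   ]
#
# api.update() then sends this list to the collection's update handler with
# content_type='json'.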
def get_collections(self):
  solr_collections = {}
  solr_aliases = {}
  solr_cores = {}

  try:
    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

    if self.is_solr_cloud_mode():
      solr_collections = api.collections()
      for name in solr_collections:
        solr_collections[name]['isCoreOnly'] = False

      solr_aliases = api.aliases()
      for name in solr_aliases:
        solr_aliases[name] = {
          'isCoreOnly': False,
          'isAlias': True,
          'collections': solr_aliases[name]
        }

    solr_cores = api.cores()
    for name in solr_cores:
      solr_cores[name]['isCoreOnly'] = True
  except Exception, e:
    LOG.warn('No Zookeeper servlet running on Solr server: %s' % e)

  # The three maps were computed but never returned; merge them into one result.
  solr_collections.update(solr_cores)
  solr_collections.update(solr_aliases)
  return solr_collections
def _create_solr_cloud_collection(self, name, fields, unique_key_field, df):
  with ZookeeperClient(hosts=get_solr_ensemble(), read_only=False) as zc:
    root_node = '%s/%s' % (ZK_SOLR_CONFIG_NAMESPACE, name)

    tmp_path, solr_config_path = copy_configs(fields, unique_key_field, df, True)
    try:
      config_root_path = '%s/%s' % (solr_config_path, 'conf')
      try:
        zc.copy_path(root_node, config_root_path)
      except Exception, e:
        zc.delete_path(root_node)
        raise PopupException(_('Error in copying Solr configurations.'), detail=e)
    finally:
      # Don't want directories lying around
      shutil.rmtree(tmp_path)

    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    if not api.create_collection(name):
      # Delete instance directory if we couldn't create a collection.
      try:
        zc.delete_path(root_node)
      except Exception, e:
        raise PopupException(_('Error in deleting Solr configurations.'), detail=e)
      raise PopupException(_('Could not create collection. Check error logs for more info.'))
def create_collection(self, name, fields, unique_key_field='id', df='text'):
  """
  Create solr collection or core and instance dir.
  Create schema.xml file so that we can set UniqueKey field.
  """
  if self.is_solr_cloud_mode():
    # solrcloud mode

    # Need to remove path afterwards
    tmp_path, solr_config_path = copy_configs(fields, unique_key_field, df, True)

    zc = ZookeeperClient(hosts=get_solr_ensemble(), read_only=False)
    root_node = '%s/%s' % (ZK_SOLR_CONFIG_NAMESPACE, name)
    config_root_path = '%s/%s' % (solr_config_path, 'conf')

    try:
      zc.copy_path(root_node, config_root_path)
    except Exception, e:
      zc.delete_path(root_node)
      raise PopupException(_('Error in copying Solr configurations.'), detail=e)

    # Don't want directories lying around
    shutil.rmtree(tmp_path)

    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    if not api.create_collection(name):
      # Delete instance directory if we couldn't create a collection.
      try:
        zc.delete_path(root_node)
      except Exception, e:
        raise PopupException(_('Error in deleting Solr configurations.'), detail=e)
      raise PopupException(_('Could not create collection. Check error logs for more info.'))
def get_autocomplete(self):
  autocomplete = {}

  try:
    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    autocomplete['collections'] = api.collections2()
    autocomplete['configs'] = api.configs()
  except Exception, e:
    LOG.warn('No Zookeeper servlet running on Solr server: %s' % e)

  return autocomplete
def create_collection(self, name, fields, unique_key_field='id', df='text'):
  """
  Create solr collection or core and instance dir.
  Create schema.xml file so that we can set UniqueKey field.
  """
  if self.is_solr_cloud_mode():
    # solrcloud mode

    # Need to remove path afterwards
    tmp_path, solr_config_path = utils.copy_configs(fields, unique_key_field, df, True)

    # Create instance directory.
    solrctl_path = get_solrctl_path()

    process = subprocess.Popen([solrctl_path, "instancedir", "--create", name, solr_config_path],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               env={
                                 'SOLR_ZK_ENSEMBLE': conf.SOLR_ZK_ENSEMBLE.get()
                               })
    status = process.wait()

    # Don't want directories lying around
    shutil.rmtree(tmp_path)

    if status != 0:
      LOG.error("Could not create instance directory.\nOutput: %s\nError: %s" % process.communicate())
      raise PopupException(_('Could not create instance directory. '
                             'Check if solr_zk_ensemble and solrctl_path are correct in Hue config [indexer].'))

    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    if not api.create_collection(name):
      # Delete instance directory if we couldn't create a collection.
      process = subprocess.Popen([solrctl_path, "instancedir", "--delete", name],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env={
                                   'SOLR_ZK_ENSEMBLE': conf.SOLR_ZK_ENSEMBLE.get()
                                 })
      if process.wait() != 0:
        LOG.error("Could not delete instance directory.\nOutput: %s\nError: %s" % process.communicate())
      raise PopupException(_('Could not create collection. Check error logs for more info.'))
  else:
    # Non-solrcloud mode
    # Create instance directory locally.
    instancedir = os.path.join(conf.CORE_INSTANCE_DIR.get(), name)
    if os.path.exists(instancedir):
      raise PopupException(_("Instance directory %s already exists! Please remove it from the file system.") % instancedir)

    tmp_path, solr_config_path = utils.copy_configs(fields, unique_key_field, df, False)
    shutil.move(solr_config_path, instancedir)
    shutil.rmtree(tmp_path)

    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    if not api.create_core(name, instancedir):
      # Delete instance directory if we couldn't create a collection.
      shutil.rmtree(instancedir)
      raise PopupException(_('Could not create collection. Check error logs for more info.'))
def is_solr_cloud_mode(self):
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  if not hasattr(self, '_solr_cloud_mode'):
    try:
      api.collections()
      setattr(self, '_solr_cloud_mode', True)
    except Exception, e:
      LOG.info('Non SolrCloud server: %s' % e)
      setattr(self, '_solr_cloud_mode', False)

  # The cached probe result was computed but never returned.
  return getattr(self, '_solr_cloud_mode')
def fields_data(self, user, name):
  api = SolrApi(SOLR_URL.get(), user)
  try:
    schema_fields = api.fields(name)
    schema_fields = schema_fields['schema']['fields']
  except Exception, e:
    LOG.warn('/luke call did not succeed: %s' % e)
    fields = api.schema_fields(name)
    schema_fields = Collection2._make_luke_from_schema_fields(fields)

  # Mirror the non-fallback variant of fields_data(): return the formatted, sorted field list.
  return sorted([self._make_field(field, attributes) for field, attributes in schema_fields.iteritems()])
def is_solr_cloud_mode(self):
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  if not hasattr(self, '_solr_cloud_mode'):
    try:
      api.collections()
      setattr(self, '_solr_cloud_mode', True)
    except:
      setattr(self, '_solr_cloud_mode', False)

  return getattr(self, '_solr_cloud_mode')
def get_fields(self, collection_or_core_name):
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  try:
    field_data = api.fields(collection_or_core_name)
    fields = self._format_flags(field_data['schema']['fields'])
  except Exception, e:
    LOG.warn('/luke call did not succeed: %s' % e)
    try:
      fields = api.schema_fields(collection_or_core_name)
      fields = Collection2._make_luke_from_schema_fields(fields)
    except:
      LOG.exception(_('Could not fetch fields for collection %s.') % collection_or_core_name)
      raise PopupException(_('Could not fetch fields for collection %s. See logs for more info.') % collection_or_core_name)

  # Callers such as update_data_from_hdfs() unpack a (unique_key, fields) tuple.
  try:
    uniquekey = api.uniquekey(collection_or_core_name)
  except Exception, e:
    LOG.warn('Could not fetch unique key for collection %s: %s' % (collection_or_core_name, e))
    uniquekey = None

  return uniquekey, fields
def _fetch_collections(request):
  from libsolr.api import SolrApi
  from search.conf import SOLR_URL

  path = request.GET['path']
  item = None
  name = None

  if path:
    item = path
  if '/' in path:
    item, name = path.split('/')

  api = SolrApi(SOLR_URL.get(), request.user)

  if not item:
    return {"databases": ["collections", "configs", "admin"]}
  elif item and name:
    return {"authorizable_link": "/indexer/#edit/%s" % name, "extended_columns": [], "columns": [], "partition_keys": []}
  elif item == 'collections':
    return {"tables_meta": [{"comment": None, "type": "Table", "name": col} for col in api.collections2()]}
  elif item == 'configs':
    return {"tables_meta": [{"comment": None, "type": "Table", "name": conf} for conf in api.configs()]}
  elif item == 'admin':
    return {"tables_meta": [{"comment": None, "type": "Table", "name": 'collections'}, {"comment": None, "type": "Table", "name": "cores"}]}
  else:
    raise PopupException(_('Authorizable %s could not be retrieved') % path)
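# Illustrative request paths and the shape of what _fetch_collections() returns
# (the collection name is invented for the example):
#
#   path=''                  -> {"databases": ["collections", "configs", "admin"]}
#   path='collections'       -> {"tables_meta": [{"name": "logs", "type": "Table", "comment": None}, ...]}
#   path='collections/logs'  -> {"authorizable_link": "/indexer/#edit/logs", ...}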
def create_or_edit_alias(request):
  if request.method != 'POST':
    raise PopupException(_('POST request required.'))

  response = {'status': -1}

  alias = request.POST.get('alias', '')
  collections = json.loads(request.POST.get('collections', '[]'))

  api = SolrApi(SOLR_URL.get(), request.user, SECURITY_ENABLED.get())

  try:
    api.create_or_modify_alias(alias, collections)
    response['status'] = 0
    response['message'] = _('Alias created or modified!')
  except Exception, e:
    response['message'] = _('Alias could not be created or modified: %s') % e

  return JsonResponse(response)
def update_collection(self, name, fields):
  """
  Only create new fields
  """
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  # Create only new fields
  # Fields that already exist, do not overwrite since there is no way to do that, currently.
  old_field_names = api.fields(name)['schema']['fields'].keys()
  new_fields = filter(lambda field: field['name'] not in old_field_names, fields)
  new_fields_filtered = []

  for field in new_fields:
    new_field = {}
    for attribute in filter(lambda attribute: attribute in field, ALLOWED_FIELD_ATTRIBUTES):
      new_field[attribute] = field[attribute]
    new_fields_filtered.append(new_field)

  api.add_fields(name, new_fields_filtered)
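# A small worked example of the filtering above, assuming a hypothetical
# ALLOWED_FIELD_ATTRIBUTES = ('name', 'type', 'indexed', 'stored'):
#
#   existing schema fields: ['id']
#   fields = [{'name': 'id', 'type': 'string'},
#             {'name': 'msg', 'type': 'text_general', 'foo': 1}]
#
# 'id' is dropped because it already exists, and the unknown 'foo' attribute is
# stripped, so api.add_fields() receives:
#   [{'name': 'msg', 'type': 'text_general'}]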
def _create_non_solr_cloud_collection(self, name, fields, unique_key_field, df):
  # Non-solrcloud mode
  # Create instance directory locally.
  instancedir = os.path.join(CORE_INSTANCE_DIR.get(), name)
  if os.path.exists(instancedir):
    raise PopupException(_("Instance directory %s already exists! Please remove it from the file system.") % instancedir)

  tmp_path, solr_config_path = copy_configs(fields, unique_key_field, df, False)
  try:
    shutil.move(solr_config_path, instancedir)
  finally:
    shutil.rmtree(tmp_path)

  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
  if not api.create_core(name, instancedir):
    # Delete instance directory if we couldn't create a collection.
    shutil.rmtree(instancedir)
    raise PopupException(_('Could not create collection. Check error logs for more info.'))
def delete_collection(self, name, core):
  """
  Delete solr collection/core and instance dir
  """
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  if core:
    raise PopupException(_('Cannot remove Solr cores.'))

  if api.remove_collection(name):
    # Delete instance directory.
    try:
      root_node = '%s/%s' % (ZK_SOLR_CONFIG_NAMESPACE, name)
      with ZookeeperClient(hosts=get_solr_ensemble(), read_only=False) as zc:
        zc.delete_path(root_node)
    except Exception, e:
      # Re-create collection so that we don't have an orphan config
      api.add_collection(name)
      raise PopupException(_('Error in deleting Solr configurations.'), detail=e)
class IndexController(object):
  """
  Glue the models to the views.
  """
  def __init__(self, user):
    self.user = user
    self.api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  def is_solr_cloud_mode(self):
    if not hasattr(self, '_solr_cloud_mode'):
      try:
        self.api.collections()
        setattr(self, '_solr_cloud_mode', True)
      except Exception, e:
        LOG.info('Non SolrCloud server: %s' % e)
        setattr(self, '_solr_cloud_mode', False)
    return getattr(self, '_solr_cloud_mode')
def delete_collection(self, name):
  """
  Delete solr collection and instance dir
  """
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  if api.remove_collection(name):
    # Delete instance directory.
    process = subprocess.Popen([conf.SOLRCTL_PATH.get(), "instancedir", "--delete", name],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               env={
                                 'SOLR_HOME': conf.SOLR_HOME.get(),
                                 'SOLR_ZK_ENSEMBLE': conf.SOLR_ZK_ENSEMBLE.get()
                               })
    if process.wait() != 0:
      LOG.error("Could not delete instance directory.\nOutput stream: %s\nError stream: %s" % process.communicate())
      raise PopupException(_('Could not delete instance directory. Check error logs for more info.'))
  else:
    raise PopupException(_('Could not remove collection. Check error logs for more info.'))
def execute(self, notebook, snippet):
  from search.conf import SOLR_URL

  api = NativeSolrApi(SOLR_URL.get(), self.user.username)

  collection = self.options.get('collection') or snippet.get('database')
  if not collection or collection == 'default':
    collection = api.collections2()[0]

  response = api.sql(collection, snippet['statement'])

  info = response['result-set']['docs'].pop(-1)  # EOF, RESPONSE_TIME, EXCEPTION
  if info.get('EXCEPTION'):
    raise QueryError(info['EXCEPTION'])

  headers = []
  for row in response['result-set']['docs']:
    for col in row.keys():
      if col not in headers:
        headers.append(col)

  data = [[doc.get(col) for col in headers] for doc in response['result-set']['docs']]
  has_result_set = bool(data)

  return {
    'sync': True,
    'has_result_set': has_result_set,
    'modified_row_count': 0,
    'result': {
      'has_more': False,
      'data': data if has_result_set else [],
      'meta': [{
        'name': col,
        'type': '',
        'comment': ''
      } for col in headers] if has_result_set else [],
      'type': 'table'
    },
    'statement_id': 0,
    'has_more_statements': False,
    'statements_count': 1
  }
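# Hypothetical Solr SQL response illustrating the parsing above (values are
# invented for the example):
#
#   response = {'result-set': {'docs': [
#     {'id': '1', 'level': 'INFO'},
#     {'id': '2', 'msg': 'boom'},
#     {'EOF': True, 'RESPONSE_TIME': 42},  # trailer popped off as `info`
#   ]}}
#
#   headers -> ['id', 'level', 'msg']      (union of keys, first-seen order)
#   data    -> [['1', 'INFO', None],
#               ['2', None, 'boom']]       (missing cells become None)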
def delete_collection(self, name, core):
  """
  Delete solr collection/core and instance dir
  """
  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  if core:
    raise PopupException(_('Cannot remove Solr cores.'))

  if api.remove_collection(name):
    # Delete instance directory.
    solrctl_path = get_solrctl_path()

    process = subprocess.Popen([solrctl_path, "--zk", get_solr_ensemble(), "instancedir", "--delete", name],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    if process.wait() != 0:
      LOG.error("Could not delete instance directory.\nOutput stream: %s\nError stream: %s" % process.communicate())
      raise PopupException(_('Could not delete instance directory. Check error logs for more info.'))
  else:
    raise PopupException(_('Could not remove collection. Check error logs for more info.'))
class SearchApi(DashboardApi):

  def __init__(self, user, cluster):
    DashboardApi.__init__(self, user, cluster)
    self.api = SolrApi(SOLR_URL.get(), self.user)

  def query(self, collection, query, facet=None):
    if facet:
      collection['template']['rows'] = 0
      collection['facets'] = [facet]
    response = self.api.query(collection, query)
    return augment_solr_response(response, collection, query)

  def datasets(self, show_all=False):  # show_all is forced to True if non Solr Cloud
    client = SolrClient(user=self.user)
    show_all = show_all or not client.is_solr_cloud_mode()
    return [index['name'] for index in client.get_indexes(include_cores=show_all)]

  def fields(self, collection):
    return self.api.fields(collection)

  def schema_fields(self, collection):
    return self.api.fields(collection)

  def luke(self, collection):
    return self.api.luke(collection)

  def stats(self, collection, field, query=None, facet=''):
    return self.api.stats(collection, field, query, facet)

  def get(self, collection, doc_id):
    return self.api.get(collection, doc_id)
class SearchApi(DashboardApi):

  def __init__(self, user):
    DashboardApi.__init__(self, user)
    self.api = SolrApi(SOLR_URL.get(), self.user)

  def query(self, collection, query, facet=None):
    response = self.api.query(collection, query)
    return augment_solr_response(response, collection, query)

  def datasets(self, show_all=False):
    return SearchController(self.user).get_all_indexes(show_all=show_all)

  def fields(self, collection):
    return self.api.fields(collection)

  def schema_fields(self, collection):
    return self.api.fields(collection)

  def luke(self, collection):
    return self.api.luke(collection)

  def stats(self, collection, field, query=None, facet=''):
    return self.api.stats(collection, field, query, facet)

  def get(self, collection, doc_id):
    return self.api.get(collection, doc_id)
def create_collection(self, name, fields, unique_key_field='id', df='text'):
  """
  Create solr collection and instance dir.
  Create schema.xml file so that we can set UniqueKey field.
  """
  # Need to remove path afterwards
  tmp_path, solr_config_path = utils.copy_configs(fields, unique_key_field, df)

  # Create instance directory.
  process = subprocess.Popen([conf.SOLRCTL_PATH.get(), "instancedir", "--create", name, solr_config_path],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             env={
                               'SOLR_HOME': conf.SOLR_HOME.get(),
                               'SOLR_ZK_ENSEMBLE': conf.SOLR_ZK_ENSEMBLE.get()
                             })
  status = process.wait()
  shutil.rmtree(tmp_path)

  if status != 0:
    LOG.error("Could not create instance directory.\nOutput stream: %s\nError stream: %s" % process.communicate())
    raise PopupException(_('Could not create instance directory. Check error logs for more info.'))

  api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
  if not api.create_collection(name):
    # Delete instance directory.
    process = subprocess.Popen([conf.SOLRCTL_PATH.get(), "instancedir", "--delete", name],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               env={
                                 'SOLR_HOME': conf.SOLR_HOME.get(),
                                 'SOLR_ZK_ENSEMBLE': conf.SOLR_ZK_ENSEMBLE.get()
                               })
    if process.wait() != 0:
      LOG.error("Could not delete instance directory.\nOutput stream: %s\nError stream: %s" % process.communicate())
    raise PopupException(_('Could not create collection. Check error logs for more info.'))
class CollectionController(object):
  """
  Glue the models to the views.
  """
  def __init__(self, user):
    self.user = user
    self.api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

  # def _format_flags(self, fields):
  #   for field_name, field in fields.items():
  #     for index in range(0, len(FLAGS)):
  #       flags = FLAGS[index]
  #       field[flags[1]] = field['flags'][index] == FLAGS[index][0]
  #   return fields

  def is_solr_cloud_mode(self):
    if not hasattr(self, '_solr_cloud_mode'):
      try:
        self.api.collections()
        setattr(self, '_solr_cloud_mode', True)
      except Exception, e:
        LOG.info('Non SolrCloud server: %s' % e)
        setattr(self, '_solr_cloud_mode', False)
    return getattr(self, '_solr_cloud_mode')
def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_type):
  properties = {
    'sort': 'desc',
    'canRange': False,
    'stacked': False,
    'limit': 10,
    'mincount': 0,
    'isDate': False,
    'andUp': False,  # Not used yet
  }

  if widget_type == 'tree-widget':
    facet_type = 'pivot'
  else:
    solr_api = SolrApi(SOLR_URL.get(), user)
    range_properties = _new_range_facet(solr_api, collection, facet_field, widget_type)

    if range_properties:
      facet_type = 'range'
      properties.update(range_properties)
    elif widget_type == 'hit-widget':
      facet_type = 'query'
    else:
      facet_type = 'field'

  if widget_type == 'map-widget':
    properties['scope'] = 'world'
    properties['mincount'] = 1
    properties['limit'] = 100
  elif widget_type == 'tree-widget':
    properties['mincount'] = 1
    properties['facets'] = []
    properties['facets_form'] = {'field': '', 'mincount': 1, 'limit': 10}
    properties['graph'] = False

  return {
    'id': facet_id,
    'label': facet_label,
    'field': facet_field,
    'type': facet_type,
    'widgetType': widget_type,
    'properties': properties
  }
def index_fields_dynamic(request):
  result = {'status': -1, 'message': 'Error'}

  try:
    name = request.POST['name']
    hue_collection = Collection(name=name, label=name)

    dynamic_fields = SolrApi(SOLR_URL.get(), request.user).luke(hue_collection.name)

    result['message'] = ''
    result['fields'] = [Collection._make_field(name, properties)
                        for name, properties in dynamic_fields['fields'].iteritems() if 'dynamicBase' in properties]
    result['gridlayout_header_fields'] = [Collection._make_gridlayout_header_field({'name': name}, True)
                                          for name, properties in dynamic_fields['fields'].iteritems() if 'dynamicBase' in properties]
    result['status'] = 0
  except Exception, e:
    result['message'] = unicode(str(e), "utf8")

  return JsonResponse(result)
def get_timeline(request):
  result = {'status': -1, 'message': 'Error'}

  try:
    collection = json.loads(request.POST.get('collection', '{}'))
    query = json.loads(request.POST.get('query', '{}'))
    facet = json.loads(request.POST.get('facet', '{}'))
    qdata = json.loads(request.POST.get('qdata', '{}'))
    multiQ = request.POST.get('multiQ', 'query')

    if multiQ == 'query':
      label = qdata['q']
      query['qs'] = [qdata]
    elif facet['type'] == 'range':
      _prop = filter(lambda prop: prop['from'] == qdata, facet['properties'])[0]
      label = '%(from)s - %(to)s ' % _prop
      facet_id = facet['id']
      # Only care about our current field:value filter
      for fq in query['fqs']:
        if fq['id'] == facet_id:
          fq['properties'] = [_prop]
    else:
      label = qdata
      facet_id = facet['id']
      # Only care about our current field:value filter
      for fq in query['fqs']:
        if fq['id'] == facet_id:
          fq['filter'] = [{'value': qdata, 'exclude': False}]

    # Remove other facets from collection for speed
    collection['facets'] = [f for f in collection['facets'] if f['widgetType'] == 'histogram-widget']

    response = SolrApi(SOLR_URL.get(), request.user).query(collection, query)
    response = augment_solr_response(response, collection, query)

    label += ' (%s) ' % response['response']['numFound']

    result['series'] = {'label': label, 'counts': response['normalized_facets'][0]['counts']}
    result['status'] = 0
    result['message'] = ''
  except Exception as e:
    result['message'] = force_unicode(e)

  return JsonResponse(result)
def update_document(request):
  result = {'status': -1, 'message': 'Error'}

  if not can_edit_index(request.user):
    result['message'] = _('Permission to edit the document denied')
    return JsonResponse(result)

  try:
    collection = json.loads(request.POST.get('collection', '{}'))
    document = json.loads(request.POST.get('document', '{}'))
    doc_id = request.POST.get('id')

    if document['hasChanged']:
      edits = {
        "id": doc_id,
      }
      version = None  # If there is a version, use it to avoid potential concurrent update conflicts

      for field in document['details']:
        if field['hasChanged'] and field['key'] != '_version_':
          edits[field['key']] = {"set": field['value']}
        if field['key'] == '_version_':
          version = field['value']

      result['update'] = SolrApi(SOLR_URL.get(), request.user).update(collection['name'], json.dumps([edits]), content_type='json', version=version)
      result['message'] = _('Document successfully updated.')
      result['status'] = 0
    else:
      result['status'] = 0
      result['message'] = _('Document has no modifications to change.')
  except RestException as e:
    try:
      result['message'] = json.loads(e.message)['error']['msg']
    except:
      LOG.exception('Failed to parse json response')
      result['message'] = force_unicode(e)
  except Exception as e:
    result['message'] = force_unicode(e)

  return JsonResponse(result)
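# The edits dict above uses Solr's atomic-update syntax. For a changed 'price'
# field on a hypothetical document, the posted payload would look like:
#
#   [{"id": "doc-1", "price": {"set": 9.99}}]
#
# The captured _version_ is passed separately to api.update() so Solr can
# reject the write with a conflict if the document changed since it was read
# (optimistic concurrency).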
def search(request):
  response = {}

  collection = json.loads(request.POST.get('collection', '{}'))
  query = json.loads(request.POST.get('query', '{}'))
  query['download'] = 'download' in request.POST

  if collection:
    try:
      response = SolrApi(SOLR_URL.get(), request.user).query(collection, query)
      response = augment_solr_response(response, collection, query)
    except RestException, e:
      try:
        message = json.loads(e.message)['error']
        response['error'] = message.get('msg', message['trace'])
      except Exception, e2:
        LOG.exception('failed to extract json message: %s' % force_unicode(e2))
        LOG.exception('failed to parse json response: %s' % force_unicode(e))
        response['error'] = force_unicode(e)

  return JsonResponse(response)
def query_suggest(request):
  if request.method != 'POST':
    raise PopupException(_('POST request required.'))

  collection = json.loads(request.POST.get('collection', '{}'))
  query = request.POST.get('query', '')

  result = {'status': -1, 'message': ''}

  solr_query = {}
  solr_query['q'] = query
  solr_query['dictionary'] = collection['suggest']['dictionary']

  try:
    response = SolrApi(SOLR_URL.get(), request.user).suggest(collection['name'], solr_query)
    result['response'] = response
    result['status'] = 0
  except Exception, e:
    result['message'] = force_unicode(e)

  return JsonResponse(result)
def get_document(request):
  result = {'status': -1, 'message': 'Error'}

  try:
    collection = json.loads(request.POST.get('collection', '{}'))
    doc_id = request.POST.get('id')

    if doc_id:
      result['doc'] = SolrApi(SOLR_URL.get(), request.user).get(collection['name'], doc_id)
      result['status'] = 0
      result['message'] = ''
    else:
      result['message'] = _('This document does not have any index id.')
      result['status'] = 1
  except Exception, e:
    result['message'] = unicode(str(e), "utf8")

  return JsonResponse(result)
def index_fields_dynamic(request):
  result = {'status': -1, 'message': 'Error'}

  try:
    name = request.POST['name']

    dynamic_fields = SolrApi(SOLR_URL.get(), request.user).luke(name)

    result['message'] = ''
    result['fields'] = [
      Collection2._make_field(name, properties)
      for name, properties in dynamic_fields['fields'].iteritems() if 'dynamicBase' in properties
    ]
    result['gridlayout_header_fields'] = [
      Collection2._make_gridlayout_header_field({'name': name, 'type': properties.get('type')}, True)
      for name, properties in dynamic_fields['fields'].iteritems() if 'dynamicBase' in properties
    ]
    result['status'] = 0
  except Exception, e:
    result['message'] = force_unicode(e)

  return JsonResponse(result)
def get_stats(request):
  result = {'status': -1, 'message': 'Error'}

  try:
    collection = json.loads(request.POST.get('collection', '{}'))
    query = json.loads(request.POST.get('query', '{}'))
    analysis = json.loads(request.POST.get('analysis', '{}'))

    field = analysis['name']
    facet = analysis['stats']['facet']

    result['stats'] = SolrApi(SOLR_URL.get(), request.user).stats(collection['name'], [field], query, facet)
    result['status'] = 0
    result['message'] = ''
  except Exception, e:
    result['message'] = force_unicode(e)
    if 'not currently supported' in result['message']:
      result['status'] = 1
      result['message'] = _('This field does not support stats')

  return JsonResponse(result)
def search(request):
  response = {}

  collection = json.loads(request.POST.get('collection', '{}'))
  query = json.loads(request.POST.get('query', '{}'))
  query['download'] = 'download' in request.POST

  if collection:
    try:
      response = SolrApi(SOLR_URL.get(), request.user).query(collection, query)
      response = augment_solr_response(response, collection, query)
    except RestException, e:
      try:
        response['error'] = json.loads(e.message)['error']['msg']
      except:
        response['error'] = force_unicode(str(e))
    except Exception, e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  return JsonResponse(response)
def get_range_facet(request):
  result = {'status': -1, 'message': ''}

  try:
    collection = json.loads(request.POST.get('collection', '{}'))
    facet = json.loads(request.POST.get('facet', '{}'))
    action = request.POST.get('action', 'select')

    solr_api = SolrApi(SOLR_URL.get(), request.user)

    if action == 'select':
      properties = _guess_gap(solr_api, collection, facet, facet['properties']['start'], facet['properties']['end'])
    else:
      properties = _zoom_range_facet(solr_api, collection, facet)  # Zoom out

    result['properties'] = properties
    result['status'] = 0
  except Exception, e:
    result['message'] = force_unicode(e)

  return JsonResponse(result)
def get_terms(request):
  result = {'status': -1, 'message': 'Error'}

  try:
    collection = json.loads(request.POST.get('collection', '{}'))
    analysis = json.loads(request.POST.get('analysis', '{}'))
    limit = json.loads(request.POST.get('limit', '25'))

    support_distributed = [engine for engine in get_engines(request.user) if engine['type'] == 'solr'][0]['analytics']

    field = analysis['name']
    properties = {
      'terms.limit': limit,
      'terms.distrib': str(support_distributed).lower(),
      # lower
      # mincount
      # maxcount
    }
    if analysis['terms']['prefix']:
      properties['terms.regex'] = '.*%(prefix)s.*' % analysis['terms']  # Use regexp instead of case sensitive 'terms.prefix'
      properties['terms.regex.flag'] = 'case_insensitive'

    result['terms'] = SolrApi(SOLR_URL.get(), request.user).terms(collection['name'], field, properties)
    result['terms'] = pairwise2(field, [], result['terms']['terms'][field])
    result['status'] = 0
    result['message'] = ''
  except Exception as e:
    result['message'] = force_unicode(e)
    if 'not currently supported' in result['message']:
      result['status'] = 1
      result['message'] = _('This field does not support stats')

  return JsonResponse(result)
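# Illustrative: for a prefix of 'err', the properties built above ask Solr for
# case-insensitive substring matches instead of the case-sensitive
# 'terms.prefix' option:
#
#   properties['terms.regex'] = '.*err.*'
#   properties['terms.regex.flag'] = 'case_insensitive'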
def search(request):
  response = {}

  collection = json.loads(request.POST.get('collection', '{}'))
  query = json.loads(request.POST.get('query', '{}'))
  query['download'] = 'download' in request.POST
  # todo: remove the selected histo facet if multiq

  if collection['id']:
    hue_collection = Collection.objects.get(id=collection['id'])  # TODO perms

  if collection:
    try:
      response = SolrApi(SOLR_URL.get(), request.user).query(collection, query)
      response = augment_solr_response(response, collection, query)
    except RestException, e:
      try:
        response['error'] = json.loads(e.message)['error']['msg']
      except:
        response['error'] = force_unicode(str(e))
    except Exception, e:
      raise PopupException(e, title=_('Error while accessing Solr'))

  return JsonResponse(response)
        # Delete instance directory if we couldn't create a collection.
        try:
          zc.delete_path(root_node)
        except Exception, e:
          raise PopupException(_('Error in deleting Solr configurations.'), detail=e)
    else:
      # Non-solrcloud mode
      # Create instance directory locally.
      instancedir = os.path.join(CORE_INSTANCE_DIR.get(), name)
      if os.path.exists(instancedir):
        raise PopupException(_("Instance directory %s already exists! Please remove it from the file system.") % instancedir)

      tmp_path, solr_config_path = copy_configs(fields, unique_key_field, df, False)
      shutil.move(solr_config_path, instancedir)
      shutil.rmtree(tmp_path)

      api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
      if not api.create_core(name, instancedir):
        # Delete instance directory if we couldn't create a collection.
        shutil.rmtree(instancedir)
        raise PopupException(_('Could not create collection. Check error logs for more info.'))

  def delete_collection(self, name, core):
    """
    Delete solr collection/core and instance dir
    """
    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())

    if core:
      raise PopupException(_('Cannot remove Solr cores.'))

    if api.remove_collection(name):
      # Delete instance directory.
def __init__(self, user, cluster):
  DashboardApi.__init__(self, user, cluster)

  self.api = SolrApi(SOLR_URL.get(), self.user)
def __init__(self, user):
  self.user = user
  self.api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
def fields_data(self, user, name):
  schema_fields = SolrApi(SOLR_URL.get(), user).fields(name)
  schema_fields = schema_fields['schema']['fields']

  return sorted([self._make_field(field, attributes) for field, attributes in schema_fields.iteritems()])
def fields_data(self, user):
  schema_fields = SolrApi(SOLR_URL.get(), user).fields(self.name)
  schema_fields = schema_fields['schema']['fields']

  return sorted([self._make_field(field, attributes) for field, attributes in schema_fields.iteritems()])
def get_solr_collection(self):
  return SolrApi(SOLR_URL.get(), self.user).collections()
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id']  # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']

        cols = ['%(field)s' % facet, 'count(%(field)s)' % facet]
        last_x_col = 0
        last_xx_col = 0
        for i, f in enumerate(facet['properties']['facets']):
          if f['aggregate']['function'] == 'count':
            cols.append(f['field'])
            last_xx_col = last_x_col
            last_x_col = i + 2
          cols.append(SolrApi._get_aggregate_function(f))
        rows = []

        # For dim in dimensions

        # Number or Date range
        if collection_facet['properties']['canRange'] and not facet['properties'].get('type') == 'field':
          dimension = 3 if collection_facet['properties']['isDate'] else 1

          # Single dimension or dimension 2 with analytics
          if not collection_facet['properties']['facets'] or collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1:
            column = 'count'
            if len(collection_facet['properties']['facets']) == 1:
              agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
              legend = agg_keys[0].split(':', 2)[1]
              column = agg_keys[0]
            else:
              legend = facet['field']  # 'count(%s)' % legend
              agg_keys = [column]

            _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
            counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
            agg_keys.sort(key=lambda a: a[4:])
            if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
              agg_keys.insert(0, 'count')
            counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            _series = collections.defaultdict(list)

            for row in rows:
              for i, cell in enumerate(row):
                if i > last_x_col:
                  legend = cols[i]
                  if last_xx_col != last_x_col:
                    legend = '%s %s' % (cols[i], row[last_x_col])
                  _series[legend].append(row[last_xx_col])
                  _series[legend].append(cell)

            for name, val in _series.iteritems():
              _c = range_pair(facet['field'], name, selected_values.get(facet['id'], []), val, 1, collection_facet)
              extraSeries.append({'counts': _c, 'label': name})
            counts = []
        elif collection_facet['properties'].get('isOldPivot'):
          facet_fields = [collection_facet['field']] + [f['field'] for f in collection_facet['properties'].get('facets', []) if f['aggregate']['function'] == 'count']

          column = 'count'
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])
          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
          # _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          # count = response['facets'][name]
          # _convert_nested_to_augmented_pivot_nd(facet_fields, facet['id'], count, selected_values, dimension=2)
          dimension = len(facet_fields)
        elif not collection_facet['properties']['facets'] or (collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1):
          # Dimension 1 with 1 count or agg
          dimension = 1

          column = 'count'
          if len(collection_facet['properties']['facets']) == 1:
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
            legend = agg_keys[0].split(':', 2)[1]
            column = agg_keys[0]
          else:
            legend = facet['field']
            agg_keys = [column]

          _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
          counts = pairwise2(legend, selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 2 with analytics or 1 with N aggregates
          dimension = 2
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])
          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          actual_dimension = 1 + sum([_f['aggregate']['function'] == 'count' for _f in collection_facet['properties']['facets']])
          counts = filter(lambda a: len(a['fq_fields']) == actual_dimension, counts)

        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension,
          'response': {'response': {'start': 0, 'numFound': response['facets'][name]['numBuckets']}},  # Todo * nested buckets + offsets
          'docs': [dict(zip(cols, row)) for row in rows],
          'fieldsAttributes': [Collection2._make_gridlayout_header_field({'name': col, 'type': 'aggr' if '(' in col else 'string'}) for col in cols]
        }

        normalized_facets.append(facet)

  # Remove unnecessary facet data
  if response:
    response.pop('facet_counts')
    response.pop('facets')

  # HTML escaping
  if not query.get('download'):
    id_field = collection.get('idField', '')
    for doc in response['response']['docs']:
      for field, value in doc.iteritems():
        if isinstance(value, numbers.Number):
          escaped_value = value
        elif field == '_childDocuments_':  # Nested documents
          escaped_value = value
        elif isinstance(value, list):  # Multivalue field
          escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
        else:
          value = smart_unicode(value, errors='replace')
          escaped_value = escape(value)
        doc[field] = escaped_value

      link = None
      if 'link-meta' in doc:
        meta = json.loads(doc['link-meta'])
        link = get_data_link(meta)

      doc['externalLink'] = link
      doc['details'] = []
      doc['hueId'] = smart_unicode(doc.get(id_field, ''))

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][smart_unicode(doc[id_field])]

          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls
            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
def is_core(self, core_name):
  solr_cores = SolrApi(SOLR_URL.get(), self.user).cores()
  return core_name in solr_cores
def is_collection(self, collection_name):
  solr_collections = SolrApi(SOLR_URL.get(), self.user).collections()
  return collection_name in solr_collections
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id']  # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        collection_facet = get_facet_field(category, name, collection['facets'])
        value = response['facets'][name]
        counts = {'value': value, 'percentage': 0}

        if collection_facet['properties']['filter']['is_enabled']:
          if collection_facet['properties']['compare']['is_enabled']:
            value = value[name]
          else:
            counts['value'] = value['count']

        if collection_facet['properties']['compare']['is_enabled']:
          original_number, final_number = value['buckets'][0].get(name, 0), value['buckets'][1].get(name, 0)
          if collection_facet['properties']['compare']['use_percentage']:
            if original_number != 0:
              counts['percentage'] = (final_number - original_number) / original_number * 100.0
          counts['value'] = final_number - original_number

        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']

        # Give humane names to the columns
        cols = ['%(field)s' % facet, 'count(%(field)s)' % facet]
        last_seen_dim_col_index = 0
        prev_last_seen_dim_col_index = 0
        for i, f in enumerate(facet['properties']['facets'][1:]):
          if f['aggregate']['function'] == 'count':
            cols.append(f['field'])
            prev_last_seen_dim_col_index = last_seen_dim_col_index
            last_seen_dim_col_index = i + 2
          from libsolr.api import SolrApi
          aggregate_name = SolrApi._get_aggregate_function(f)
          cols.append(aggregate_name + ('_%(field)s' % facet['properties']['facets'][last_seen_dim_col_index - 1] if aggregate_name in cols else ''))
        rows = []

        facet_one = collection_facet['properties']['facets'][0]

        if 'missing' in value:
          counts.append({'val': '', 'count': value['missing']['count']})

        # Number or Date range
        if facet_one['canRange'] and not facet_one['type'] == 'field':
          dimension = 3 if facet_one['isDate'] else 1

          # Single dimension or dimension 2 with analytics
          if len(collection_facet['properties']['facets']) == 1 or len(collection_facet['properties']['facets']) == 2 and collection_facet['properties']['facets'][1]['aggregate']['function'] != 'count':
            column = 'count'
            if len(collection_facet['properties']['facets']) == 2:
              agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
              legend = agg_keys[0].split(':', 2)[1]
              column = agg_keys[0]
            else:
              legend = facet['field']  # 'count(%s)' % legend
              agg_keys = [column]

            _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
            counts = range_pair2(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet['properties']['facets'][0], collection_facet=collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
            agg_keys.sort(key=lambda a: a[4:])
            if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
              agg_keys.insert(0, 'count')
            counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            _series = collections.defaultdict(list)

            for row in rows:
              for i, cell in enumerate(row):
                if i > last_seen_dim_col_index:
                  legend = cols[i]
                  if prev_last_seen_dim_col_index != last_seen_dim_col_index:
                    legend = '%s %s' % (cols[i], row[last_seen_dim_col_index])
                  _series[legend].append(row[prev_last_seen_dim_col_index])
                  _series[legend].append(cell)

            for _name, val in _series.iteritems():
              _c = range_pair2(facet['field'], _name, selected_values.get(facet['id'], []), val, 1, collection_facet['properties']['facets'][0])
              extraSeries.append({'counts': _c, 'label': _name})
            counts = []
        elif collection_facet['properties'].get('isOldPivot'):
          facet_fields = [collection_facet['field']] + [f['field'] for f in collection_facet['properties'].get('facets', []) if f['aggregate']['function'] == 'count']

          column = 'count'
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])
          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          # _convert_nested_to_augmented_pivot_nd(facet_fields, facet['id'], count, selected_values, dimension=2)
          dimension = len(facet_fields)
        elif len(collection_facet['properties']['facets']) == 1 or (len(collection_facet['properties']['facets']) == 2 and collection_facet['properties']['facets'][1]['aggregate']['function'] != 'count'):
          # Dimension 1 with 1 count or agg
          dimension = 1

          column = 'count'
          agg_keys = counts and [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
          if len(collection_facet['properties']['facets']) == 2 and agg_keys:
            column = agg_keys[0]
          else:
            agg_keys = [column]
          legend = collection_facet['properties']['facets'][0]['field']

          _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
          counts = pairwise2(legend, selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 2 with analytics or 1 with N aggregates
          dimension = 2
          agg_keys = counts and [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])
          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          actual_dimension = sum([_f['aggregate']['function'] == 'count' for _f in collection_facet['properties']['facets']])
          counts = filter(lambda a: len(a['fq_fields']) == actual_dimension, counts)

        num_bucket = response['facets'][name]['numBuckets'] if 'numBuckets' in response['facets'][name] else len(response['facets'][name])
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension,
          'response': {'response': {'start': 0, 'numFound': num_bucket}},  # Todo * nested buckets + offsets
          'docs': [dict(zip(cols, row)) for row in rows],
          'fieldsAttributes': [Collection2._make_gridlayout_header_field({'name': col, 'type': 'aggr' if '(' in col else 'string'}) for col in cols]
        }

        normalized_facets.append(facet)

  # Remove unnecessary facet data
  if response:
    response.pop('facet_counts')
    response.pop('facets')

  augment_response(collection, query, response)

  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
def test_is_solr_cloud_mode(self):
  SolrApi(SOLR_URL.get(), self.user).collections()
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id']  # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']
        cols = ['%(field)s' % facet, 'count(%(field)s)' % facet]
        last_x_col = 0
        last_xx_col = 0
        for i, f in enumerate(facet['properties']['facets']):
          if f['aggregate']['function'] == 'count':
            cols.append(f['field'])
            last_xx_col = last_x_col
            last_x_col = i + 2
          cols.append(SolrApi._get_aggregate_function(f))
        rows = []

        # For dim in dimensions

        if collection_facet['properties']['canRange'] and not facet['properties'].get('type') == 'field':
          # Number or Date range
          dimension = 3 if collection_facet['properties']['isDate'] else 1

          if not collection_facet['properties']['facets'] or (collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1):
            # Single dimension or dimension 2 with analytics
            column = 'count'
            if len(collection_facet['properties']['facets']) == 1:
              agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
              legend = agg_keys[0].split(':', 2)[1]
              column = agg_keys[0]
            else:
              legend = facet['field']  # 'count(%s)' % legend
              agg_keys = [column]

            _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
            counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
            agg_keys.sort(key=lambda a: a[4:])
            if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
              agg_keys.insert(0, 'count')
            counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            _series = collections.defaultdict(list)

            for row in rows:
              for i, cell in enumerate(row):
                if i > last_x_col:
                  legend = cols[i]
                  if last_xx_col != last_x_col:
                    legend = '%s %s' % (cols[i], row[last_x_col])
                  _series[legend].append(row[last_xx_col])
                  _series[legend].append(cell)

            # Use a distinct loop variable so the facet 'name' is not clobbered
            # before the numBuckets lookup below.
            for _name, val in _series.iteritems():
              _c = range_pair(facet['field'], _name, selected_values.get(facet['id'], []), val, 1, collection_facet)
              extraSeries.append({'counts': _c, 'label': _name})
            counts = []
        elif collection_facet['properties'].get('isOldPivot'):
          facet_fields = [collection_facet['field']] + [f['field'] for f in collection_facet['properties'].get('facets', []) if f['aggregate']['function'] == 'count']

          column = 'count'
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])
          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
          #_convert_nested_to_augmented_pivot_nd(facet_fields, facet['id'], count, selected_values, dimension=2)
          dimension = len(facet_fields)
        elif not collection_facet['properties']['facets'] or (collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1):
          # Dimension 1 with 1 count or agg
          dimension = 1

          column = 'count'
          if len(collection_facet['properties']['facets']) == 1:
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
            legend = agg_keys[0].split(':', 2)[1]
            column = agg_keys[0]
          else:
            legend = facet['field']
            agg_keys = [column]

          _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
          counts = pairwise2(legend, selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 2 with analytics or 1 with N aggregates
          dimension = 2
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])
          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
          actual_dimension = 1 + sum([_f['aggregate']['function'] == 'count' for _f in collection_facet['properties']['facets']])

          counts = filter(lambda a: len(a['fq_fields']) == actual_dimension, counts)

        num_bucket = response['facets'][name]['numBuckets'] if 'numBuckets' in response['facets'][name] else len(response['facets'][name])
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension,
          'response': {'response': {'start': 0, 'numFound': num_bucket}},  # Todo * nested buckets + offsets
          'docs': [dict(zip(cols, row)) for row in rows],
          'fieldsAttributes': [Collection2._make_gridlayout_header_field({'name': col, 'type': 'aggr' if '(' in col else 'string'}) for col in cols]
        }
        normalized_facets.append(facet)

  # Remove unnecessary facet data
  if response:
    response.pop('facet_counts')
    response.pop('facets')

  # HTML escaping
  if not query.get('download'):
    id_field = collection.get('idField', '')
    for doc in response['response']['docs']:
      for field, value in doc.iteritems():
        if isinstance(value, numbers.Number):
          escaped_value = value
        elif field == '_childDocuments_':  # Nested documents
          escaped_value = value
        elif isinstance(value, list):  # Multivalue field
          escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
        else:
          value = smart_unicode(value, errors='replace')
          escaped_value = escape(value)
        doc[field] = escaped_value

      link = None
      if 'link-meta' in doc:
        meta = json.loads(doc['link-meta'])
        link = get_data_link(meta)

      doc['externalLink'] = link
      doc['details'] = []
      doc['hueId'] = smart_unicode(doc.get(id_field, ''))

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][smart_unicode(doc[id_field])]

          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              # Escape the snippet, then restore the <em> tags Solr uses to mark hits
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls
            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
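The nested-facet branches above repeatedly flatten JSON facet buckets into the alternating value/count list that pairwise2 and range_pair consume. A minimal standalone sketch of that step, with made-up bucket data:

buckets = [
  {'val': '2024-01-01T00:00:00Z', 'count': 12},
  {'val': '2024-01-02T00:00:00Z', 'count': 7},
]
column = 'count'
# Same comprehension as in augment_solr_response(): [val1, count1, val2, count2, ...]
flat = [_v for _f in buckets for _v in (_f['val'], _f[column])]
assert flat == ['2024-01-01T00:00:00Z', 12, '2024-01-02T00:00:00Z', 7]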
def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_type):
  properties = {
    'sort': 'desc',
    'canRange': False,
    'stacked': False,
    'limit': 10,
    'mincount': 0,
    'isDate': False,
    'aggregate': 'unique'
  }

  if widget_type in ('tree-widget', 'heatmap-widget', 'map-widget'):
    facet_type = 'pivot'
  elif widget_type == 'hit-widget':
    facet_type = 'function'
  else:
    solr_api = SolrApi(SOLR_URL.get(), user)
    range_properties = _new_range_facet(solr_api, collection, facet_field, widget_type)

    if range_properties:
      facet_type = 'range'
      properties.update(range_properties)
      properties['initial_gap'] = properties['gap']
      properties['initial_start'] = properties['start']
      properties['initial_end'] = properties['end']
    else:
      facet_type = 'field'

    if widget_type == 'bucket-widget':
      facet_type = 'nested'
      properties['facets_form'] = {'field': '', 'mincount': 1, 'limit': 10, 'aggregate': 'count'}
      properties['facets'] = []
      properties['scope'] = 'stack'

  if widget_type in ('tree-widget', 'heatmap-widget', 'map-widget'):
    properties['mincount'] = 1
    properties['facets'] = []
    properties['stacked'] = True
    properties['facets_form'] = {'field': '', 'mincount': 1, 'limit': 5}

    if widget_type == 'map-widget':
      properties['scope'] = 'world'
      properties['limit'] = 100
    else:
      properties['scope'] = 'stack' if widget_type == 'heatmap-widget' else 'tree'

  return {
    'id': facet_id,
    'label': facet_label,
    'field': facet_field,
    'type': facet_type,
    'widgetType': widget_type,
    'properties': properties
  }
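A hypothetical call, for illustration only; a 'tree-widget' is used here because field and range widgets trigger a live Solr round trip through _new_range_facet():

facet = _create_facet(
  collection={'name': 'logs'},  # unused for pivot widgets; shape assumed
  user=request.user,            # assumes a Django request is in scope
  facet_id='1234',
  facet_label='By country',
  facet_field='country_code',
  widget_type='tree-widget'
)
assert facet['type'] == 'pivot'
assert facet['properties']['scope'] == 'tree'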
def __init__(self, user, api=None):
  self.user = user
  self.api = api if api is not None else SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
def fields_data(self, user):
  schema_fields = SolrApi(SOLR_URL.get(), user).fields(self.name)
  schema_fields = schema_fields['schema']['fields']

  return sorted([self._make_field(field, attributes) for field, attributes in schema_fields.iteritems()])
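For reference, a rough sketch of the Schema API payload shape that fields_data() consumes; the field names and attributes below are illustrative, not taken from Hue:

schema_payload = {
  'schema': {
    'fields': {
      'id': {'type': 'string', 'indexed': True, 'stored': True},
      'text': {'type': 'text_general', 'indexed': True, 'stored': False},
    }
  }
}
# fields_data() iterates this mapping of field name -> attribute dict
schema_fields = schema_payload['schema']['fields']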
def test_is_solr_cloud_mode(self):
  raise SkipTest  # collections() no longer works

  SolrApi(SOLR_URL.get(), self.user).collections()
def __init__(self, user, api=None):
  self.user = user
  self.api = api if api is not None else SolrApi(user=self.user)
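The api=None default makes the Solr client injectable, which is handy in tests; a sketch assuming the enclosing class is called SearchController (the name is illustrative):

class StubSolrApi(object):
  # Minimal stand-in for SolrApi in unit tests
  def collections(self):
    return {}

controller = SearchController(user, api=StubSolrApi())  # no live Solr needed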
def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_type):
  properties = {
    'sort': 'desc',
    'canRange': False,
    'stacked': False,
    'limit': 10,
    'mincount': 0,
    'isDate': False,
    'aggregate': {'function': 'unique', 'ops': [], 'percentiles': [{'value': 50}]}
  }

  if widget_type in ('tree-widget', 'heatmap-widget', 'map-widget'):
    facet_type = 'pivot'
  elif widget_type == 'gradient-map-widget':
    facet_type = 'nested'
    properties['facets'] = []
    properties['facets_form'] = {'field': '', 'mincount': 1, 'limit': 10, 'aggregate': 'count'}
    properties['scope'] = 'world'
    properties['limit'] = 100
  else:
    solr_api = SolrApi(SOLR_URL.get(), user)
    range_properties = _new_range_facet(solr_api, collection, facet_field, widget_type)

    if range_properties:
      facet_type = 'range'
      properties.update(range_properties)
      properties['initial_gap'] = properties['gap']
      properties['initial_start'] = properties['start']
      properties['initial_end'] = properties['end']
    else:
      facet_type = 'field'

    if widget_type in ('bucket-widget', 'pie2-widget', 'timeline-widget', 'tree2-widget', 'text-facet-widget', 'hit-widget'):
      if widget_type == 'text-facet-widget':
        properties['type'] = facet_type
      if widget_type == 'hit-widget':
        facet_type = 'function'
      else:
        facet_type = 'nested'
        properties['facets_form'] = {'field': '', 'mincount': 1, 'limit': 10, 'aggregate': {'function': 'unique', 'ops': [], 'percentiles': [{'value': 50}]}}
        properties['facets'] = []
        properties['domain'] = {'blockParent': [], 'blockChildren': []}

      if widget_type == 'pie2-widget':
        properties['scope'] = 'stack'
        properties['timelineChartType'] = 'bar'
      elif widget_type == 'tree2-widget':
        properties['scope'] = 'tree'
        properties['facets_form']['limit'] = 5
        properties['isOldPivot'] = True
      else:
        properties['scope'] = 'stack'
        properties['timelineChartType'] = 'bar'

  if widget_type in ('tree-widget', 'heatmap-widget', 'map-widget'):
    properties['mincount'] = 1
    properties['facets'] = []
    properties['stacked'] = True
    properties['facets_form'] = {'field': '', 'mincount': 1, 'limit': 5}

    if widget_type == 'map-widget':
      properties['scope'] = 'world'
      properties['limit'] = 100
    else:
      properties['scope'] = 'stack' if widget_type == 'heatmap-widget' else 'tree'

  return {
    'id': facet_id,
    'label': facet_label,
    'field': facet_field,
    'type': facet_type,
    'widgetType': widget_type,
    'properties': properties
  }
def _create_facet(collection, user, facet_id, facet_label, facet_field, widget_type):
  properties = {
    'sort': 'desc',
    'canRange': False,
    'stacked': False,
    'limit': 10,
    'mincount': 0,
    'isDate': False,
    'aggregate': {'function': 'unique', 'ops': [], 'percentiles': [{'value': 50}]}
  }

  if widget_type in ('tree-widget', 'heatmap-widget', 'map-widget'):
    facet_type = 'pivot'
  elif widget_type == 'gradient-map-widget':
    facet_type = 'nested'
    properties['facets'] = []
    properties['domain'] = {'blockParent': [], 'blockChildren': []}
    properties['facets_form'] = {'field': '', 'mincount': 1, 'limit': 10, 'aggregate': {'function': 'unique', 'ops': [], 'percentiles': [{'value': 50}]}}
    properties['scope'] = 'world'
    properties['limit'] = 100
  else:
    solr_api = SolrApi(SOLR_URL.get(), user)
    range_properties = _new_range_facet(solr_api, collection, facet_field, widget_type)

    if range_properties:
      facet_type = 'range'
      properties.update(range_properties)
      properties['initial_gap'] = properties['gap']
      properties['initial_start'] = properties['start']
      properties['initial_end'] = properties['end']
    else:
      facet_type = 'field'

    if widget_type in ('bucket-widget', 'pie2-widget', 'timeline-widget', 'tree2-widget', 'text-facet-widget', 'hit-widget'):
      if widget_type == 'text-facet-widget':
        properties['type'] = facet_type
      if widget_type == 'hit-widget':
        facet_type = 'function'
      else:
        facet_type = 'nested'
        properties['facets_form'] = {'field': '', 'mincount': 1, 'limit': 10, 'aggregate': {'function': 'unique', 'ops': [], 'percentiles': [{'value': 50}]}}
        properties['facets'] = []
        properties['domain'] = {'blockParent': [], 'blockChildren': []}

      if widget_type == 'pie2-widget':
        properties['scope'] = 'stack'
        properties['timelineChartType'] = 'bar'
      elif widget_type == 'tree2-widget':
        properties['scope'] = 'tree'
        properties['facets_form']['limit'] = 5
        properties['isOldPivot'] = True
      else:
        properties['scope'] = 'stack'
        properties['timelineChartType'] = 'bar'

  if widget_type in ('tree-widget', 'heatmap-widget', 'map-widget'):
    properties['mincount'] = 1
    properties['facets'] = []
    properties['stacked'] = True
    properties['facets_form'] = {'field': '', 'mincount': 1, 'limit': 5}

    if widget_type == 'map-widget':
      properties['scope'] = 'world'
      properties['limit'] = 100
    else:
      properties['scope'] = 'stack' if widget_type == 'heatmap-widget' else 'tree'

  return {
    'id': facet_id,
    'label': facet_label,
    'field': facet_field,
    'type': facet_type,
    'widgetType': widget_type,
    'properties': properties,
    # Hue 4+
    'template': {
      'showFieldList': True,
      'showGrid': False,
      'showChart': True,
      'chartSettings': {
        'chartType': 'pie' if widget_type == 'pie2-widget' else ('timeline' if widget_type == 'timeline-widget' else ('gradientmap' if widget_type == 'gradient-map-widget' else 'bars')),
        'chartSorting': 'none',
        'chartScatterGroup': None,
        'chartScatterSize': None,
        'chartScope': 'world',
        'chartX': None,
        'chartYSingle': None,
        'chartYMulti': [],
        'chartData': [],
        'chartMapLabel': None,
      },
      'fieldsAttributes': [],
      'fieldsAttributesFilter': '',
      'filteredAttributeFieldsAll': True,
      'fields': [],
      'fieldsSelected': [],
      'leafletmap': {'latitudeField': None, 'longitudeField': None, 'labelField': None},  # Use own?
      'leafletmapOn': False,
      'isGridLayout': False,
      'hasDataForChart': True,
      'rows': 25,
    }
  }
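The nested conditional that picks chartType above is equivalent to a dictionary lookup with a default; a behavior-preserving alternative (a sketch, not what ships):

_CHART_TYPES = {
  'pie2-widget': 'pie',
  'timeline-widget': 'timeline',
  'gradient-map-widget': 'gradientmap',
}
chart_type = _CHART_TYPES.get(widget_type, 'bars')  # everything else charts as bars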
def create_collection(self, name, fields, unique_key_field='id', df='text'):
  """
  Create a Solr collection or core and its instance directory.
  Creates a schema.xml file so that we can set the unique key field.
  """
  if self.is_solr_cloud_mode():
    # SolrCloud mode: the temporary config directory needs to be removed afterwards
    tmp_path, solr_config_path = utils.copy_configs(fields, unique_key_field, df, True)

    # Create instance directory.
    solrctl_path = get_solrctl_path()

    process = subprocess.Popen(
      [solrctl_path, "instancedir", "--create", name, solr_config_path],
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      env={'SOLR_ZK_ENSEMBLE': conf.SOLR_ZK_ENSEMBLE.get()}
    )
    status = process.wait()

    # Don't want directories lying around
    shutil.rmtree(tmp_path)

    if status != 0:
      LOG.error("Could not create instance directory.\nOutput: %s\nError: %s" % process.communicate())
      raise PopupException(_('Could not create instance directory. Check if solr_zk_ensemble and solrctl_path are correct in Hue config [indexer].'))

    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    if not api.create_collection(name):
      # Delete instance directory if we couldn't create a collection.
      process = subprocess.Popen(
        [solrctl_path, "instancedir", "--delete", name],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env={'SOLR_ZK_ENSEMBLE': conf.SOLR_ZK_ENSEMBLE.get()}
      )
      if process.wait() != 0:
        LOG.error("Could not delete collection.\nOutput: %s\nError: %s" % process.communicate())
      raise PopupException(_('Could not create collection. Check error logs for more info.'))
  else:
    # Non-SolrCloud mode: create the instance directory locally.
    instancedir = os.path.join(conf.CORE_INSTANCE_DIR.get(), name)

    if os.path.exists(instancedir):
      raise PopupException(_("Instance directory %s already exists! Please remove it from the file system.") % instancedir)

    tmp_path, solr_config_path = utils.copy_configs(fields, unique_key_field, df, False)
    shutil.move(solr_config_path, instancedir)
    shutil.rmtree(tmp_path)

    api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get())
    if not api.create_core(name, instancedir):
      # Delete instance directory if we couldn't create a core.
      shutil.rmtree(instancedir)
      raise PopupException(_('Could not create collection. Check error logs for more info.'))
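A hypothetical usage sketch; the field dict shape is assumed to match what utils.copy_configs() expects, and the controller class name is illustrative:

controller = IndexController(request.user)  # illustrative class name
controller.create_collection(
  'web_logs',
  fields=[
    {'name': 'id', 'type': 'string'},
    {'name': 'message', 'type': 'text_general'},
  ],
  unique_key_field='id',
  df='message'
)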