def solr_connect(self): """ Connects to solr """ if self.solr is not None: # We are already connected, so skip connecting. return None if configs.USE_TEST_SOLR_CONNECTION: # Connect to the testing solr server self.solr = SolrConnection( exit_on_error=False, solr_host=settings.SOLR_HOST_TEST, solr_port=settings.SOLR_PORT_TEST, solr_collection=settings.SOLR_COLLECTION_TEST ).connection else: # Connect to the default solr server self.solr = SolrConnection(False).connection
def __init__(self): ''' To use, import this library and instantiate a crawler object: crawler = Crawler() Then crawl as follows: crawler.crawl() Crawling a single document is also supported with the index_single_document method. Just provide the document's UUID. For example: crawler.index_single_document('9E474B89-E36B-4B9D-2D38-7C7CCBDBB030') ''' # The list of Open Context items to crawl self.uuidlist = UUIDList().uuids # Connect to Solr self.solr = SolrConnection().connection
def stats_ranges_query_dict_via_solr( stats_query, default_group_size=20, solr=None, return_pre_query_response=False): """ Makes stats range facet query dict by processing a solr query """ if not solr: # Connect to solr. if configs.USE_TEST_SOLR_CONNECTION: # Connect to the testing solr server solr = SolrConnection( exit_on_error=False, solr_host=settings.SOLR_HOST_TEST, solr_port=settings.SOLR_PORT_TEST, solr_collection=settings.SOLR_COLLECTION_TEST ).connection else: # Connect to the default solr server solr = SolrConnection(False).connection response = solr.search(**stats_query) # execute solr query solr_json = response.raw_content if not isinstance(solr_json, dict): return None if not 'stats' in solr_json: return None if not 'stats_fields' in solr_json['stats']: return None query_dict = {} if return_pre_query_response: # This is for testing purposes. query_dict['pre-query-response'] = solr_json query_dict['facet.range'] = [] query_dict['stats.field'] = [] for solr_field_key, stats in solr_json['stats']['stats_fields'].items(): group_size = default_group_size if not stats or not stats.get('count'): continue if solr_field_key not in query_dict['facet.range']: query_dict['facet.range'].append(solr_field_key) if solr_field_key not in query_dict['stats.field']: query_dict['stats.field'].append(solr_field_key) fstart = 'f.{}.facet.range.start'.format(solr_field_key) fend = 'f.{}.facet.range.end'.format(solr_field_key) fgap = 'f.{}.facet.range.gap'.format(solr_field_key) findex = 'f.{}.facet.range.sort'.format(solr_field_key) fother = 'f.{}.facet.range.other'.format(solr_field_key) finclude = 'f.{}.facet.range.include'.format(solr_field_key) query_dict[fother] = 'all' query_dict[finclude] = 'all' query_dict[findex] = 'index' # sort by index, not by count if (stats['count'] / group_size) < 3: group_size = 4 if solr_field_key.endswith('___pred_date'): query_dict[fstart] = utilities.convert_date_to_solr_date( stats['min'] ) query_dict[fend] = utilities.convert_date_to_solr_date( stats['max'] ) query_dict[fgap] = utilities.get_date_difference_for_solr( stats['min'], stats['max'], group_size ) elif solr_field_key.endswith('___pred_int'): query_dict[fstart] = int(round(stats['min'], 0)) query_dict[fend] = int(round(stats['max'], 0)) query_dict[fgap] = int(round(((stats['max'] - stats['min']) / group_size), 0)) if query_dict[fgap] > stats['mean']: query_dict[fgap] = int(round((stats['mean'] / 3), 0)) if query_dict[fgap] < 1: query_dict[fgap] = 1 else: query_dict[fstart] = stats['min'] query_dict[fend] = stats['max'] query_dict[fgap] = ((stats['max'] - stats['min']) / group_size) if query_dict[fgap] > stats['mean']: query_dict[fgap] = stats['mean'] / 3 if query_dict[fgap] == 0: query_dict[fgap] = 0.001 return query_dict
def solr_connect(self): """ connects to solr """ self.solr = SolrConnection(False).connection
def __init__(self): # Connect to Solr self.solr = SolrConnection().connection self.request_error = False