Exemple #1
0
def stats_ranges_query_dict_via_solr(
    stats_query,
    default_group_size=20,
    solr=None,
    return_pre_query_response=False
):
    """ Makes stats range facet query dict by processing a solr query

    Runs ``stats_query`` against solr and, for every field listed in the
    response's ``stats.stats_fields`` that has a non-zero ``count``, adds
    the field to ``facet.range`` / ``stats.field`` and sets its
    ``f.<field>.facet.range.*`` start, end, gap, sort, other and include
    parameters from the reported min, max and mean.

    :param dict stats_query: keyword arguments passed to ``solr.search``
    :param default_group_size: number of range groups to aim for; a
        field whose ``count / default_group_size`` is below 3 is split
        into 4 groups instead
    :param solr: object with a ``search(**kwargs)`` method whose result
        has a ``raw_content`` attribute. When falsy, a connection is made
        with ``SolrConnection`` (the test server if
        ``configs.USE_TEST_SOLR_CONNECTION`` is set)
    :param bool return_pre_query_response: if true, the raw stats
        response is included under the ``'pre-query-response'`` key
        (for testing purposes)
    :return: dict of range facet query parameters, or None when the
        response is not a dict or has no ``stats.stats_fields``
    """
    if not solr:
        # Connect to solr.
        if configs.USE_TEST_SOLR_CONNECTION:
            # Connect to the testing solr server
            solr = SolrConnection(
                exit_on_error=False,
                solr_host=settings.SOLR_HOST_TEST,
                solr_port=settings.SOLR_PORT_TEST,
                solr_collection=settings.SOLR_COLLECTION_TEST
            ).connection
        else:
            # Connect to the default solr server
            solr = SolrConnection(False).connection

    response = solr.search(**stats_query)  # execute solr query
    solr_json = response.raw_content
    if not isinstance(solr_json, dict):
        return None

    if 'stats' not in solr_json:
        return None

    if 'stats_fields' not in solr_json['stats']:
        return None

    query_dict = {}
    if return_pre_query_response:
        # This is for testing purposes.
        query_dict['pre-query-response'] = solr_json
    query_dict['facet.range'] = []
    query_dict['stats.field'] = []
    for solr_field_key, stats in solr_json['stats']['stats_fields'].items():
        if not stats or not stats.get('count'):
            # No values for this field, so there is no range to facet.
            continue
        # Start from the default for every field so that a small field
        # does not change the group size used for the next one.
        group_size = default_group_size
        if solr_field_key not in query_dict['facet.range']:
            query_dict['facet.range'].append(solr_field_key)
        if solr_field_key not in query_dict['stats.field']:
            query_dict['stats.field'].append(solr_field_key)
        fstart = 'f.{}.facet.range.start'.format(solr_field_key)
        fend = 'f.{}.facet.range.end'.format(solr_field_key)
        fgap = 'f.{}.facet.range.gap'.format(solr_field_key)
        findex = 'f.{}.facet.range.sort'.format(solr_field_key)
        fother = 'f.{}.facet.range.other'.format(solr_field_key)
        finclude = 'f.{}.facet.range.include'.format(solr_field_key)
        query_dict[fother] = 'all'
        query_dict[finclude] = 'all'
        query_dict[findex] = 'index'  # sort by index, not by count
        if (stats['count'] / group_size) < 3:
            # Too few values to fill the default number of groups.
            group_size = 4
        if solr_field_key.endswith('___pred_date'):
            query_dict[fstart] = utilities.convert_date_to_solr_date(
                stats['min']
            )
            query_dict[fend] = utilities.convert_date_to_solr_date(
                stats['max']
            )
            query_dict[fgap] = utilities.get_date_difference_for_solr(
                stats['min'],
                stats['max'],
                group_size
            )
        elif solr_field_key.endswith('___pred_int'):
            # Integer fields get whole-number bounds and a gap of at
            # least 1.
            query_dict[fstart] = int(round(stats['min'], 0))
            query_dict[fend] = int(round(stats['max'], 0))
            query_dict[fgap] = int(
                round(((stats['max'] - stats['min']) / group_size), 0)
            )
            if query_dict[fgap] > stats['mean']:
                query_dict[fgap] = int(round((stats['mean'] / 3), 0))
            if query_dict[fgap] < 1:
                query_dict[fgap] = 1
        else:
            query_dict[fstart] = stats['min']
            query_dict[fend] = stats['max']
            query_dict[fgap] = ((stats['max'] - stats['min']) / group_size)
            if query_dict[fgap] > stats['mean']:
                query_dict[fgap] = stats['mean'] / 3
            if query_dict[fgap] <= 0:
                # A zero gap (min == max) or a negative gap (negative
                # mean) is not a usable range step; fall back to a small
                # positive one.
                query_dict[fgap] = 0.001
    return query_dict
Exemple #2
0
class ProjectsQuery():

    """ Asks solr which projects are present in a search result.

        When a search result is limited to exactly 1 project, the
        facet fields for that project's specific descriptive
        properties need to be shown.
    """

    def __init__(self):
        self.solr = False
        self.solr_connect()

    def solr_connect(self):
        """ makes the solr connection used by this object """
        self.solr = SolrConnection(False).connection

    def check_single_project(self, query):
        """ returns True if the query results are in only 1 project
            and that project has no child projects, otherwise False.
        """
        summary_query = self.compose_query(query)
        raw = self.solr.search(**summary_query).raw_content
        if not isinstance(raw, dict):
            return False
        if 'facet_counts' not in raw:
            return False
        if 'facet_fields' not in raw['facet_counts']:
            return False
        facet_fields = raw['facet_counts']['facet_fields']
        if SolrDocument.ROOT_PROJECT_SOLR not in facet_fields:
            return False
        # Only string items that contain the '___' delimiter are
        # counted as project values.
        project_values = [
            facet_item
            for facet_item in facet_fields[SolrDocument.ROOT_PROJECT_SOLR]
            if isinstance(facet_item, str) and '___' in facet_item
        ]
        if len(project_values) != 1:
            return False
        # A project value looks like:
        # 22-kenan-tepe___id___/projects/3DE4CD9C-259E-4C14-9B03-8B10454BA66E___Kenan Tepe
        parts = project_values[0].split('___')
        if len(parts) <= 3:
            return False
        p_uuid = parts[2].replace('/projects/', '')
        # Look for any other project that names this one as its project.
        child_projects = Project.objects\
                                .filter(project_uuid=p_uuid)\
                                .exclude(uuid=p_uuid)[:1]
        # No child projects means it is OK to consider this a
        # single project.
        return len(child_projects) < 1

    def compose_query(self, old_query):
        """ builds a facet-only query (no rows) that summarizes the
            projects found by an old_query
        """
        query = {
            key: old_query[key]
            for key in ('q', 'q.op', 'fq')
            if key in old_query
        }
        query.update({
            'debugQuery': 'false',
            'facet': 'true',
            'facet.mincount': 1,
            'rows': 0,
            'start': 0,
            'facet.field': [SolrDocument.ROOT_PROJECT_SOLR],
        })
        return query
Exemple #3
0
class StatsQuery():

    """ Methods to get stats information
        for 1 or more fields from Solr.

        This is useful in composing queries for
        numeric range facets where we don't know
        the min or max of the filtered set
    """

    def __init__(self):
        self.solr = False
        self.solr_connect()
        self.solr_response = False
        self.stats_fields = []  # solr fields to request stats for
        self.q = '*:*'  # main solr query
        # default operator for q terms
        # NOTE(review): compose_query() does not send q_op to solr.
        self.q_op = 'AND'
        self.fq = []  # filter query

    def solr_connect(self):
        """ connects to solr """
        self.solr = SolrConnection(False).connection

    def add_stats_ranges_from_solr(self, query):
        """ adds range facet parameters to a query, using stats
            that come from searching solr

            Runs the stats query made by compose_query() and, for each
            field in the response's stats_fields, adds the field to
            query['facet.range'] and query['stats.field'] and sets its
            f.<field>.facet.range.* start, end, gap, other and include
            parameters, plus f.<field>.facet.sort.

            :param dict query: query dict to update in place; it must
                already have 'facet.range' and 'stats.field' lists
            :return: the same query dict
        """
        stats_query = self.compose_query()  # make the stats query
        response = self.solr.search(**stats_query)  # execute solr query
        solr_json = response.raw_content
        if not isinstance(solr_json, dict):
            return query
        if 'stats' not in solr_json:
            return query
        if 'stats_fields' not in solr_json['stats']:
            return query
        qm = QueryMaker()
        for solr_field_key, stats in solr_json['stats']['stats_fields'].items():
            if stats is None:
                continue
            if 'count' in stats and not stats['count']:
                # No values for this field, so there is no min or max
                # to build a range from.
                continue
            # Reset for every field so that one field with few values
            # does not reduce the number of groups for later fields.
            groups = qm.histogram_groups
            if solr_field_key not in query['facet.range']:
                query['facet.range'].append(solr_field_key)
            if solr_field_key not in query['stats.field']:
                query['stats.field'].append(solr_field_key)
            fstart = 'f.' + solr_field_key + '.facet.range.start'
            fend = 'f.' + solr_field_key + '.facet.range.end'
            fgap = 'f.' + solr_field_key + '.facet.range.gap'
            findex = 'f.' + solr_field_key + '.facet.sort'
            fother = 'f.' + solr_field_key + '.facet.range.other'
            finclude = 'f.' + solr_field_key + '.facet.range.include'
            query[fother] = 'all'
            query[finclude] = 'all'
            if 'count' in stats:
                if (stats['count'] / qm.histogram_groups) < 3:
                    # Too few values to fill the usual number of groups.
                    groups = 4
            if '___pred_date' in solr_field_key:
                query[fstart] = qm.convert_date_to_solr_date(stats['min'])
                query[fend] = qm.convert_date_to_solr_date(stats['max'])
                query[fgap] = qm.get_date_difference_for_solr(
                    stats['min'],
                    stats['max'],
                    groups
                )
            else:
                query[fstart] = stats['min']
                query[fend] = stats['max']
                query[fgap] = ((stats['max'] - stats['min']) / groups)
                if query[fgap] > stats['mean']:
                    query[fgap] = stats['mean'] / 3
            query[findex] = 'index'  # sort by index, not by count
        return query

    def compose_query(self):
        """ composes a stats query
            using attributes in this class
        """
        query = {}
        query['debugQuery'] = 'false'
        query['stats'] = 'true'
        query['rows'] = 0
        query['q'] = self.q
        query['fq'] = self.fq
        query['stats.field'] = self.stats_fields
        return query
Exemple #4
0
class ProjectsQuery():
    """ Finds the projects represented in solr search results, so
        that callers can decide if project facet fields are needed.

        Results from just 1 project call for facet fields of that
        project's specific descriptive properties.
    """
    def __init__(self):
        self.solr = False
        self.solr_connect()

    def solr_connect(self):
        """ opens the connection to solr """
        self.solr = SolrConnection(False).connection

    @staticmethod
    def _has_root_project_facet(solr_json):
        """ checks that a solr response has root project facet values """
        return (
            isinstance(solr_json, dict)
            and 'facet_counts' in solr_json
            and 'facet_fields' in solr_json['facet_counts']
            and SolrDocument.ROOT_PROJECT_SOLR
            in solr_json['facet_counts']['facet_fields']
        )

    @staticmethod
    def _has_child_projects(p_uuid):
        """ checks if another project has p_uuid as its project_uuid """
        found = Project.objects\
                       .filter(project_uuid=p_uuid)\
                       .exclude(uuid=p_uuid)[:1]
        return len(found) >= 1

    def check_single_project(self, query):
        """ checks whether a query's results all belong to one project
            that has no child projects. Returns True in that case
            (project specific facet fields are needed), else False.
        """
        response = self.solr.search(**self.compose_query(query))
        solr_json = response.raw_content
        if not self._has_root_project_facet(solr_json):
            return False
        facet_fields = solr_json['facet_counts']['facet_fields']
        only_project = None
        project_count = 0
        for facet_val in facet_fields[SolrDocument.ROOT_PROJECT_SOLR]:
            # skip anything that is not a '___' delimited string
            if not isinstance(facet_val, str) or '___' not in facet_val:
                continue
            only_project = facet_val
            project_count += 1
        if project_count != 1:
            return False
        # the uuid is in the 3rd piece of a value like:
        # 22-kenan-tepe___id___/projects/3DE4CD9C-259E-4C14-9B03-8B10454BA66E___Kenan Tepe
        pieces = only_project.split('___')
        if len(pieces) < 4:
            return False
        parent_uuid = pieces[2].replace('/projects/', '')
        # a parent of a daughter project does not count as a
        # single project
        return not self._has_child_projects(parent_uuid)

    def compose_query(self, old_query):
        """ makes a query that only returns facet counts for the
            projects matched by an old_query
        """
        query = {}
        for carried_key in ('q', 'q.op', 'fq'):
            if carried_key in old_query:
                query[carried_key] = old_query[carried_key]
        fixed_params = [
            ('debugQuery', 'false'),
            ('facet', 'true'),
            ('facet.mincount', 1),
            ('rows', 0),
            ('start', 0),
            ('facet.field', [SolrDocument.ROOT_PROJECT_SOLR]),
        ]
        for param, value in fixed_params:
            query[param] = value
        return query