Example #1
    def __init__(self):
        '''
        To use, import this library and instantiate a crawler object:

        from opencontext_py.apps.indexer.crawler import Crawler
        crawler = Crawler()

        Then crawl as follows:

        crawler.crawl(100)

        Crawling a single document is also supported with the
        index_single_document method. Just provide the document's UUID.
        For example:

        crawler.index_single_document('9E474B89-E36B-4B9D-2D38-7C7CCBDBB030')
        '''
        # The list of Open Context items to crawl
        self.uuidlist = UUIDList().uuids
        # Connect to Solr
        self.solr = SolrConnection().connection
        # Flag as human remains sensitive
        self.human_remains = 0  # 0 means no; a positive value means yes
        self.max_geo_zoom = 30  # maximum geo tile zoom to use for indexing
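
The two flags set at the end of this constructor tune indexing: max_geo_zoom caps the geo tile zoom used when building Solr documents (crawl() only honors values above 5), and human_remains marks records as sensitive (applied in index_document_list, per the full Crawler listing below). A minimal sketch of setting them before a crawl, using the import shown in the docstring; the values are illustrative:

from opencontext_py.apps.indexer.crawler import Crawler

crawler = Crawler()
crawler.max_geo_zoom = 11   # illustrative zoom cap; only values > 5 are used
crawler.human_remains = 1   # positive value flags human-remains sensitivity
crawler.crawl(100)          # index in chunks of 100 documents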
Example #2
    def solr_connect(self):
        """ Connects to solr """
        if self.solr is not None:
            # We are already connected, so skip connecting.
            return None

        if configs.USE_TEST_SOLR_CONNECTION:
            # Connect to the testing solr server
            self.solr = SolrConnection(
                exit_on_error=False,
                solr_host=settings.SOLR_HOST_TEST,
                solr_port=settings.SOLR_PORT_TEST,
                solr_collection=settings.SOLR_COLLECTION_TEST
            ).connection
        else:
            # Connect to the default solr server
            self.solr = SolrConnection(False).connection
Example #3
    def __init__(self):
        '''
        To use, import this library and instantiate a crawler object:

        crawler = Crawler()

        Then crawl as follows:

        crawler.crawl()

        Crawling a single document is also supported with the
        index_single_document method. Just provide the document's UUID.
        For example:

        crawler.index_single_document('9E474B89-E36B-4B9D-2D38-7C7CCBDBB030')
        '''
        # The list of Open Context items to crawl
        self.uuidlist = UUIDList().uuids
        # Connect to Solr
        self.solr = SolrConnection().connection
Example #4
class Crawler():
    '''
    The Open Context Crawler indexes Open Context items and makes them
    searchable in Apache Solr.
    '''
    def __init__(self):
        '''
        To use, import this library and instantiate a crawler object:

        from opencontext_py.apps.indexer.crawler import Crawler
        crawler = Crawler()

        Then crawl as follows:

        crawler.crawl(100)

        Crawling a single document is also supported with the
        index_single_document method. Just provide the document's UUID.
        For example:

        crawler.index_single_document('9E474B89-E36B-4B9D-2D38-7C7CCBDBB030')
        '''
        # The list of Open Context items to crawl
        self.uuidlist = UUIDList().uuids
        # Connect to Solr
        self.solr = SolrConnection().connection
        # Flag as human remains sensitive
        self.human_remains = 0  # 0 means no; a positive value means yes
        self.max_geo_zoom = 30  # maximum geo tile zoom to use for indexing

    def crawl(self, chunksize=100):
        '''
        For efficiency, this method processes documents in "chunks." The
        default chunk size is 100, but one can specify other values. For
        example, to specify a chunksize of 500, use this method as follows:

        crawler = Crawler()
        crawler.crawl(500)
        '''

        # Get a logger
        logger = logging.getLogger(__name__)

        start_time = time.time()
        print('\n\nStarting crawl...\n')
        print("(#)\tUUID")
        print('--------------------------------------------')
        document_count = 0
        while self.uuidlist is not None:
            documents = []
            # Process the UUID list in chunks
            ok_manifests = []
            for uuid in islice(self.uuidlist, 0, chunksize):
                try:
                    sd_obj = SolrDocument(uuid)
                    if isinstance(self.max_geo_zoom, int):
                        if self.max_geo_zoom > 5:
                            # only positive integers
                            sd_obj.max_geo_zoom = self.max_geo_zoom
                    sd_obj.process_item()
                    solrdocument = sd_obj.fields
                    if crawlutil().is_valid_document(solrdocument):
                        try:
                            manifest = Manifest.objects.get(uuid=uuid)
                            ok_manifests.append(manifest)
                            # manifest.indexed_save()  # saves the time this was indexed
                        except Manifest.DoesNotExist:
                            print("Error: {0} Database bizzare error -----> " + uuid)
                            logger.error('[' + datetime.now().strftime('%x %X ') +
                                         settings.TIME_ZONE + '] Error: Missing manifest record for => ' + uuid)
                        documents.append(solrdocument)
                        document_count += 1
                        print("(" + str(document_count) + ")\t" + uuid)
                    else:
                        print('Error: Skipping document due to a datatype '
                              'mismatch -----> ' + uuid)
                except Exception as error:
                    print("Error: {0}".format(error) + " -----> " + uuid)
                    logger.error('[' + datetime.now().strftime('%x %X ') +
                                 settings.TIME_ZONE + '] Error: ' + str(error)
                                 + ' => ' + uuid)
            # Send the documents to Solr, keeping the response so we can
            # check the status code (e.g., 200 or 400) and report errors
            # without re-posting the documents.
            response = self.solr.update(documents, 'json', commit=False)
            if response.status == 200:
                self.solr.commit()
                print('--------------------------------------------')
                print('Crawl Rate: ' + crawlutil().get_crawl_rate_in_seconds(
                    document_count, start_time) + ' documents per second')
                print('Updating indexed time for ' + str(len(ok_manifests)) + ' manifest objects.')
                for manifest in ok_manifests:
                    manifest.indexed_save()  # saves the time this was indexed
                print('--------------------------------------------')
            else:
                print('Error: ' + str(response.raw_content['error']['msg']))
        # Once the crawl has completed...
        self.solr.optimize()
        print('\n--------------------------------------------')
        print('Crawl completed')
        print('--------------------------------------------\n')

    def index_document_list(self,
                            uuid_list,
                            chunksize=20,
                            stop_at_invalid=True):
        """
        Indexes a list of uuids. The list is generated elsewhere.
        """
        if isinstance(uuid_list, list):
            document_count = 0
            documents = []
            total_count = len(uuid_list)
            i = 0
            for uuid in uuid_list:
                try:
                    manifest = Manifest.objects.get(uuid=uuid)
                except Manifest.DoesNotExist:
                    print('Where is ' + uuid + ' in the manifest?')
                    manifest = False
                if manifest is not False:
                    try:
                        sd_obj = SolrDocument(uuid)
                        if isinstance(self.max_geo_zoom, int):
                            if self.max_geo_zoom > 5:
                                # only positive integers
                                sd_obj.max_geo_zoom = self.max_geo_zoom
                        sd_obj.process_item()
                        if isinstance(self.human_remains, int):
                            if self.human_remains > 0:
                                sd_obj.fields['human_remains'] = self.human_remains
                        solrdocument = sd_obj.fields
                        if crawlutil().is_valid_document(solrdocument):
                            if solrdocument is not None:
                                i += 1
                                print('OK to index: ' + uuid)
                                documents.append(solrdocument)
                                manifest.indexed_save()  # saves the time this was indexed
                            else:
                                print('Something wrong with: ' + uuid)
                                if stop_at_invalid:
                                    break
                        else:
                            print('Not valid: ' + uuid)
                            if stop_at_invalid:
                                break
                    except Exception as error:
                        print("Error: {0}".format(error) + " -----> " + uuid)
                        if stop_at_invalid:
                            break
                if len(documents) >= chunksize:
                    ok = self.commit_documents(documents, i, total_count)
                    if ok is False and stop_at_invalid:
                        # a problem in committing the documents
                        break
                    documents = []
            # now finish off the remaining documents
            if len(documents) > 0:
                ok = self.commit_documents(documents, i, total_count)

    def commit_documents(self,
                         documents,
                         last_index=False,
                         total_docs=False):
        """ commits a set of documents to the Solr index """
        ok = False
        # Post the documents once, keeping the response so the error
        # branch can report without re-sending them.
        response = self.solr.update(documents, 'json', commit=False)
        if response.status == 200:
            last_message = ''
            if last_index is not False and total_docs is not False:
                last_message = '(' + str(last_index) + ' of ' + str(total_docs) + ')'
            ok = True
            self.solr.commit()
            print('--------------------------------------------')
            print('Committed : ' + str(len(documents)) + ' docs. ' + last_message)
            print('--------------------------------------------')
        else:
            print('Error: ' + str(response.raw_content['error']['msg']))
        return ok

    def index_single_document(self, uuid):
        '''
        Use this method to crawl a single document. Provide the item's
        UUID as an argument. For example:

        crawler = Crawler()
        crawler.index_single_document('9E474B89-E36B-4B9D-2D38-7C7CCBDBB030')
        '''
        print('\nAttempting to index document ' + uuid + '...\n')
        start_time = time.time()
        try:
            sd_obj = SolrDocument(uuid)
            sd_obj.process_item()
            solrdocument = sd_obj.fields
            if crawlutil().is_valid_document(solrdocument):
                # Commit the document, keeping the whole response so the
                # error branch can report without re-posting.
                # Note: solr.update() expects a list
                response = self.solr.update(
                    [solrdocument], 'json', commit=True)
                if response.status == 200:
                    print('Successfully indexed ' + uuid + ' in ' +
                          crawlutil().get_elapsed_time_in_seconds(start_time)
                          + ' seconds.')
                else:
                    print('Error: ' + str(response.raw_content['error']['msg']))
            else:
                print('Error: Unable to index ' + uuid + ' due to '
                      'a datatype mismatch.')
        except TypeError as error:
            print("Type Error {0}".format(error) + ": Unable to process document " + uuid + '.')
        except Exception as error:
            print("Error: {0}".format(error) + " -----> " + uuid)
Example #5
class Crawler():
    '''
    The Open Context Crawler indexes Open Context items and makes them
    searchable in Apache Solr.
    '''
    def __init__(self):
        '''
        To use, import this library and instantiate a crawler object:

        crawler = Crawler()

        Then crawl as follows:

        crawler.crawl()

        Crawling a single document is also supported with the
        index_single_document method. Just provide the document's UUID.
        For example:

        crawler.index_single_document('9E474B89-E36B-4B9D-2D38-7C7CCBDBB030')
        '''
        # The list of Open Context items to crawl
        self.uuidlist = UUIDList().uuids
        # Connect to Solr
        self.solr = SolrConnection().connection

    def crawl(self, chunksize=100):
        '''
        For efficiency, this method processes documents in "chunks." The
        default chunk size is 100, but one can specify other values. For
        example, to specify a chunksize of 500, use this method as follows:

        crawler = Crawler()
        crawler.crawl(500)
        '''

        # Get a logger
        logger = logging.getLogger(__name__)

        start_time = time.time()
        print('\n\nStarting crawl...\n')
        print("(#)\tUUID")
        print('--------------------------------------------')
        document_count = 0
        while self.uuidlist is not None:
            documents = []
            # Process the UUID list in chunks
            for uuid in islice(self.uuidlist, 0, chunksize):
                try:
                    solrdocument = SolrDocument(uuid).fields
                    if crawlutil().is_valid_document(solrdocument):
                        try:
                            manifest = Manifest.objects.get(uuid=uuid)
                            manifest.indexed_save(
                            )  # saves the time this was indexed
                        except Manifest.DoesNotExist:
                            print("Error: {0} Database bizzare error -----> " +
                                  uuid)
                            logger.error(
                                '[' + datetime.now().strftime('%x %X ') +
                                settings.TIME_ZONE +
                                '] Error: Missing manifest record for => ' +
                                uuid)
                        documents.append(solrdocument)
                        document_count += 1
                        print("(" + str(document_count) + ")\t" + uuid)
                    else:
                        print('Error: Skipping document due to a datatype '
                              'mismatch -----> ' + uuid)
                except Exception as error:
                    print("Error: {0}".format(error) + " -----> " + uuid)
                    logger.error('[' + datetime.now().strftime('%x %X ') +
                                 settings.TIME_ZONE + '] Error: ' +
                                 str(error) + ' => ' + uuid)
            # Send the documents to Solr, keeping the response so we can
            # check the status code (e.g., 200 or 400) and report errors
            # without re-posting the documents.
            response = self.solr.update(documents, 'json', commit=False)
            if response.status == 200:
                self.solr.commit()
                print('--------------------------------------------')
                print('Crawl Rate: ' + crawlutil().get_crawl_rate_in_seconds(
                    document_count, start_time) + ' documents per second')
                print('--------------------------------------------')
            else:
                print('Error: ' + str(response.raw_content['error']['msg']))
        # Once the crawl has completed...
        self.solr.optimize()
        print('\n--------------------------------------------')
        print('Crawl completed')
        print('--------------------------------------------\n')

    def index_document_list(self,
                            uuid_list,
                            chunksize=20,
                            stop_at_invalid=True):
        """
        Indexes a list of uuids. The list is generated elsewhere.
        """
        if isinstance(uuid_list, list):
            document_count = 0
            documents = []
            total_count = len(uuid_list)
            i = 0
            for uuid in uuid_list:
                try:
                    manifest = Manifest.objects.get(uuid=uuid)
                except Manifest.DoesNotExist:
                    print('Where is ' + uuid + ' in the manifest?')
                    manifest = False
                if manifest is not False:
                    try:
                        solrdocument = SolrDocument(uuid).fields
                        if crawlutil().is_valid_document(solrdocument):
                            i += 1
                            print('OK to index: ' + uuid)
                            documents.append(solrdocument)
                            manifest.indexed_save(
                            )  # saves the time this was indexed
                        else:
                            print('Not valid: ' + uuid)
                            if stop_at_invalid:
                                break
                    except Exception as error:
                        print("Error: {0}".format(error) + " -----> " + uuid)
                        if stop_at_invalid:
                            break
                if len(documents) >= chunksize:
                    ok = self.commit_documents(documents, i, total_count)
                    if ok is False and stop_at_invalid:
                        # a problem in committing the documents
                        break
                    documents = []
            # now finish off the remaining documents
            if len(documents) > 0:
                ok = self.commit_documents(documents, i, total_count)

    def commit_documents(self, documents, last_index=False, total_docs=False):
        """ commits a set of documents to the Solr index """
        ok = False
        # Post the documents once, keeping the response so the error
        # branch can report without re-sending them.
        response = self.solr.update(documents, 'json', commit=False)
        if response.status == 200:
            last_message = ''
            if last_index is not False and total_docs is not False:
                last_message = '(' + str(last_index) + ' of ' + str(
                    total_docs) + ')'
            ok = True
            self.solr.commit()
            print('--------------------------------------------')
            print('Committed : ' + str(len(documents)) + ' docs. ' +
                  last_message)
            print('--------------------------------------------')
        else:
            print('Error: ' + str(response.raw_content['error']['msg']))
        return ok

    def index_single_document(self, uuid):
        '''
        Use this method to crawl a single document. Provide the item's
        UUID as an argument. For example:

        crawler = Crawler()
        crawler.index_single_document('9E474B89-E36B-4B9D-2D38-7C7CCBDBB030')
        '''
        print('\nAttempting to index document ' + uuid + '...\n')
        start_time = time.time()
        try:
            solrdocument = SolrDocument(uuid).fields
            if crawlutil().is_valid_document(solrdocument):
                # Commit the document, keeping the whole response so the
                # error branch can report without re-posting.
                # Note: solr.update() expects a list
                response = self.solr.update([solrdocument],
                                            'json',
                                            commit=True)
                if response.status == 200:
                    print('Successfully indexed ' + uuid + ' in ' +
                          crawlutil().get_elapsed_time_in_seconds(start_time) +
                          ' seconds.')
                else:
                    print('Error: ' + str(response.raw_content['error']['msg']))
            else:
                print('Error: Unable to index ' + uuid + ' due to '
                      'a datatype mismatch.')
        except TypeError:
            print("Error: Unable to process document " + uuid + '.')
        except Exception as error:
            print("Error: {0}".format(error) + " -----> " + uuid)
Example #6
def stats_ranges_query_dict_via_solr(
    stats_query, 
    default_group_size=20, 
    solr=None,
    return_pre_query_response=False):
    """ Makes stats range facet query dict by processing a solr query
    """
    if not solr:
        # Connect to solr.
        if configs.USE_TEST_SOLR_CONNECTION:
            # Connect to the testing solr server
            solr = SolrConnection(
                exit_on_error=False,
                solr_host=settings.SOLR_HOST_TEST,
                solr_port=settings.SOLR_PORT_TEST,
                solr_collection=settings.SOLR_COLLECTION_TEST
            ).connection
        else:
            # Connect to the default solr server
            solr = SolrConnection(False).connection

    response = solr.search(**stats_query)  # execute solr query
    solr_json = response.raw_content
    if not isinstance(solr_json, dict):
        return None

    if 'stats' not in solr_json:
        return None

    if 'stats_fields' not in solr_json['stats']:
        return None

    query_dict = {}
    if return_pre_query_response:
        # This is for testing purposes.
        query_dict['pre-query-response'] = solr_json
    query_dict['facet.range'] = []
    query_dict['stats.field'] = []
    for solr_field_key, stats in solr_json['stats']['stats_fields'].items():
        group_size = default_group_size
        if not stats or not stats.get('count'):
            continue
        if solr_field_key not in query_dict['facet.range']:
            query_dict['facet.range'].append(solr_field_key)
        if solr_field_key not in query_dict['stats.field']:
            query_dict['stats.field'].append(solr_field_key)
        fstart = 'f.{}.facet.range.start'.format(solr_field_key)
        fend = 'f.{}.facet.range.end'.format(solr_field_key)
        fgap = 'f.{}.facet.range.gap'.format(solr_field_key)
        findex = 'f.{}.facet.range.sort'.format(solr_field_key)
        fother = 'f.{}.facet.range.other'.format(solr_field_key)
        finclude = 'f.{}.facet.range.include'.format(solr_field_key)
        query_dict[fother] = 'all'
        query_dict[finclude] = 'all'
        query_dict[findex] = 'index'  # sort by index, not by count
        if (stats['count'] / group_size) < 3:
            group_size = 4
        if solr_field_key.endswith('___pred_date'):
            query_dict[fstart] = utilities.convert_date_to_solr_date(
                stats['min']
            )
            query_dict[fend] = utilities.convert_date_to_solr_date(
                stats['max']
            )
            query_dict[fgap] = utilities.get_date_difference_for_solr(
                stats['min'], 
                stats['max'], 
                group_size
            )
        elif solr_field_key.endswith('___pred_int'):
            query_dict[fstart] = int(round(stats['min'], 0))
            query_dict[fend] = int(round(stats['max'], 0))
            query_dict[fgap] = int(round(((stats['max'] - stats['min']) / group_size), 0))
            if query_dict[fgap] > stats['mean']:
                query_dict[fgap] = int(round((stats['mean'] / 3), 0))
            if query_dict[fgap] < 1:
                query_dict[fgap] = 1
        else:
            query_dict[fstart] = stats['min']
            query_dict[fend] = stats['max']
            query_dict[fgap] = ((stats['max'] - stats['min']) / group_size)
            if query_dict[fgap] > stats['mean']:
                query_dict[fgap] = stats['mean'] / 3
            if query_dict[fgap] == 0:
                query_dict[fgap] = 0.001
    return query_dict
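
Because the returned dict combines facet.range and stats.field lists with Solr's per-field parameters (f.<field>.facet.range.*), it can be merged straight into a follow-up facet query. A sketch of that round trip; the stats field name is hypothetical, and SolrConnection is assumed to be importable as in the function above:

solr = SolrConnection(False).connection
stats_query = {
    'q': '*:*',
    'rows': 0,
    'stats': 'true',
    'stats.field': ['example___pred_int'],  # hypothetical numeric field
}
range_params = stats_ranges_query_dict_via_solr(stats_query, solr=solr)
if range_params:
    facet_query = dict(stats_query)    # copy the base query
    facet_query['facet'] = 'true'
    facet_query.update(range_params)   # add facet.range and f.<field>.* params
    response = solr.search(**facet_query)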
Example #7
class StatsQuery():

    """ Methods to get stats information
        for 1 or more fields from Solr.

        This is useful in composing queries for
        numeric range facets where we don't know
        the min or max of the filtered set
    """

    def __init__(self):
        self.solr = False
        self.solr_connect()
        self.solr_response = False
        self.stats_fields = []
        self.q = '*:*'  # main solr query
        self.q_op = 'AND'  # default operator for q terms
        self.fq = []  # filter query

    def solr_connect(self):
        """ connects to solr """
        self.solr = SolrConnection(False).connection

    def add_stats_ranges_from_solr(self, query):
        """ gets solr stats by searching solr
            searches solr to get raw solr search results
        """
        stats_query = self.compose_query()  # make the stats query
        response = self.solr.search(**stats_query)  # execute solr query
        solr_json = response.raw_content
        if isinstance(solr_json, dict):
            if 'stats' in solr_json:
                if 'stats_fields' in solr_json['stats']:
                    qm = QueryMaker()
                    groups = qm.histogram_groups
                    for solr_field_key, stats in solr_json['stats']['stats_fields'].items():
                        if stats is not None:
                            if solr_field_key not in query['facet.range']:
                                query['facet.range'].append(solr_field_key)
                            if solr_field_key not in query['stats.field']:
                                query['stats.field'].append(solr_field_key)
                            fstart = 'f.' + solr_field_key + '.facet.range.start'
                            fend = 'f.' + solr_field_key + '.facet.range.end'
                            fgap = 'f.' + solr_field_key + '.facet.range.gap'
                            findex = 'f.' + solr_field_key + '.facet.sort'
                            fother = 'f.' + solr_field_key + '.facet.range.other'
                            finclude = 'f.' + solr_field_key + '.facet.range.include'
                            query[fother] = 'all'
                            query[finclude] = 'all'
                            if 'count' in stats:
                                if (stats['count'] / qm.histogram_groups) < 3:
                                    groups = 4
                            if '___pred_date' in solr_field_key:
                                query[fstart] = qm.convert_date_to_solr_date(stats['min'])
                                query[fend] = qm.convert_date_to_solr_date(stats['max'])
                                query[fgap] = qm.get_date_difference_for_solr(stats['min'], stats['max'], groups)
                                query[findex] = 'index'  # sort by index, not by count
                            else:
                                query[fstart] = stats['min']
                                query[fend] = stats['max']
                                query[fgap] = ((stats['max'] - stats['min']) / groups)
                                if query[fgap] > stats['mean']:
                                    query[fgap] = stats['mean'] / 3
                                # query[fgap] = ((stats['max'] - stats['min']) / groups) - ((stats['max'] - stats['min']) / groups) * .01
                                query[findex] = 'index'  # sort by index, not by count
        return query

    def compose_query(self):
        """ composes a stats query
            using attributes in this class
        """
        query = {}
        query['debugQuery'] = 'false'
        query['stats'] = 'true'
        query['rows'] = 0
        query['q'] = self.q
        query['fq'] = self.fq
        query['stats.field'] = self.stats_fields
        return query
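
A sketch of the intended flow for this class: set the query attributes, then let add_stats_ranges_from_solr enrich an in-progress query dict, which must already carry facet.range and stats.field lists. The field and filter values are hypothetical:

stats = StatsQuery()
stats.q = '*:*'
stats.fq = ['item_type:subjects']            # hypothetical filter query
stats.stats_fields = ['example___pred_int']  # hypothetical numeric field
query = {'facet.range': [], 'stats.field': []}
query = stats.add_stats_ranges_from_solr(query)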
Example #8
    def solr_connect(self):
        """ connects to solr """
        self.solr = SolrConnection(False).connection
Example #9
class ProjectsQuery():

    """ Methods to get projects from solr to see if we need
            to have project facet fields.
            
        If we have 1 project, then we need to show facet fields for project
        specific descriptive properties.
        
    """

    def __init__(self):
        self.solr = False
        self.solr_connect()

    def solr_connect(self):
        """ connects to solr """
        self.solr = SolrConnection(False).connection

    def check_single_project(self, query):
        """ checks to see if the query results only in a single project.
            If it does, then we need to show facet fields for project
            specific descriptive properties
        """
        single_project = False
        projs_query = self.compose_query(query)  # make the stats query
        response = self.solr.search(**projs_query)  # execute solr query
        solr_json = response.raw_content
        if isinstance(solr_json, dict):
            if 'facet_counts' in solr_json:
                if 'facet_fields' in solr_json['facet_counts']:
                    ff_dict = solr_json['facet_counts']['facet_fields']
                    if SolrDocument.ROOT_PROJECT_SOLR in ff_dict:
                        proj_list = ff_dict[SolrDocument.ROOT_PROJECT_SOLR]
                        num_projects = 0
                        last_proj_val = None
                        for proj_val in proj_list:
                            if isinstance(proj_val, str):
                                if '___' in proj_val:
                                    last_proj_val = proj_val
                                    num_projects += 1
                        if num_projects == 1:
                            # we have 1 project, check to make sure it's not a parent of a daughter project
                            proj_ex = last_proj_val.split('___')
                            if len(proj_ex) > 3:
                                # get a uuid from 22-kenan-tepe___id___/projects/3DE4CD9C-259E-4C14-9B03-8B10454BA66E___Kenan Tepe
                                p_uuid = proj_ex[2].replace('/projects/', '')
                                ch_projs = Project.objects\
                                                  .filter(project_uuid=p_uuid)\
                                                  .exclude(uuid=p_uuid)[:1]
                                if len(ch_projs) < 1:
                                    # this project does not have child projects, so it is OK to
                                    # consider a single project
                                    single_project = True
        return single_project

    def compose_query(self, old_query):
        """ composes a query to get a summary of
            projects that will be shown in an old_query
        """
        query = {}
        if 'q' in old_query:
            query['q'] = old_query['q']
        if 'q.op' in old_query:
            query['q.op'] = old_query['q.op']
        if 'fq' in old_query:
            query['fq'] = old_query['fq']
        query['debugQuery'] = 'false'
        query['facet'] = 'true'
        query['facet.mincount'] = 1
        query['rows'] = 0
        query['start'] = 0
        query['facet.field'] = [SolrDocument.ROOT_PROJECT_SOLR]
        return query
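
A sketch of calling check_single_project while composing a search; compose_query reuses only the q, q.op, and fq keys of the incoming query, and the values here are illustrative:

projs = ProjectsQuery()
current_query = {
    'q': '*:*',
    'fq': ['item_type:projects'],  # hypothetical filter query
}
if projs.check_single_project(current_query):
    # Only one childless project matches, so facet fields for its
    # project-specific descriptive properties should be shown.
    ...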
Example #10
    def __init__(self):
        # Connect to Solr
        self.solr = SolrConnection().connection
        self.request_error = False
Example #11
class ProjectsQuery():
    """ Methods to get projects from solr to see if we need
            to have project facet fields.
            
        If we have 1 project, then we need to show facet fields for project
        specific descriptive properties.
        
    """
    def __init__(self):
        self.solr = False
        self.solr_connect()

    def solr_connect(self):
        """ connects to solr """
        self.solr = SolrConnection(False).connection

    def check_single_project(self, query):
        """ checks to see if the query results only in a single project.
            If it does, then we need to show facet fields for project
            specific descriptive properties
        """
        single_project = False
        projs_query = self.compose_query(query)  # make the stats query
        response = self.solr.search(**projs_query)  # execute solr query
        solr_json = response.raw_content
        if isinstance(solr_json, dict):
            if 'facet_counts' in solr_json:
                if 'facet_fields' in solr_json['facet_counts']:
                    ff_dict = solr_json['facet_counts']['facet_fields']
                    if SolrDocument.ROOT_PROJECT_SOLR in ff_dict:
                        proj_list = ff_dict[SolrDocument.ROOT_PROJECT_SOLR]
                        num_projects = 0
                        last_proj_val = None
                        for proj_val in proj_list:
                            if isinstance(proj_val, str):
                                if '___' in proj_val:
                                    last_proj_val = proj_val
                                    num_projects += 1
                        if num_projects == 1:
                            # we have 1 project, check to make sure it's not a parent of a daughter project
                            proj_ex = last_proj_val.split('___')
                            if len(proj_ex) > 3:
                                # get a uuid from 22-kenan-tepe___id___/projects/3DE4CD9C-259E-4C14-9B03-8B10454BA66E___Kenan Tepe
                                p_uuid = proj_ex[2].replace('/projects/', '')
                                ch_projs = Project.objects\
                                                  .filter(project_uuid=p_uuid)\
                                                  .exclude(uuid=p_uuid)[:1]
                                if len(ch_projs) < 1:
                                    # this project does not have child projects, so it is OK to
                                    # consider a single project
                                    single_project = True
        return single_project

    def compose_query(self, old_query):
        """ composes a query to get a summary of
            projects that will be shown in an old_query
        """
        query = {}
        if 'q' in old_query:
            query['q'] = old_query['q']
        if 'q.op' in old_query:
            query['q.op'] = old_query['q.op']
        if 'fq' in old_query:
            query['fq'] = old_query['fq']
        query['debugQuery'] = 'false'
        query['facet'] = 'true'
        query['facet.mincount'] = 1
        query['rows'] = 0
        query['start'] = 0
        query['facet.field'] = [SolrDocument.ROOT_PROJECT_SOLR]
        return query