Esempio n. 1
0
    def setUp(self) -> None:
        """Point the test case at the Solr test cores.

        Stores each test core name from settings and opens one read/write
        scorched interface per core, collecting all of them in
        ``self.all_sis`` so teardown code can iterate every interface.
        """
        # Core names come straight from the Django settings module.
        self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
        self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME
        self.core_name_people = settings.SOLR_PEOPLE_TEST_CORE_NAME
        self.core_name_recap = settings.SOLR_RECAP_TEST_CORE_NAME

        # One read/write interface per core; the URL tuple is the single
        # place that enumerates which cores are involved.
        core_urls = (
            settings.SOLR_OPINION_URL,
            settings.SOLR_AUDIO_URL,
            settings.SOLR_PEOPLE_URL,
            settings.SOLR_RECAP_URL,
        )
        (
            self.si_opinion,
            self.si_audio,
            self.si_people,
            self.si_recap,
        ) = [scorched.SolrInterface(url, mode="rw") for url in core_urls]
        self.all_sis = [
            self.si_opinion,
            self.si_audio,
            self.si_people,
            self.si_recap,
        ]
Esempio n. 2
0
 def setUp(self):
     """Create temporary Solr test cores and open one interface per core.

     Builds the four test cores (opinion, audio, people, recap) from their
     schema files under ``INSTALL_ROOT/Solr/conf`` and connects to each in
     read/write mode.
     """
     # Set up testing cores in Solr and swap them in
     self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
     self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME
     self.core_name_people = settings.SOLR_PEOPLE_TEST_CORE_NAME
     self.core_name_recap = settings.SOLR_RECAP_TEST_CORE_NAME
     root = settings.INSTALL_ROOT
     # Each core gets its own schema file; the opinion core uses the
     # default schema.xml, the others use type-specific schemas.
     create_temp_solr_core(
         self.core_name_opinion,
         os.path.join(root, 'Solr', 'conf', 'schema.xml'),
     )
     create_temp_solr_core(
         self.core_name_audio,
         os.path.join(root, 'Solr', 'conf', 'audio_schema.xml'),
     )
     create_temp_solr_core(
         self.core_name_people,
         os.path.join(root, 'Solr', 'conf', 'person_schema.xml'),
     )
     create_temp_solr_core(
         self.core_name_recap,
         os.path.join(root, 'Solr', 'conf', 'recap_schema.xml'))
     self.si_opinion = sunburnt.SolrInterface(settings.SOLR_OPINION_URL,
                                              mode='rw')
     self.si_audio = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL,
                                            mode='rw')
     self.si_people = sunburnt.SolrInterface(settings.SOLR_PEOPLE_URL,
                                             mode='rw')
     # This will cause headaches, but it follows in the mission to slowly
     # migrate off of sunburnt. This was added after the items above, and so
     # uses scorched, not sunburnt.
     self.si_recap = scorched.SolrInterface(settings.SOLR_RECAP_URL,
                                            mode='rw')
Esempio n. 3
0
    def setUp(self):
        """Prepare the Solr test cores and open one interface per core.

        Three legacy cores still use sunburnt; the recap core uses scorched.
        All interfaces are collected in ``self.all_sis`` for teardown.
        """
        # Test core names, straight from settings.
        self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
        self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME
        self.core_name_people = settings.SOLR_PEOPLE_TEST_CORE_NAME
        self.core_name_recap = settings.SOLR_RECAP_TEST_CORE_NAME

        self.si_opinion = sunburnt.SolrInterface(
            settings.SOLR_OPINION_URL, mode="rw")
        self.si_audio = sunburnt.SolrInterface(
            settings.SOLR_AUDIO_URL, mode="rw")
        self.si_people = sunburnt.SolrInterface(
            settings.SOLR_PEOPLE_URL, mode="rw")
        # NOTE: the recap core was added after the migration to scorched
        # began, so it uses scorched while the older cores use sunburnt.
        self.si_recap = scorched.SolrInterface(
            settings.SOLR_RECAP_URL, mode="rw")
        self.all_sis = [
            self.si_opinion,
            self.si_audio,
            self.si_people,
            self.si_recap,
        ]
Esempio n. 4
0
def solr_delete(sender, instance, **kwargs):
    """Signal handler: remove *instance* from the Solr index, if indexed."""
    import scorched
    from django.conf import settings

    conn = scorched.SolrInterface(settings.SOLR_SERVER)
    # Look the document up by id first; only delete when it exists.
    hits = conn.query(id=instance.id).execute().result.docs
    if hits:
        conn.delete_by_ids([doc['id'] for doc in hits])
Esempio n. 5
0
def add_docket_to_solr_by_rds(item_pks, force_commit=False):
    """Add RECAPDocuments from a single Docket to Solr.

    Performance enhancement for adding many RECAP documents that share one
    docket: the docket metadata is fetched once and reused for every
    document instead of being queried once per item.

    :param item_pks: RECAPDocument pks to add or update in Solr.
    :param force_commit: Whether to send a commit to Solr (this is usually
    not needed).
    :return: None
    """
    interface = scorched.SolrInterface(settings.SOLR_RECAP_URL, mode="w")
    docs = RECAPDocument.objects.filter(pk__in=item_pks).order_by()
    # Pull the shared docket metadata from the first document, if any.
    try:
        shared_meta = docs[0].get_docket_metadata()
    except IndexError:
        shared_meta = None

    try:
        interface.add(
            [doc.as_search_dict(docket_metadata=shared_meta) for doc in docs]
        )
        if force_commit:
            interface.commit()
        interface.conn.http_connection.close()
    except SolrError as exc:
        # Celery task: back off 30s and retry on Solr failures.
        add_docket_to_solr_by_rds.retry(exc=exc, countdown=30)
Esempio n. 6
0
def add_or_update_recap_document(item_pks,
                                 coalesce_docket=False,
                                 force_commit=False):
    """Add or update recap documents in Solr.

    :param item_pks: RECAPDocument pks to add or update in Solr.
    :param coalesce_docket: If True, assume every pk belongs to the same
    docket and query the shared docket metadata from the database once
    instead of once per RECAPDocument — a big win on dockets with thousands
    of entries.
    :param force_commit: Should we send a commit message at the end of our
    updates?
    :return: None
    """
    interface = scorched.SolrInterface(settings.SOLR_RECAP_URL, mode='w')
    docs = RECAPDocument.objects.filter(pk__in=item_pks).order_by()
    metadata = None
    if coalesce_docket:
        # Use the first document's docket metadata for the whole batch.
        try:
            metadata = docs[0].get_docket_metadata()
        except IndexError:
            pass

    try:
        interface.add(
            [doc.as_search_dict(docket_metadata=metadata) for doc in docs]
        )
        if force_commit:
            interface.commit()
    except SolrError as exc:
        add_or_update_recap_document.retry(exc=exc, countdown=30)
Esempio n. 7
0
 def _teardown_test_solr():
     """Empty out the test cores that we use.

     Deletes every document from the opinion and audio test cores, commits
     the deletion, and closes the underlying HTTP connection.
     """
     conns = [settings.SOLR_OPINION_TEST_URL, settings.SOLR_AUDIO_TEST_URL]
     for conn in conns:
         si = scorched.SolrInterface(conn, mode='rw')
         si.delete_all()
         si.commit()
         # Bug fix: the connection was previously left open, leaking one
         # HTTP connection per core per test run (the sibling teardown
         # helper in this codebase already closes it).
         si.conn.http_connection.close()
Esempio n. 8
0
def delete_items(items, solr_url, force_commit=False):
    """Delete the given ids from the Solr index at *solr_url*.

    Retries (as a Celery task) on SolrError with a 30 second countdown.
    """
    interface = scorched.SolrInterface(solr_url, mode='w')
    try:
        interface.delete_by_ids(list(items))
        if force_commit:
            interface.commit()
    except SolrError as exc:
        delete_items.retry(exc=exc, countdown=30)
Esempio n. 9
0
def solr_delete(sender, instance, created, **kwargs):
    """Signal handler: drop all 'language' Solr records for *instance*."""
    # NOTE(review): the `created` parameter suggests this was wired to a
    # post_save-style signal — confirm against the signal registration.
    from django.conf import settings
    import scorched

    conn = scorched.SolrInterface(settings.SOLR_SERVER)
    # NOTE(review): this iterates the executed response object directly
    # rather than response.result.docs as other handlers do — presumably
    # scorched responses are iterable; verify.
    matches = conn.query(type="language", item_id="{0}".format(instance.id)).execute()
    conn.delete_by_ids([record['id'] for record in matches])
    conn.commit()
Esempio n. 10
0
def delete_items(items, app_label, force_commit=False):
    """Delete the given ids from the Solr core mapped to *app_label*.

    Retries (as a Celery task) on SolrError with a 30 second countdown.
    """
    interface = scorched.SolrInterface(settings.SOLR_URLS[app_label], mode="w")
    try:
        interface.delete_by_ids(list(items))
        if force_commit:
            interface.commit()
    except SolrError as exc:
        delete_items.retry(exc=exc, countdown=30)
Esempio n. 11
0
def delete_items(items, solr_obj_type, force_commit=False):
    """Delete the given ids from the Solr core for *solr_obj_type*.

    Retries (as a Celery task) on SolrError with a 30 second countdown.
    """
    interface = scorched.SolrInterface(settings.SOLR_URLS[solr_obj_type], mode='w')
    try:
        interface.delete_by_ids(list(items))
        if force_commit:
            interface.commit()
    except SolrError as exc:
        delete_items.retry(exc=exc, countdown=30)
Esempio n. 12
0
 def _teardown_test_solr() -> None:
     """Empty out the test cores that we use"""
     # Only the opinion and audio test cores are cleared here.
     conns = [settings.SOLR_OPINION_TEST_URL, settings.SOLR_AUDIO_TEST_URL]
     for conn in conns:
         si = scorched.SolrInterface(conn, mode="rw")
         si.delete_all()
         si.commit()
         # Close the underlying HTTP connection so repeated test runs
         # don't leak sockets.
         si.conn.http_connection.close()
Esempio n. 13
0
def commit_if_not_yet(group_result):
    """Commit results if they have not yet been committed.

    Scans the group's children for the first result whose status flag is 0;
    if that document is not yet visible in Solr, issue a commit. Only the
    first matching child is considered before returning.
    """
    for child in group_result.children:
        result = child.result
        if result[0] != 0:
            continue
        solr_conn = scorched.SolrInterface(settings.SOLR_SERVER)
        resp = solr_conn.query(id=result[1]).execute()
        if resp.result.numFound == 0:
            solr_conn.commit()
        return
Esempio n. 14
0
def add_or_update_audio_files(item_pks, force_commit=False):
    """Index the given Audio items in Solr, retrying on SolrError."""
    interface = scorched.SolrInterface(settings.SOLR_AUDIO_URL, mode='w')
    audio_items = Audio.objects.filter(pk__in=item_pks)
    try:
        interface.add([audio.as_search_dict() for audio in audio_items])
        if force_commit:
            interface.commit()
    except SolrError as exc:
        add_or_update_audio_files.retry(exc=exc, countdown=30)
Esempio n. 15
0
def add_or_update_recap_document(item_pks, force_commit=False):
    """Add or update RECAPDocuments in Solr.

    :param item_pks: RECAPDocument pks to add or update.
    :param force_commit: Whether to send a commit to Solr afterwards.
    :return: None
    """
    si = scorched.SolrInterface(settings.SOLR_RECAP_URL, mode='w')
    try:
        si.add([
            item.as_search_dict()
            for item in RECAPDocument.objects.filter(pk__in=item_pks)
        ])
        if force_commit:
            si.commit()
    # Bug fix: `except SolrError, exc:` is Python 2-only syntax and a
    # SyntaxError under Python 3; use the `as` form like the sibling tasks.
    except SolrError as exc:
        add_or_update_recap_document.retry(exc=exc, countdown=30)
Esempio n. 16
0
def add_or_update_opinions(item_pks, force_commit=False):
    """Add or update Opinions in Solr.

    :param item_pks: Opinion pks to add or update.
    :param force_commit: Whether to send a commit to Solr afterwards.
    :return: None
    """
    si = scorched.SolrInterface(settings.SOLR_OPINION_URL, mode='w')
    try:
        si.add([
            item.as_search_dict()
            for item in Opinion.objects.filter(pk__in=item_pks)
        ])
        if force_commit:
            si.commit()
    # Bug fix: `except SolrError, exc:` is Python 2-only syntax and a
    # SyntaxError under Python 3; use the `as` form like the sibling tasks.
    except SolrError as exc:
        add_or_update_opinions.retry(exc=exc, countdown=30)
Esempio n. 17
0
def add_or_update_cluster(pk, force_commit=False):
    """Index every sub-opinion of the OpinionCluster *pk* in Solr."""
    interface = scorched.SolrInterface(settings.SOLR_OPINION_URL, mode='w')
    try:
        cluster = OpinionCluster.objects.get(pk=pk)
        interface.add(
            [opinion.as_search_dict()
             for opinion in cluster.sub_opinions.all()]
        )
        if force_commit:
            interface.commit()
    except SolrError as exc:
        add_or_update_cluster.retry(exc=exc, countdown=30)
Esempio n. 18
0
def add_or_update_people(item_pks, force_commit=False):
    """Index the given Person items in Solr, retrying on SolrError."""
    interface = scorched.SolrInterface(settings.SOLR_PEOPLE_URL, mode='w')
    people = Person.objects.filter(pk__in=item_pks)
    try:
        interface.add([person.as_search_dict() for person in people])
        if force_commit:
            interface.commit()
    except SolrError as exc:
        add_or_update_people.retry(exc=exc, countdown=30)
Esempio n. 19
0
 def get(self, request, *args, **kwargs):
     """Return the most recent manifests, newest first, paginated."""
     # NOTE(review): the start offset assumes a page size of 12 while the
     # row count uses RECENT_MANIFEST_COUNT — confirm the two agree.
     raw_page = request.GET.get('page')
     start = (int(raw_page) - 1) * 12 if raw_page else 0
     solr_conn = scorched.SolrInterface(settings.SOLR_SERVER)
     response = (
         solr_conn.query()
         .set_requesthandler('/minimal')
         .sort_by("-created_timestamp")
         .paginate(start=start, rows=RECENT_MANIFEST_COUNT)
         .execute()
     )
     return Response(
         format_response(request, response, page_by=RECENT_MANIFEST_COUNT))
Esempio n. 20
0
    def handle(self, *args, **options):
        """Wipe every record from the 'credentials' Solr collection."""
        solr = scorched.SolrInterface(settings.SOLR_URLS['credentials'])

        try:
            solr.delete_all()
            solr.commit()
        except Exception as e:
            raise CommandError('Collection could not be deleted: {}'.format(e))

        message = 'Successfully deleted all records in "credentials" collection'
        self.stdout.write(self.style.SUCCESS(message))
Esempio n. 21
0
def do_minimal_search(request):
    """Run a paginated query against the '/minimal' Solr request handler."""
    raw_page = request.GET.get('page')
    # Ten results per page; page numbers are 1-based.
    start = (int(raw_page) - 1) * 10 if raw_page else 0

    solr_conn = scorched.SolrInterface(settings.SOLR_SERVER)
    response = (
        solr_conn.query(request.GET.get('q'))
        .set_requesthandler('/minimal')
        .paginate(start=start)
        .execute()
    )

    return format_response(request, response)
Esempio n. 22
0
 def get(self, request, *args, **kwargs):
     """Resolve a manifest by pk via Solr and return its JSON body."""
     man_pk = self.kwargs['pk']
     solr_conn = scorched.SolrInterface(settings.SOLR_SERVER)
     response = solr_conn.query(man_pk).set_requesthandler(
         '/manifest').execute()
     found = response.result.numFound
     # Exactly one hit expected; anything else is a client error.
     if found != 1:
         error_body = {
             "error": "Could not resolve manifest '{}'".format(man_pk),
             "numFound": found,
         }
         return Response(error_body, status=status.HTTP_400_BAD_REQUEST)
     return Response(json.loads(response.result.docs[0]['manifest']))
Esempio n. 23
0
def add_or_update_items(items, solr_object_type):
    """Adds an item to a solr index.

    This function is for use with the update_index command. It's slightly
    different than the commands below because it expects a Django object,
    rather than a primary key. This rejects the standard Celery advice about
    not passing objects around, but thread safety shouldn't be an issue since
    this is only used by the update_index command, and we want to get the
    objects in the task, not in its caller.

    :param items: A list of items or a single item to add or update in Solr
    :param solr_object_type: The solr object type being updated so that the URL
    can be pulled from the settings file. This is essential since different
    celery workers may connect to solr on different machines.
    :return None
    """
    if hasattr(items, "items") or not hasattr(items, "__iter__"):
        # If it's a dict or a single item make it a list
        items = [items]
    if not items:
        # Bug fix: with an empty list the original left `si` and `item`
        # unbound and crashed with a NameError below. Nothing to do.
        return

    # Bug fix: the interface was previously re-created on every loop
    # iteration; one interface for the whole batch is enough.
    si = scorched.SolrInterface(settings.SOLR_URLS[solr_object_type],
                                mode='w')
    search_item_list = []
    for item in items:
        try:
            if isinstance(item, Docket):
                # Slightly different here b/c dockets return a list of
                # search dicts.
                search_item_list.extend(item.as_search_list())
            elif isinstance(item, (Opinion, RECAPDocument, Audio, Person)):
                search_item_list.append(item.as_search_dict())
        except AttributeError as e:
            print("AttributeError trying to add: %s\n  %s" % (item, e))
        except ValueError as e:
            print("ValueError trying to add: %s\n  %s" % (item, e))
        except InvalidDocumentError:
            print("Unable to parse: %s" % item)

    try:
        si.add(search_item_list)
    except socket.error as exc:
        add_or_update_items.retry(exc=exc, countdown=120)
    else:
        # NOTE(review): only the *last* item is stamped here, mirroring the
        # original logic; earlier dockets in the batch are not marked as
        # indexed. Confirm whether that is intended.
        if isinstance(item, Docket):
            item.date_last_index = now()
            item.save()
Esempio n. 24
0
def add_or_update_recap_docket(
    data, force_commit=False, update_threshold=60 * 60
):
    """Add an entire docket to Solr or update it if it's already there.

    This is an expensive operation because to add or update a RECAP docket in
    Solr means updating every document that's a part of it. So if a docket has
    10,000 documents, we'll have to pull them *all* from the database, and
    re-index them all. It'd be nice to not have to do this, but because Solr is
    de-normalized, every document in the RECAP Solr index has a copy of every
    field in Solr. For example, if the name of the case changes, that has to get
    reflected in every document in the docket in Solr.

    To deal with this mess, we have a field on the docket that says when we last
    updated it in Solr. If that date is after a threshold, we just don't do the
    update unless we know the docket has something new.

    :param data: A dictionary containing the a key for 'docket_pk' and
    'content_updated'. 'docket_pk' will be used to find the docket to modify.
    'content_updated' is a boolean indicating whether the docket must be
    updated.
    :param force_commit: Whether to send a commit to Solr (this is usually not
    needed).
    :param update_threshold: Items staler than this number of seconds will be
    updated. Items fresher than this number will be a no-op.
    :return: None
    """
    # A chained upstream task may pass None; treat it as a no-op.
    if data is None:
        return

    si = scorched.SolrInterface(settings.SOLR_RECAP_URL, mode="w")
    some_time_ago = now() - timedelta(seconds=update_threshold)
    d = Docket.objects.get(pk=data["docket_pk"])
    too_fresh = d.date_last_index is not None and (
        d.date_last_index > some_time_ago
    )
    update_not_required = not data.get("content_updated", False)
    # Skip only when the docket is both freshly indexed AND has no new
    # content; either condition alone still forces a re-index.
    if all([too_fresh, update_not_required]):
        return
    else:
        try:
            si.add(d.as_search_list())
            if force_commit:
                si.commit()
            # Close the pooled HTTP connection on success; on SolrError the
            # retry below re-runs the whole task with a fresh interface.
            si.conn.http_connection.close()
        except SolrError as exc:
            add_or_update_recap_docket.retry(exc=exc, countdown=30)
        else:
            # Stamp the successful index time so the freshness check above
            # can short-circuit the next run.
            d.date_last_index = now()
            d.save()
Esempio n. 25
0
    def get(self, request, *args, **kwargs):
        """Return spellcheck suggestions for the 'q' query parameter."""
        query = request.GET.get('q')
        # Require at least three characters before hitting Solr.
        if not query or len(query) < 3:
            return Response({'suggestions': []})

        solr_conn = scorched.SolrInterface(settings.SOLR_SERVER)
        response = (
            solr_conn.query(query).set_requesthandler('/suggest').execute()
        )

        suggestions = response.spellcheck['suggestions']
        results = suggestions[1]['suggestion'] if suggestions else []
        return Response({'suggestions': results})
Esempio n. 26
0
    def _run(self, args, config):
        ''' Main entry point. '''

        # Bug fix: the original bare `except:` swallowed everything
        # (including KeyboardInterrupt), and when config.get() itself
        # failed, `solr_url` was unbound inside the handler, so the real
        # error was masked by a NameError. Read the URL and connect in two
        # separate, narrow steps instead.
        try:
            solr_url = config.get('solr', 'url').rstrip('/') + '/'
        except Exception as e:
            raise cli.CliError('Unable to read solr URL from config: %s' % e)
        try:
            solr = scorched.SolrInterface(solr_url)
        except Exception:
            raise cli.CliError('Unable to connect to solr: %s' % solr_url)

        if args.action in ('add', 'add-all'):
            database_config = dict(config.items('database'))
            db = app.database.get_engine(database_config)
            # --stubs=1 means profile stub records too.
            profile_stubs = args.stubs == 1

            if args.action == 'add':
                self.add_models(db, solr, args.models.split(','),
                                profile_stubs)
            else:
                self.add_models(db, solr, profile_stubs=profile_stubs)

            solr.optimize()
            self._logger.info("Added requested documents and optimized index.")

        elif args.action in ('delete', 'delete-all'):
            if args.action == 'delete':
                self.delete_models(solr, args.models.split(','))
            else:
                solr.delete_all()

            solr.optimize()
            self._logger.info("Deleted requested documents and optimized "
                              "index.")

        elif args.action == 'optimize':
            solr.optimize()
            self._logger.info("Optimized index.")

        elif args.action == 'schema':
            schema_url = urljoin(solr_url, 'schema')
            self.schema(schema_url)
Esempio n. 27
0
def add_or_update_items(items, solr_url=settings.SOLR_OPINION_URL):
    """Adds an item to a solr index.

    This function is for use with the update_index command. It's slightly
    different than the commands below because it expects a Django object,
    rather than a primary key. This rejects the standard Celery advice about
    not passing objects around, but thread safety shouldn't be an issue since
    this is only used by the update_index command, and we want to get the
    objects in the task, not in its caller.

    :param items: A single object, a dict, or an iterable of objects to index.
    :param solr_url: The Solr core to write to.
    :return: None
    """
    si = scorched.SolrInterface(solr_url, mode='w')
    if hasattr(items, "items") or not hasattr(items, "__iter__"):
        # If it's a dict or a single item make it a list
        items = [items]
    search_item_list = []
    for item in items:
        # Idiom fix: use isinstance instead of `type(item) ==` comparisons;
        # the four dict-producing models collapse into one branch.
        try:
            if isinstance(item, Docket):
                # Slightly different here b/c dockets return a list of
                # search dicts.
                search_item_list.extend(item.as_search_list())
            elif isinstance(item, (Opinion, RECAPDocument, Audio, Person)):
                search_item_list.append(item.as_search_dict())
        except AttributeError as e:
            print("AttributeError trying to add: %s\n  %s" % (item, e))
        except ValueError as e:
            print("ValueError trying to add: %s\n  %s" % (item, e))
        except InvalidDocumentError:
            print("Unable to parse: %s" % item)

    try:
        si.add(search_item_list)
    except socket.error as exc:
        # Network hiccup: retry the whole task after two minutes.
        add_or_update_items.retry(exc=exc, countdown=120)
Esempio n. 28
0
def solr_index(sender, instance, created, **kwargs):
    """Signal handler: (re)index a 'language' record for *instance* in Solr."""
    import uuid
    from django.conf import settings
    import scorched

    solrconn = scorched.SolrInterface(settings.SOLR_SERVER)

    # Remove any stale record for this instance before re-adding it.
    existing = solrconn.query(type="language", item_id="{0}".format(instance.id)).execute()
    if existing:
        solrconn.delete_by_ids([record['id'] for record in existing])

    solrconn.add({
        'id': str(uuid.uuid4()),
        'type': 'language',
        'item_id': instance.id,
        'name': instance.name,
    })
    solrconn.commit()
Esempio n. 29
0
def add_items_to_solr(item_pks, app_label, force_commit=False):
    """Add a list of items to Solr

    :param item_pks: An iterable list of item PKs that you wish to add to Solr.
    :param app_label: The type of item that you are adding.
    :param force_commit: Whether to send a commit to Solr after your addition.
    This is generally not advised and is mostly used for testing.
    """
    search_dicts = []
    model = apps.get_model(app_label)
    items = model.objects.filter(pk__in=item_pks).order_by()
    for item in items:
        try:
            if model in [OpinionCluster, Docket]:
                # Dockets make a list of items; extend, don't append
                search_dicts.extend(item.as_search_list())
            else:
                search_dicts.append(item.as_search_dict())
        except AttributeError as e:
            print("AttributeError trying to add: %s\n  %s" % (item, e))
        except ValueError as e:
            print("ValueError trying to add: %s\n  %s" % (item, e))
        except InvalidDocumentError:
            print("Unable to parse: %s" % item)

    si = scorched.SolrInterface(settings.SOLR_URLS[app_label], mode="w")
    try:
        si.add(search_dicts)
        if force_commit:
            si.commit()
    except (socket.error, SolrError) as exc:
        add_items_to_solr.retry(exc=exc, countdown=30)
    else:
        # Mark dockets as updated if needed
        if model == Docket:
            items.update(date_modified=now(), date_last_index=now())
    finally:
        # Bug fix: close() was previously called twice on the success path
        # (once inside the try body and again in the else clause) and never
        # on failure; a single close in finally covers every path.
        si.conn.http_connection.close()
Esempio n. 30
0
def solr_index(sender, instance, created, **kwargs):
    """Signal handler: (re)index a 'snippet' record for *instance* in Solr."""
    import uuid
    from django.conf import settings
    import scorched

    solrconn = scorched.SolrInterface(settings.SOLR_SERVER)

    # Remove any stale record for this instance before re-adding it.
    existing = solrconn.query(type="snippet", item_id="{0}".format(instance.pk)).execute()
    if existing:
        solrconn.delete_by_ids([record['id'] for record in existing])

    solrconn.add({
        'id': str(uuid.uuid4()),
        'type': 'snippet',
        'item_id': instance.pk,
        'snippet': instance.snippet,
        'title': instance.title,
        'tags': [tag.name for tag in instance.tags.all()],
    })
    solrconn.commit()