Example #1
0
    def setUp(self):
        """Record the test core names and open a Solr interface per index.

        Stores the four test core names from settings, then opens the
        opinion, audio and people interfaces with sunburnt and the RECAP
        interface with scorched, all with mode="rw". The four interfaces are
        also collected in `self.all_sis`.
        """
        self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
        self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME
        self.core_name_people = settings.SOLR_PEOPLE_TEST_CORE_NAME
        self.core_name_recap = settings.SOLR_RECAP_TEST_CORE_NAME

        read_write = "rw"
        self.si_opinion = sunburnt.SolrInterface(
            settings.SOLR_OPINION_URL, mode=read_write)
        self.si_audio = sunburnt.SolrInterface(
            settings.SOLR_AUDIO_URL, mode=read_write)
        self.si_people = sunburnt.SolrInterface(
            settings.SOLR_PEOPLE_URL, mode=read_write)
        # RECAP was added after the others and deliberately uses scorched
        # rather than sunburnt, as part of slowly migrating off of sunburnt.
        # Expect some friction from mixing the two libraries.
        self.si_recap = scorched.SolrInterface(
            settings.SOLR_RECAP_URL, mode=read_write)

        self.all_sis = [self.si_opinion, self.si_audio,
                        self.si_people, self.si_recap]
Example #2
0
 def setUp(self):
     """Create the temporary Solr test cores and open an interface to each.

     Each of the opinion, audio, people and RECAP test cores is created
     from its schema file under <INSTALL_ROOT>/Solr/conf, and then a
     read/write interface is opened for each index.
     """
     self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
     self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME
     self.core_name_people = settings.SOLR_PEOPLE_TEST_CORE_NAME
     self.core_name_recap = settings.SOLR_RECAP_TEST_CORE_NAME

     install_root = settings.INSTALL_ROOT
     cores_and_schemas = (
         (self.core_name_opinion, 'schema.xml'),
         (self.core_name_audio, 'audio_schema.xml'),
         (self.core_name_people, 'person_schema.xml'),
         (self.core_name_recap, 'recap_schema.xml'),
     )
     for core_name, schema_file in cores_and_schemas:
         schema_path = os.path.join(install_root, 'Solr', 'conf',
                                    schema_file)
         create_temp_solr_core(core_name, schema_path)

     self.si_opinion = sunburnt.SolrInterface(
         settings.SOLR_OPINION_URL, mode='rw')
     self.si_audio = sunburnt.SolrInterface(
         settings.SOLR_AUDIO_URL, mode='rw')
     self.si_people = sunburnt.SolrInterface(
         settings.SOLR_PEOPLE_URL, mode='rw')
     # RECAP was added after the others and deliberately uses scorched
     # rather than sunburnt, as part of slowly migrating off of sunburnt.
     # Expect some friction from mixing the two libraries.
     self.si_recap = scorched.SolrInterface(
         settings.SOLR_RECAP_URL, mode='rw')
Example #3
0
 def __init__(self, *args, **kwargs):
     """Initialise the command and open one Solr connection per type.

     `self.connections` maps 'o' to the opinion index and 'oa' to the
     audio index, both opened with mode='r'. `self.options` and
     `self.valid_ids` start out as empty dicts.
     """
     super(Command, self).__init__(*args, **kwargs)
     opinion_conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL,
                                           mode='r')
     audio_conn = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL,
                                         mode='r')
     self.connections = {'o': opinion_conn, 'oa': audio_conn}
     self.options = {}
     self.valid_ids = {}
Example #4
0
def index_sitemap_maker(request):
    """Generate the sitemap index page.

    For each of the opinion and oral argument Solr indexes, asks Solr how
    many items match `index_solr_params`, works out how many sitemap pages of
    `items_per_sitemap` items that requires, and adds a link to each page.
    Two fixed sitemaps are appended afterwards.

    :param request: The incoming HTTP request (not inspected here).
    :return: An HttpResponse containing the rendered `sitemap_index.xml`
    template, served as application/xml with an X-Robots-Tag header.
    """
    connection_string_obj_type_pairs = (
        (settings.SOLR_OPINION_URL, 'opinions'),
        (settings.SOLR_AUDIO_URL, 'oral-arguments'),
    )
    sites = []
    for connection_string, obj_type in connection_string_obj_type_pairs:
        conn = sunburnt.SolrInterface(connection_string, mode='r')
        search_results_object = conn.raw_query(**index_solr_params).execute()
        count = search_results_object.result.numFound
        # Explicit floor division: the page count must be an integer for
        # range() below, whatever division semantics are in effect.
        # NOTE(review): when count is an exact multiple of
        # items_per_sitemap this still links one trailing page beyond the
        # last full one -- left unchanged here; confirm whether intended.
        num_pages = count // items_per_sitemap + 1
        for i in range(1, num_pages + 1):
            sites.append('https://www.courtlistener.com/sitemap-%s.xml?p=%s' %
                         (obj_type, i))

    # Random additional sitemaps.
    sites.extend([
        'https://www.courtlistener.com/sitemap-simple-pages.xml',
        'https://www.courtlistener.com/sitemap-visualizations.xml',
    ])

    xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites})

    # These links contain case names, so they should get crawled but not
    # indexed
    response = HttpResponse(xml, content_type='application/xml')
    response['X-Robots-Tag'] = 'noindex, noodp, noarchive, noimageindex'
    return response
Example #5
0
def people_sitemap_maker(request):
    """Render one page of the people sitemap.

    The page number comes from the `p` GET parameter (default 1) and is
    turned into a Solr `start` offset using `items_per_sitemap`.

    :param request: The incoming HTTP request.
    :return: An HttpResponse with the rendered `sitemap.xml` template,
    served as application/xml with an X-Robots-Tag header.
    """
    conn = sunburnt.SolrInterface(settings.SOLR_PEOPLE_URL, mode='r')
    page_number = int(request.GET.get("p", 1))
    offset = (page_number - 1) * items_per_sitemap
    field_list = ','.join(['absolute_url', 'timestamp'])
    solr_params = {
        'q': '*:*',
        'rows': items_per_sitemap,
        'start': offset,
        'fl': field_list,
        'sort': 'dob asc',
        'caller': 'people_sitemap_maker',
    }
    hits = conn.raw_query(**solr_params).execute()

    # Reshape the Solr hits into dicts the Django template can consume.
    urls = [
        {
            'location': 'https://www.courtlistener.com%s' % hit['absolute_url'],
            'changefreq': 'monthly',
            'lastmod': hit['timestamp'],
            'priority': '0.5',
        }
        for hit in hits
    ]

    rendered = loader.render_to_string('sitemap.xml', {'urlset': urls})
    response = HttpResponse(smart_str(rendered),
                            content_type='application/xml')
    response['X-Robots-Tag'] = 'noindex, noodp, noarchive, noimageindex'
    return response
Example #6
0
def coverage_data(request, version, court):
    """Return per-year document counts for a court as JSON.

    :param request: The incoming HTTP request; its optional `q` GET
    parameter is passed to `build_coverage_query`.
    :param version: Not used in this function.
    :param court: A court primary key, or the string 'all'. Any other value
    is looked up with get_object_or_404.
    :return: A JsonResponse with `annual_counts` (year string -> count) and
    `total` (the sum of the counts).
    """
    if court == 'all':
        court_str = 'all'
    else:
        court_str = get_object_or_404(Court, pk=court).pk

    query_text = request.GET.get('q')
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    coverage_query = build_coverage_query(court_str, query_text)
    solr_response = conn.raw_query(**coverage_query).execute()
    yearly = strip_zero_years(
        solr_response.facet_counts.facet_ranges[0][1][0][1])

    # Key each count by the first four characters of its date string and
    # keep a running total as we go.
    annual_counts = {}
    total_docs = 0
    for date_string, year_count in yearly:
        year = date_string[:4]
        annual_counts[year] = year_count
        total_docs = total_docs + year_count

    payload = {
        'annual_counts': annual_counts,
        'total': total_docs,
    }
    return JsonResponse(payload, safe=True)
Example #7
0
def make_court_variable():
    """Return the non-testing courts annotated with their Solr counts.

    Queries the opinion index with `build_court_count_query()` and hands the
    `court_exact` facet values, together with the court queryset, to
    `annotate_courts_with_counts`.
    """
    non_test_courts = Court.objects.exclude(jurisdiction=Court.TESTING_COURT)
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    count_query = build_court_count_query()
    facet_fields = conn.raw_query(**count_query).execute().facet_counts.facet_fields
    return annotate_courts_with_counts(non_test_courts,
                                       facet_fields['court_exact'])
Example #8
0
def add_or_update_items(items, solr_url=settings.SOLR_OPINION_URL):
    """Add one or more Django objects to a Solr index.

    This function is for use with the update_index command. It differs from
    the other indexing functions in that it expects Django objects rather
    than primary keys. That goes against the standard Celery advice about
    not passing objects around, but thread safety shouldn't be an issue since
    this is only used by the update_index command, and we want to query and
    build the search documents in the task, not in its caller.

    :param items: A single object, a dict, or an iterable of objects. A dict
    or a non-iterable is wrapped in a list. Audio instances become
    SearchAudioFile documents and Opinion instances become SearchDocument
    documents; anything else is skipped.
    :param solr_url: The URL of the Solr index to write to.
    :return: None. On a socket error while adding, the function calls its
    own `retry` with a 120 second countdown.
    """
    si = sunburnt.SolrInterface(solr_url, mode='w')
    if hasattr(items, "items") or not hasattr(items, "__iter__"):
        # If it's a dict or a single item make it a list
        items = [items]
    search_item_list = []
    for item in items:
        try:
            # isinstance (rather than an exact type comparison) so that
            # subclasses of the models are indexed instead of dropped.
            if isinstance(item, Audio):
                search_item_list.append(SearchAudioFile(item))
            elif isinstance(item, Opinion):
                search_item_list.append(SearchDocument(item))
        except AttributeError as e:
            # Best effort: report the item and carry on with the rest.
            print("AttributeError trying to add: %s\n  %s" % (item, e))
        except ValueError as e:
            print("ValueError trying to add: %s\n  %s" % (item, e))
        except InvalidDocumentError:
            print("Unable to parse: %s" % item)

    try:
        si.add(search_item_list)
    except socket.error as exc:
        add_or_update_items.retry(exc=exc, countdown=120)
Example #9
0
def view_opinion(request, pk, _):
    """Render the page for an opinion cluster.

    Looks up the cluster by ID, builds the favorite form (bound to the
    user's existing favorite if there is one, otherwise unbound with initial
    values), and asks Solr for the top five opinions citing any of the
    cluster's sub-opinions.

    :param request: The incoming HTTP request.
    :param pk: The primary key of the OpinionCluster; 404s if missing.
    :param _: Ignored.
    :return: The rendered `view_opinion.html` response.
    """
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    short_name = trunc(best_case_name(cluster), 100)
    title = '%s, %s' % (short_name, cluster.citation_string)
    get_string = search_utils.make_get_string(request)

    try:
        existing_favorite = Favorite.objects.get(cluster_id=cluster.pk,
                                                 user=request.user)
        favorite_form = FavoriteForm(instance=existing_favorite)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        initial_values = {
            'cluster_id': cluster.pk,
            'name': trunc(best_case_name(cluster), 100, ellipsis='...'),
        }
        favorite_form = FavoriteForm(initial=initial_values)

    # Get the citing results from Solr for speed.
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    sub_opinion_ids = cluster.sub_opinions.values_list('pk', flat=True)
    joined_ids = ' OR '.join([str(opinion_id)
                              for opinion_id in sub_opinion_ids])
    citing_query = {
        'q': 'cites:({ids})'.format(ids=joined_ids),
        'rows': 5,
        'start': 0,
        'sort': 'citeCount desc',
        'caller': 'view_opinion',
    }
    citing_clusters = conn.raw_query(**citing_query).execute()

    template_context = {
        'title': title,
        'cluster': cluster,
        'favorite_form': favorite_form,
        'get_string': get_string,
        'private': cluster.blocked,
        'citing_clusters': citing_clusters,
        'top_authorities': cluster.authorities[:5],
    }
    return render_to_response('view_opinion.html', template_context,
                              RequestContext(request))
Example #10
0
def add_or_update_cluster(pk, force_commit=True):
    """Index every sub-opinion of one opinion cluster in Solr.

    :param pk: Primary key of the OpinionCluster whose sub-opinions should
    be added to the opinion index.
    :param force_commit: When true, commit to Solr right after adding.
    :return: None. On a SolrError the function calls its own `retry` with a
    30 second countdown (presumably it is registered as a Celery task --
    confirm against the decorator, which is not shown here).
    """
    si = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='w')
    try:
        si.add([SearchDocument(item) for item in
                OpinionCluster.objects.get(pk=pk).sub_opinions.all()])
        if force_commit:
            si.commit()
    except SolrError as exc:
        add_or_update_cluster.retry(exc=exc, countdown=30)
Example #11
0
def add_or_update_people(item_pks, force_commit=True):
    """Index the given Person objects in the people Solr index.

    :param item_pks: Primary keys of the Person objects to add.
    :param force_commit: When true, commit to Solr right after adding.
    :return: None. On a SolrError the function calls its own `retry` with a
    30 second countdown (presumably it is registered as a Celery task --
    confirm against the decorator, which is not shown here).
    """
    si = sunburnt.SolrInterface(settings.SOLR_PEOPLE_URL, mode='w')
    try:
        si.add([SearchPerson(item) for item in
                Person.objects.filter(pk__in=item_pks)])
        if force_commit:
            si.commit()
    except SolrError as exc:
        add_or_update_people.retry(exc=exc, countdown=30)
Example #12
0
def add_or_update_opinions(item_pks, force_commit=True):
    """Index the given Opinion objects in the opinion Solr index.

    :param item_pks: Primary keys of the Opinion objects to add.
    :param force_commit: When true, commit to Solr right after adding.
    :return: None. On a SolrError the function calls its own `retry` with a
    30 second countdown (presumably it is registered as a Celery task --
    confirm against the decorator, which is not shown here).
    """
    si = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='w')
    try:
        si.add([SearchDocument(item) for item in
                Opinion.objects.filter(pk__in=item_pks)])
        if force_commit:
            si.commit()
    except SolrError as exc:
        add_or_update_opinions.retry(exc=exc, countdown=30)
Example #13
0
def add_or_update_audio_files(item_pks, force_commit=True):
    """Index the given Audio objects in the audio Solr index.

    :param item_pks: Primary keys of the Audio objects to add.
    :param force_commit: When true, commit to Solr right after adding.
    :return: None. On a SolrError the function calls its own `retry` with a
    30 second countdown (presumably it is registered as a Celery task --
    confirm against the decorator, which is not shown here).
    """
    si = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL, mode='w')
    try:
        si.add([SearchAudioFile(item) for item in
                Audio.objects.filter(pk__in=item_pks)])
        if force_commit:
            si.commit()
    except SolrError as exc:
        add_or_update_audio_files.retry(exc=exc, countdown=30)
Example #14
0
 def items(self, obj):
     """Return the first 20 audio results, sorted by dateArgued descending.

     `obj` is not used.
     """
     solr_params = {
         'q': '*:*',
         'sort': 'dateArgued desc',
         'rows': '20',
         'start': '0',
         'caller': 'AllJurisdictionsPodcast',
     }
     audio_conn = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL, mode='r')
     query = audio_conn.raw_query(**solr_params)
     return query.execute()
Example #15
0
def place_facet_queries(cd, conn=None):
    """Get facet values for the status filters

    Using the search form, query Solr and get the values for the status filters.

    :param cd: The cleaned data of the search form. The keys read here are
    q, case_name, judge, citation, docket_number, neutral_cite, filed_before
    and filed_after; `cd` is also passed on to the query-building helpers.
    :param conn: The Solr connection to query. When omitted, a connection to
    the opinion index is opened for this call. (The connection used to be
    built as the default argument value, which meant it was created once,
    when the function was defined, and then shared by every call.)
    :return: The `facet_fields` of the executed query's facet counts.
    """
    if conn is None:
        conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')

    # Build up all the queries needed
    facet_params = {
        'rows': '0',
        'facet': 'true',
        'facet.mincount': 0,
        'facet.field': '{!ex=dt}status_exact',
        'q': cd['q'] or '*:*',
        'caller': 'facet_parameters',
    }
    fq = []

    # Case Name and judges
    if cd['case_name']:
        fq.append(make_fq(cd, 'caseName', 'case_name'))
    if cd['judge']:
        fq.append(make_fq(cd, 'judge', 'judge'))

    # Citations
    if cd['citation']:
        fq.append(make_fq_proximity_query(cd, 'citation', 'citation'))
    if cd['docket_number']:
        fq.append(make_fq(cd, 'docketNumber', 'docket_number'))
    if cd['neutral_cite']:
        fq.append(make_fq(cd, 'neutralCite', 'neutral_cite'))

    fq.append(
        make_date_query('dateFiled', cd['filed_before'], cd['filed_after']))
    fq.append(make_cite_count_query(cd))

    # Faceting
    selected_courts_string = get_selected_field_string(
        cd, 'court_')  # Status facets depend on court checkboxes
    selected_stats_string = get_selected_field_string(cd, 'stat_')
    if len(selected_stats_string) > 0:
        fq.extend([
            '{!tag=dt}status_exact:(%s)' % selected_stats_string,
            'court_exact:(%s)' % selected_courts_string
        ])

    # If a param has been added to the fq variables, then we add them to the
    # main_params var. Otherwise, we don't, as doing so throws an error.
    if len(fq) > 0:
        facet_params['fq'] = fq

    stat_facet_fields = conn.raw_query(
        **facet_params).execute().facet_counts.facet_fields

    return stat_facet_fields
Example #16
0
    def setUp(self):
        """Create the opinion and audio test cores, then open and commit
        a read/write interface for each index.

        The opinion core is created with `create_solr_core`'s defaults; the
        audio core gets an explicit schema path and instance directory.
        """
        self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
        self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME

        create_solr_core(self.core_name_opinion)
        audio_schema = os.path.join(
            settings.INSTALL_ROOT, 'Solr', 'conf', 'audio_schema.xml')
        create_solr_core(
            self.core_name_audio,
            schema=audio_schema,
            instance_dir='/usr/local/solr/example/solr/audio',
        )
        # Core swapping is currently disabled:
        # swap_solr_core('collection1', self.core_name_opinion)
        # swap_solr_core('audio', self.core_name_audio)
        self.si_opinion = sunburnt.SolrInterface(
            settings.SOLR_OPINION_URL, mode='rw')
        self.si_audio = sunburnt.SolrInterface(
            settings.SOLR_AUDIO_URL, mode='rw')

        for interface in (self.si_opinion, self.si_audio):
            interface.commit()
Example #17
0
 def items(self, obj):
     """Return the first 20 opinion results, sorted by dateFiled
     descending. `obj` is not used."""
     solr_params = {
         'q': '*:*',
         'sort': 'dateFiled desc',
         'rows': '20',
         'start': '0',
         'caller': 'AllJurisdictionsFeed',
     }
     opinion_conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL,
                                           mode='r')
     query = opinion_conn.raw_query(**solr_params)
     return query.execute()
Example #18
0
def oral_argument_sitemap_maker(request):
    """Render one page of the oral argument sitemap.

    The page number comes from the `p` GET parameter (default 1). Each Solr
    hit yields an entry for its absolute URL and, when it has a non-empty
    `local_path`, a second entry for that path. Entries whose URL contains
    'mp3' get priority 0.3; the rest get 0.5.

    :param request: The incoming HTTP request.
    :return: An HttpResponse with the rendered `sitemap.xml` template,
    served as application/xml with an X-Robots-Tag header.
    """
    conn = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL, mode='r')
    page_number = int(request.GET.get("p", 1))
    offset = (page_number - 1) * items_per_sitemap
    wanted_fields = ['absolute_url', 'dateArgued', 'local_path',
                     'citeCount', 'timestamp']
    solr_params = {
        'q': '*:*',
        'rows': items_per_sitemap,
        'start': offset,
        'fl': ','.join(wanted_fields),
        'sort': 'dateArgued asc',
        'caller': 'oral_argument_sitemap_maker',
    }
    hits = conn.raw_query(**solr_params).execute()

    # Reshape the Solr hits into dicts the Django template can consume.
    urls = []
    for hit in hits:
        locations = ['https://www.courtlistener.com%s' % hit['absolute_url']]
        local_path = hit.get('local_path')
        if local_path:
            locations.append('https://www.courtlistener.com/%s' % local_path)

        for location in locations:
            urls.append({
                'location': location,
                'changefreq': 'yearly',
                'lastmod': hit['timestamp'],
                'priority': '0.3' if 'mp3' in location else '0.5',
            })

    rendered = loader.render_to_string('sitemap.xml', {'urlset': urls})
    # These links contain case names, so they should get crawled but not
    # indexed
    response = HttpResponse(smart_str(rendered),
                            content_type='application/xml')
    response['X-Robots-Tag'] = 'noindex, noodp, noarchive, noimageindex'
    return response
Example #19
0
 def items(self, obj):
     """Return the first 20 audio results for one court.

     Results are filtered to `court_exact:<obj.pk>` and sorted by
     dateArgued descending.
     """
     court_filter = 'court_exact:%s' % obj.pk
     solr_params = {
         'q': '*:*',
         'fq': court_filter,
         'sort': 'dateArgued desc',
         'rows': '20',
         'start': '0',
         'caller': 'JurisdictionPodcast',
     }
     audio_conn = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL, mode='r')
     query = audio_conn.raw_query(**solr_params)
     return query.execute()
Example #20
0
def do_search(request, rows=20, order_by=None, type=None):
    """Bind the search form to the request and build the Solr query for it.

    :param request: The incoming HTTP request; its GET parameters populate
    the SearchForm.
    :param rows: Not used in the code shown here.
    :param order_by: If truthy, overrides the form's `order_by` value.
    :param type: If truthy, overrides the form's `type` value.
    :return: {'error': True} if anything in the query-building step raises.
    NOTE(review): as shown, an invalid form or a fully successful run falls
    through without an explicit return value -- confirm whether the function
    continues beyond what is visible here.
    """

    # Bind the search form.
    search_form = SearchForm(request.GET)
    if search_form.is_valid():
        cd = search_form.cleaned_data
        # Allows an override by calling methods.
        if order_by:
            cd['order_by'] = order_by
        if type:
            cd['type'] = type
        search_form = _clean_form(request, cd)

        try:
            # Pick the Solr index by search type: 'o' queries the opinion
            # index and also computes status facets; 'oa' queries the audio
            # index and has no status facets.
            # NOTE(review): for any other type `conn` is never assigned, so
            # the raw_query line below raises a NameError that the broad
            # except then reports as a generic error.
            if cd['type'] == 'o':
                conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL,
                                              mode='r')
                stat_facet_fields = search_utils.place_facet_queries(cd, conn)
                status_facets = search_utils.make_stats_variable(
                    stat_facet_fields, search_form)
            elif cd['type'] == 'oa':
                conn = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL,
                                              mode='r')
                status_facets = None
            # The query is built here but .execute() is not called on it.
            results_si = conn.raw_query(**search_utils.build_main_query(cd))

            courts = Court.objects.filter(in_use=True).values(
                'pk', 'short_name', 'jurisdiction',
                'has_oral_argument_scraper')
            courts, court_count_human, court_count = search_utils\
                .merge_form_with_courts(courts, search_form)

        except Exception, e:
            # Any failure above is logged as a warning and reported to the
            # caller as an error dict rather than propagated.
            logger.warning("Error loading search with request: %s" %
                           request.GET)
            logger.warning("Error was %s" % e)
            return {'error': True}
Example #21
0
 def items(self, obj):
     """Return the first 20 audio results for the search in `obj.GET`.

     The main query is built from the validated search form without
     highlighting and sorted by dateArgued descending. An invalid form
     yields an empty list.
     """
     form = SearchForm(obj.GET)
     if not form.is_valid():
         return []

     cleaned = form.cleaned_data
     audio_conn = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL, mode='r')
     solr_params = search_utils.build_main_query(cleaned, highlight=False)
     feed_overrides = {
         'sort': 'dateArgued desc',
         'rows': '20',
         'start': '0',
         'caller': 'SearchFeed',
     }
     solr_params.update(feed_overrides)
     return audio_conn.raw_query(**solr_params).execute()
Example #22
0
 def setUp(self):
     """Create the temporary Solr test cores and open an interface to each.

     Each of the opinion, audio and people test cores is created from its
     schema file under <INSTALL_ROOT>/Solr/conf, and then a read/write
     sunburnt interface is opened for each index.
     """
     self.core_name_opinion = settings.SOLR_OPINION_TEST_CORE_NAME
     self.core_name_audio = settings.SOLR_AUDIO_TEST_CORE_NAME
     self.core_name_people = settings.SOLR_PEOPLE_TEST_CORE_NAME

     install_root = settings.INSTALL_ROOT
     cores_and_schemas = (
         (self.core_name_opinion, 'schema.xml'),
         (self.core_name_audio, 'audio_schema.xml'),
         (self.core_name_people, 'person_schema.xml'),
     )
     for core_name, schema_file in cores_and_schemas:
         schema_path = os.path.join(install_root, 'Solr', 'conf',
                                    schema_file)
         create_temp_solr_core(core_name, schema_path)

     read_write = 'rw'
     self.si_opinion = sunburnt.SolrInterface(settings.SOLR_OPINION_URL,
                                              mode=read_write)
     self.si_audio = sunburnt.SolrInterface(settings.SOLR_AUDIO_URL,
                                            mode=read_write)
     self.si_people = sunburnt.SolrInterface(settings.SOLR_PEOPLE_URL,
                                             mode=read_write)
def match_citation(citation, citing_doc=None):
    """Try to match a citation object to an item in the opinion index.

    The search is restricted to precedential documents, to a filing-date
    range, optionally to the citation's court, and to a phrase match on the
    citation field. When a citing document is given it is excluded from the
    results, and several hits are narrowed further by case name if the
    citation has a defendant.

    :param citation: The citation to look up.
    :param citing_doc: The document containing the citation, if known.
    :return: A Solr result object with the results, or an empty list if
    there are no hits.
    """
    # TODO: Create shared solr connection for all queries
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    have_citing_doc = citing_doc is not None

    # Non-precedential documents aren't cited
    filters = ['status:Precedential']
    if have_citing_doc:
        # Eliminate self-cites.
        filters.append('-id:%s' % citing_doc.pk)

    # Work out the range of years the cited document could be from.
    if citation.year:
        start_year = citation.year
        end_year = citation.year
    else:
        start_year, end_year = get_years_from_reporter(citation)
        if have_citing_doc and citing_doc.cluster.date_filed:
            end_year = min(end_year, citing_doc.cluster.date_filed.year)
    filters.append('dateFiled:%s' % build_date_range(start_year, end_year))

    if citation.court:
        filters.append('court_exact:%s' % citation.court)

    # Take 1: Use a phrase query to search the citation field.
    filters.append('citation:("%s")' % citation.base_citation())

    main_params = {
        'q': '*',
        'fq': filters,
        'caller': 'citation.match_citations.match_citation',
    }
    results = conn.raw_query(**main_params).execute()
    hit_count = len(results)
    if hit_count == 0:
        # Give up.
        return []
    if hit_count > 1 and have_citing_doc and citation.defendant:
        # Refine using defendant, if there is one
        results = case_name_query(conn, main_params, citation, citing_doc)
    return results
Example #24
0
    def handle(self, *args, **options):
        """Run the index action selected by the command options.

        Stores the options on the instance, opens a read/write Solr
        interface to `options['solr_url']`, then runs the first matching
        mode: update, delete, commit or optimize. Within update and delete
        the target is chosen in the order everything, datetime, query,
        items. Updating by query is not implemented and exits with status
        1, as does specifying no mode at all.
        """
        self.verbosity = int(options.get('verbosity', 1))
        self.solr_url = options['solr_url']
        self.si = sunburnt.SolrInterface(self.solr_url, mode='rw')
        self.options = options
        self.type = options['type']
        self.noinput = options['noinput']

        if options['update']:
            if self.verbosity >= 1:
                self.stdout.write('Running in update mode...\n')
            if options.get('everything'):
                self.add_or_update_all()
            elif options.get('datetime'):
                self.add_or_update_by_datetime(options['datetime'])
            elif options.get('query'):
                self.stderr.write("Updating by query not implemented.")
                sys.exit(1)
            elif options.get('items'):
                self.add_or_update(*options['items'])
            return

        if options.get('delete'):
            if self.verbosity >= 1:
                self.stdout.write('Running in deletion mode...\n')
            if options.get('everything'):
                self.delete_all()
            elif options.get('datetime'):
                self.delete_by_datetime(options['datetime'])
            elif options.get('query'):
                self.delete_by_query(options['query'])
            elif options.get('items'):
                self.delete(*options['items'])
            return

        if options.get('do_commit'):
            self.commit()
            return

        if options.get('optimize'):
            self.optimize()
            return

        # No mode was selected.
        self.stderr.write('Error: You must specify whether you wish to '
                          'update, delete, commit, or optimize your '
                          'index.\n')
        sys.exit(1)
Example #25
0
    def get_expected_item_count(self):
        """Return how many URL entries the opinion sitemap should contain.

        OpinionsSitemap builds its page from the Solr index, so the only
        accurate count comes from the index itself (which is in turn based
        on the fixtures). Note that this sets `rows` to 1000 on the shared
        `opinion_solr_params` dict before querying.
        """
        conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
        opinion_solr_params['rows'] = 1000
        hits = conn.raw_query(**opinion_solr_params).execute()

        # The underlying SitemapTest counts url elements in the XML
        # response. This mirrors how that XML is built, so we at least know
        # what count we *should* get if the HTTP client-based test gets an
        # HTTP 200: three entries for a hit with a local_path, two
        # otherwise.
        return sum(3 if hit.get('local_path') else 2 for hit in hits)
Example #26
0
    def handle(self, *args, **options):
        """Select the opinions to process and run the update over them.

        Exactly one kind of selection is expected: a list of document ids
        (`doc_id`), a range (`start_id`, `end_id` and/or `filed_after`), or
        `all`.

        :raises CommandError: If a document list is combined with a range,
        or if no selection is given at all. (The no-selection check used to
        be inverted, so running with no options silently processed every
        opinion instead of raising.)
        """
        super(Command, self).handle(*args, **options)
        doc_id_given = options.get("doc_id") is not None
        endpoint_given = any(
            options.get(key) is not None
            for key in ("start_id", "end_id", "filed_after")
        )
        both_list_and_endpoints = doc_id_given and endpoint_given
        no_option = not (doc_id_given or endpoint_given
                         or options.get("all"))
        if both_list_and_endpoints or no_option:
            raise CommandError("Please specify either a list of documents, a "
                               "range of ids, a range of dates, or "
                               "everything.")

        self.index = options["index"]
        self.si = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode="rw")

        # Use query chaining to build the query
        query = Opinion.objects.all()
        if options.get("doc_id"):
            query = query.filter(pk__in=options.get("doc_id"))
        if options.get("end_id"):
            query = query.filter(pk__lte=options.get("end_id"))
        if options.get("start_id"):
            query = query.filter(pk__gte=options.get("start_id"))
        if options.get("filed_after"):
            query = query.filter(
                cluster__date_filed__gte=options["filed_after"])
        if options.get("all"):
            # "all" overrides any filters applied above.
            query = Opinion.objects.all()
        # Count once and reuse the value rather than querying twice.
        count = self.count = query.count()
        self.average_per_s = 0
        self.timings = []
        opinion_pks = query.values_list("pk", flat=True).iterator()
        self.update_documents(opinion_pks, count)
        self.add_to_solr()
    def handle(self, *args, **options):
        """Select the opinions to process and run the update over them.

        Exactly one kind of selection is expected: a list of document ids
        (`doc_id`), a range (`start_id`, `end_id` and/or `filed_after`), or
        `all`.

        :raises CommandError: If a document list is combined with a range,
        or if no selection is given at all. (The no-selection check used to
        be inverted, so running with no options silently processed every
        opinion instead of raising.)
        """
        super(Command, self).handle(*args, **options)
        doc_id_given = options.get('doc_id') is not None
        endpoint_given = any(
            options.get(key) is not None
            for key in ('start_id', 'end_id', 'filed_after')
        )
        both_list_and_endpoints = doc_id_given and endpoint_given
        no_option = not (doc_id_given or endpoint_given
                         or options.get('all'))
        if both_list_and_endpoints or no_option:
            raise CommandError('Please specify either a list of documents, a '
                               'range of ids, a range of dates, or '
                               'everything.')

        self.index = options['index']
        self.si = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='rw')

        # Use query chaining to build the query
        query = Opinion.objects.all()
        if options.get('doc_id'):
            query = query.filter(pk__in=options.get('doc_id'))
        if options.get('end_id'):
            query = query.filter(pk__lte=options.get('end_id'))
        if options.get('start_id'):
            query = query.filter(pk__gte=options.get('start_id'))
        if options.get('filed_after'):
            query = query.filter(
                cluster__date_filed__gte=options['filed_after'])
        if options.get('all'):
            # 'all' overrides any filters applied above.
            query = Opinion.objects.all()
        # Count once and reuse the value rather than querying twice.
        count = self.count = query.count()
        self.average_per_s = 0
        self.timings = []
        opinion_pks = query.values_list('pk', flat=True).iterator()
        self.update_documents(opinion_pks, count)
        self.add_to_solr()
Example #28
0
def opinion_sitemap_maker(request):
    """Render one page of the opinion sitemap.

    The page number comes from the `p` GET parameter (default '1') and is
    written into the shared `opinion_solr_params` dict as the `start`
    offset. Each Solr hit yields an entry for its absolute URL, one for its
    authorities page and, when it has a non-empty `local_path`, one for
    that path. Entries whose URL contains 'authorities', 'pdf', 'doc' or
    'wpd' get priority 0.3; the rest get 0.5.

    :param request: The incoming HTTP request.
    :return: An HttpResponse with the rendered `sitemap.xml` template,
    served as application/xml with an X-Robots-Tag header.
    """
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    page = request.GET.get("p", '1')
    opinion_solr_params['start'] = (int(page) - 1) * items_per_sitemap

    hits = conn.raw_query(**opinion_solr_params).execute()

    low_priority_markers = ['authorities', 'pdf', 'doc', 'wpd']

    # Reshape the Solr hits into dicts the Django template can consume.
    urls = []
    for hit in hits:
        locations = [
            'https://www.courtlistener.com%s' % hit['absolute_url'],
            'https://www.courtlistener.com%sauthorities/' %
            hit['absolute_url'],
        ]
        local_path = hit.get('local_path')
        if local_path:
            locations.append('https://www.courtlistener.com/%s' % local_path)

        for location in locations:
            is_low_priority = any(marker in location
                                  for marker in low_priority_markers)
            urls.append({
                'location': location,
                'changefreq': 'yearly',
                'lastmod': hit['timestamp'],
                'priority': '0.3' if is_low_priority else '0.5',
            })

    rendered = loader.render_to_string('sitemap.xml', {'urlset': urls})
    # These links contain case names, so they should get crawled but not
    # indexed
    response = HttpResponse(smart_str(rendered),
                            content_type='application/xml')
    response['X-Robots-Tag'] = 'noindex, noodp, noarchive, noimageindex'
    return response
Example #29
0
def get_citing_clusters_with_cache(cluster, is_bot):
    """Fetch the top clusters citing `cluster`, going through the DB cache.

    Bots only ever read from the cache: they get whatever a real user's
    visit stored earlier, or None if nothing was stored. For everyone else
    Solr is queried and the result is cached for thirty days.

    :param cluster: The cluster we're targeting
    :type cluster: OpinionCluster
    :param is_bot: Whether the page running this was loaded by a bot
    :type is_bot: bool
    :return: A search result of the top five citing clusters or None
    :rtype: SolrSearch or None
    """
    db_cache = caches["db_cache"]
    key = "citing:%s" % cluster.pk
    if is_bot:
        # Never query Solr on behalf of a bot; a cache miss returns None.
        return db_cache.get(key)

    # Get the citing results from Solr for speed. Only do this for humans
    # to save on disk usage.
    opinion_ids = cluster.sub_opinions.values_list("pk", flat=True)
    joined_ids = " OR ".join(str(opinion_id) for opinion_id in opinion_ids)
    solr_params = {
        "q": "cites:(%s)" % joined_ids,
        "rows": 5,
        "start": 0,
        "sort": "citeCount desc",
        "caller": "view_opinion",
    }
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode="r")
    citing_clusters = conn.raw_query(**solr_params).execute()

    thirty_days_in_seconds = 60 * 60 * 24 * 30
    db_cache.set(key, citing_clusters, thirty_days_in_seconds)
    return citing_clusters
Example #30
0
    def handle(self, *args, **options):
        """Select the opinions to process and run the update over them.

        Exactly one kind of selection is expected: a list of document ids
        (`doc_id`), a range (`start_id`, `end_id` and/or `filed_after`), or
        `all`.

        :raises CommandError: If a document list is combined with a range,
        or if no selection is given at all. (The no-selection check used to
        be inverted, so running with no options silently processed every
        opinion instead of raising.)
        """
        doc_id_given = options.get('doc_id') is not None
        endpoint_given = any(
            options.get(key) is not None
            for key in ('start_id', 'end_id', 'filed_after')
        )
        both_list_and_endpoints = doc_id_given and endpoint_given
        no_option = not (doc_id_given or endpoint_given
                         or options.get('all'))
        if both_list_and_endpoints or no_option:
            raise CommandError('Please specify either a list of documents, a '
                               'range of ids, a range of dates, or '
                               'everything.')

        self.index = options['index']
        self.si = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='rw')

        # Use query chaining to build the query
        query = Opinion.objects.all()
        if options.get('doc_id'):
            query = query.filter(pk__in=options.get('doc_id'))
        if options.get('end_id'):
            query = query.filter(pk__lte=options.get('end_id'))
        if options.get('start_id'):
            query = query.filter(pk__gte=options.get('start_id'))
        if options.get('filed_after'):
            query = query.filter(
                cluster__date_filed__gte=options['filed_after'])
        if options.get('all'):
            # 'all' overrides any filters applied above.
            query = Opinion.objects.all()
        count = query.count()
        docs = queryset_generator(query, chunksize=10000)
        self.update_documents(docs, count)