Ejemplo n.º 1
0
def core_docket_data(request, pk):
    """Collect the shared context for a docket, party, or IDB page.

    :param request: The incoming request; ``request.user`` is used to look
    up an existing favorite and a docket alert.
    :param pk: Primary key of the docket to load.
    :return: A two-tuple of the docket and the template context dict.
    """
    docket = get_object_or_404(Docket, pk=pk)

    # Blank pieces are dropped so the title never carries a dangling comma.
    title_parts = [
        trunc(best_case_name(docket), 100, ellipsis="..."),
        docket.docket_number,
    ]
    title = ', '.join(part for part in title_parts if part.strip())

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # No favorite yet, or an anonymous user: offer a pre-filled form.
        initial = {
            'docket_id': docket.pk,
            'name': trunc(best_case_name(docket), 100, ellipsis='...'),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    # The alert lookup is only attempted for logged-in users.
    has_alert = (
        DocketAlert.objects.filter(docket=docket, user=request.user).exists()
        if request.user.is_authenticated
        else False
    )

    context = {
        'docket': docket,
        'title': title,
        'favorite_form': favorite_form,
        'has_alert': has_alert,
        'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
        'private': docket.blocked,
    }
    return docket, context
Ejemplo n.º 2
0
def core_docket_data(request, pk):
    """Collect the shared context for a docket, party, or IDB page.

    :param request: The incoming request; ``request.user`` is used to look
    up an existing favorite and a docket alert.
    :param pk: Primary key of the docket to load.
    :return: A two-tuple of the docket and the template context dict.
    """
    docket = get_object_or_404(Docket, pk=pk)

    # Blank pieces are dropped so the title never carries a dangling comma.
    title_parts = [
        trunc(best_case_name(docket), 100, ellipsis="..."),
        docket.docket_number,
    ]
    title = ', '.join(part for part in title_parts if part.strip())

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # No favorite yet, or an anonymous user: offer a pre-filled form.
        initial = {
            'docket_id': docket.pk,
            'name': trunc(best_case_name(docket), 100, ellipsis='...'),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    # The alert lookup is only attempted for logged-in users.
    has_alert = (
        DocketAlert.objects.filter(docket=docket, user=request.user).exists()
        if request.user.is_authenticated
        else False
    )

    context = {
        'docket': docket,
        'title': title,
        'favorite_form': favorite_form,
        'has_alert': has_alert,
        'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
        'private': docket.blocked,
    }
    return docket, context
Ejemplo n.º 3
0
def view_audio_file(request, pk, _):
    """Render the oral argument page for the audio file with the given ID.

    The context also carries a favorite form: bound to the user's existing
    favorite when one is found, otherwise pre-filled for creating one.

    :param request: The incoming request.
    :param pk: Primary key of the Audio item to show.
    :param _: Unused positional URL argument.
    :return: The rendered 'oral_argument.html' response.
    """
    audio = get_object_or_404(Audio, pk=pk)
    page_title = trunc(audio.case_name, 100)
    query_string = search_utils.make_get_string(request)

    try:
        fave = Favorite.objects.get(audio_id=audio.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # No favorite yet, or an anonymous user: offer a pre-filled form.
        initial = {
            'audio_id': audio.pk,
            'name': trunc(best_case_name(audio.docket), 100, ellipsis='...'),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        'title': page_title,
        'af': audio,
        'favorite_form': favorite_form,
        'get_string': query_string,
        'private': audio.blocked,
    }
    return render(request, 'oral_argument.html', context)
Ejemplo n.º 4
0
def view_audio_file(request, pk, _):
    """Render the oral argument page for the audio file with the given ID.

    The context also carries a favorite form: bound to the user's existing
    favorite when one is found, otherwise pre-filled for creating one.

    :param request: The incoming request.
    :param pk: Primary key of the Audio item to show.
    :param _: Unused positional URL argument.
    :return: The rendered 'oral_argument.html' response.
    """
    audio = get_object_or_404(Audio, pk=pk)
    page_title = trunc(audio.case_name, 100)
    query_string = search_utils.make_get_string(request)

    try:
        fave = Favorite.objects.get(audio_id=audio.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # No favorite yet, or an anonymous user: offer a pre-filled form.
        initial = {
            'audio_id': audio.pk,
            'name': trunc(best_case_name(audio.docket), 100, ellipsis='...'),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        'title': page_title,
        'af': audio,
        'favorite_form': favorite_form,
        'get_string': query_string,
        'private': audio.blocked,
    }
    return render(request, 'oral_argument.html', context)
Ejemplo n.º 5
0
def view_opinion(request, pk, _):
    """Using the cluster ID, return the cluster of opinions.

    We also test if the cluster ID is a favorite for the user, and send data
    if needed. If it's a favorite, we send the bound form for the favorite so
    it can populate the form on the page. If it is not a favorite, we send the
    unbound form.

    :param request: The incoming request.
    :param pk: Primary key of the OpinionCluster to show.
    :param _: Unused positional URL argument.
    :return: The rendered 'view_opinion.html' response.
    """
    # Look up the court, cluster, title and favorite information
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = '%s, %s' % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string,
    )
    get_string = search_utils.make_get_string(request)

    try:
        fave = Favorite.objects.get(
            cluster_id=cluster.pk,
            user=request.user,
        )
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'cluster_id': cluster.pk,
                'name': trunc(best_case_name(cluster), 100, ellipsis='...'),
            }
        )
    else:
        # Built outside the try so that an error raised while constructing
        # the form is not mistaken for "no favorite".
        favorite_form = FavoriteForm(instance=fave)

    # Get the citing results from Solr for speed.
    conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
    # A distinct loop name keeps the `pk` argument from being shadowed.
    sub_opinion_ids = cluster.sub_opinions.values_list('pk', flat=True)
    q = {
        'q': 'cites:({ids})'.format(
            ids=' OR '.join([str(sub_pk) for sub_pk in sub_opinion_ids])
        ),
        'rows': 5,
        'start': 0,
        'sort': 'citeCount desc',
        'caller': 'view_opinion',
    }
    citing_clusters = conn.raw_query(**q).execute()

    return render_to_response(
        'view_opinion.html',
        {
            'title': title,
            'cluster': cluster,
            'favorite_form': favorite_form,
            'get_string': get_string,
            'private': cluster.blocked,
            'citing_clusters': citing_clusters,
            'top_authorities': cluster.authorities[:5],
        },
        RequestContext(request)
    )
Ejemplo n.º 6
0
def view_parties(request, docket_id, slug):
    """Show the parties and attorneys tab on the docket.

    :param request: The incoming request; ``request.user`` is used to look
    up an existing favorite for the docket.
    :param docket_id: Primary key of the docket whose parties are shown.
    :param slug: URL slug; not used in the body of this view.
    :return: The rendered 'docket_parties.html' response.
    """
    docket = get_object_or_404(Docket, pk=docket_id)
    # Join the truncated case name and docket number, skipping blank pieces.
    title = ', '.join([
        s for s in [
            trunc(best_case_name(docket), 100, ellipsis="..."),
            docket.docket_number,
        ] if s.strip()
    ])
    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'docket_id': docket.pk,
                'name': trunc(best_case_name(docket), 100, ellipsis='...'),
            })
    else:
        favorite_form = FavoriteForm(instance=fave)

    # We work with this data at the level of party_types so that we can group
    # the parties by this field. From there, we do a whole mess of prefetching,
    # which reduces the number of queries needed for this down to four instead
    # of potentially thousands (good times!)
    #
    # Roles are limited to this docket, and each attorney's organizations for
    # this docket are stored on the attorney as `firms_in_docket`.
    party_types = docket.party_types.select_related('party').prefetch_related(
        Prefetch(
            'party__roles',
            queryset=Role.objects.filter(docket=docket).order_by(
                'attorney_id', 'role',
                'date_action').select_related('attorney').prefetch_related(
                    Prefetch(
                        'attorney__organizations',
                        queryset=AttorneyOrganization.objects.filter(
                            attorney_organization_associations__docket=docket).
                        distinct(),
                        to_attr='firms_in_docket',
                    )))).order_by('name', 'party__name')

    # The queryset is ordered by `name` first, so rows sharing a party type
    # name are adjacent when grouped here (presumably itertools.groupby).
    # Note that the loop rebinds `party_types` to each group's members.
    parties = []
    for party_type_name, party_types in groupby(party_types, lambda x: x.name):
        party_types = list(party_types)
        parties.append({
            'party_type_name': party_type_name,
            'party_type_objects': party_types
        })

    return render(
        request, 'docket_parties.html', {
            'docket': docket,
            'title': title,
            'parties': parties,
            'favorite_form': favorite_form,
            'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
            'private': docket.blocked,
        })
Ejemplo n.º 7
0
def view_opinion(request, pk, _):
    """Render the page for the opinion cluster with the given ID.

    The context carries a favorite form: bound to the user's existing
    favorite for this cluster when one is found, otherwise an unbound form
    pre-filled with the cluster ID and a truncated case name.

    :param request: The incoming request.
    :param pk: Primary key of the OpinionCluster to show.
    :param _: Unused positional URL argument.
    :return: The rendered "view_opinion.html" response.
    """
    cluster = get_object_or_404(OpinionCluster, pk=pk)

    # Blank pieces are dropped so the title never carries a dangling comma.
    title_parts = [
        trunc(best_case_name(cluster), 100, ellipsis="..."),
        cluster.citation_string,
    ]
    title = ", ".join(part for part in title_parts if part.strip())

    # True as soon as one sub-opinion has a local file or a download URL.
    has_downloads = any(
        sub_opinion.local_path or sub_opinion.download_url
        for sub_opinion in cluster.sub_opinions.all()
    )
    get_string = make_get_string(request)

    try:
        fave = Favorite.objects.get(cluster_id=cluster.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # No favorite yet, or an anonymous user: offer a pre-filled form.
        initial = {
            "cluster_id": cluster.pk,
            "name": trunc(best_case_name(cluster), 100, ellipsis="..."),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    citing_clusters = get_citing_clusters_with_cache(cluster, is_bot(request))

    context = {
        "title": title,
        "cluster": cluster,
        "has_downloads": has_downloads,
        "favorite_form": favorite_form,
        "get_string": get_string,
        "private": cluster.blocked,
        "citing_clusters": citing_clusters,
        "top_authorities": cluster.authorities_with_data[:5],
        "authorities_count": len(cluster.authorities_with_data),
    }
    return render(request, "view_opinion.html", context)
Ejemplo n.º 8
0
def view_recap_document(request, docket_id=None, doc_num=None, att_num=None,
                        slug=''):
    """Render the page for a RECAP document or one of its attachments.

    Which of the two is loaded depends on the URL pattern that matched, i.e.
    on whether an attachment number was supplied.

    :param request: The incoming request.
    :param docket_id: ID of the docket the document's entry belongs to.
    :param doc_num: The document number to look up.
    :param att_num: The attachment number to look up, if any.
    :param slug: URL slug; not used in the body of this view.
    :return: The rendered 'recap_document.html' response.
    """
    lookup = {
        'docket_entry__docket__id': docket_id,
        'document_number': doc_num,
        'attachment_number': att_num,
    }
    item = get_object_or_404(RECAPDocument, **lookup)
    title = make_rd_title(item)

    if is_og_bot(request):
        # Reload the item afterwards so changes made by the thumbnail step
        # are visible on this instance.
        make_thumb_if_needed(item)
        item.refresh_from_db()

    try:
        fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # No favorite yet, or an anonymous user: offer a pre-filled form.
        initial = {
            'recap_doc_id': item.pk,
            'name': trunc(title, 100, ellipsis='...'),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        'document': item,
        'title': title,
        'favorite_form': favorite_form,
        'private': True,  # Always True for RECAP docs.
    }
    return render(request, 'recap_document.html', context)
Ejemplo n.º 9
0
    def save(self, index=True, force_commit=False, *args, **kwargs):
        """Refresh the slug, save the cluster, and optionally queue indexing.

        :param index: When true, an ``add_or_update_cluster`` task is queued
        after the save.
        :param force_commit: Passed through to the indexing task.
        """
        self.slug = slugify(trunc(best_case_name(self), 75))
        super(OpinionCluster, self).save(*args, **kwargs)
        if not index:
            return

        # Imported at call time rather than at module level.
        from cl.search.tasks import add_or_update_cluster

        add_or_update_cluster.delay(self.pk, force_commit)
Ejemplo n.º 10
0
def view_recap_document(request, docket_id=None, doc_num=None, att_num=None,
                        slug=''):
    """Render the page for a RECAP document or one of its attachments.

    Which of the two is loaded depends on the URL pattern that matched, i.e.
    on whether an attachment number was supplied.

    :param request: The incoming request.
    :param docket_id: ID of the docket the document's entry belongs to.
    :param doc_num: The document number to look up.
    :param att_num: The attachment number to look up, if any.
    :param slug: URL slug; not used in the body of this view.
    :return: The rendered 'recap_document.html' response.
    """
    lookup = {
        'docket_entry__docket__id': docket_id,
        'document_number': doc_num,
        'attachment_number': att_num,
    }
    item = get_object_or_404(RECAPDocument, **lookup)
    title = make_rd_title(item)

    if is_og_bot(request):
        # Reload the item afterwards so changes made by the thumbnail step
        # are visible on this instance.
        make_thumb_if_needed(item)
        item.refresh_from_db()

    try:
        fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # No favorite yet, or an anonymous user: offer a pre-filled form.
        initial = {
            'recap_doc_id': item.pk,
            'name': trunc(title, 100, ellipsis='...'),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        'document': item,
        'title': title,
        'favorite_form': favorite_form,
        'private': True,  # Always True for RECAP docs.
    }
    return render(request, 'recap_document.html', context)
Ejemplo n.º 11
0
def core_docket_data(
    request: HttpRequest,
    pk: int,
) -> Tuple[Docket, Dict[str, Union[bool, str, Docket, FavoriteForm]]]:
    """Collect the shared context for a docket, party, or IDB page.

    :param request: The incoming request; ``request.user`` is used to look
    up an existing favorite and a docket alert.
    :param pk: Primary key of the docket to load.
    :return: A two-tuple of the docket and the template context dict.
    """
    docket = get_object_or_404(Docket, pk=pk)
    title = make_docket_title(docket)

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # No favorite yet, or an anonymous user: offer a pre-filled form.
        initial = {
            "docket_id": docket.pk,
            "name": trunc(best_case_name(docket), 100, ellipsis="..."),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        "docket": docket,
        "title": title,
        "favorite_form": favorite_form,
        "has_alert": user_has_alert(request.user, docket),
        "timezone": COURT_TIMEZONES.get(docket.court_id, "US/Eastern"),
        "private": docket.blocked,
    }
    return docket, context
Ejemplo n.º 12
0
def view_recap_document(request, docket_id=None, doc_num=None, att_num=None,
                        slug=''):
    """Render the page for a RECAP document or one of its attachments.

    Which of the two is loaded depends on the URL pattern that matched, i.e.
    on whether an attachment number was supplied.

    :param request: The incoming request.
    :param docket_id: ID of the docket the document's entry belongs to.
    :param doc_num: The document number to look up.
    :param att_num: The attachment number to look up, if any.
    :param slug: URL slug; not used in the body of this view.
    :return: The rendered 'recap_document.html' response.
    """
    lookup = {
        'docket_entry__docket__id': docket_id,
        'document_number': doc_num,
        'attachment_number': att_num,
    }
    item = get_object_or_404(RECAPDocument, **lookup)

    # Title pieces: optional description prefix, the document number, an
    # attachment suffix for attachments only, then the case name.
    if item.description:
        description_prefix = '%s – ' % item.description
    else:
        description_prefix = ''
    document_number = item.document_number
    if item.document_type == RECAPDocument.ATTACHMENT:
        attachment_suffix = ', Attachment #%s' % item.attachment_number
    else:
        attachment_suffix = ''
    case_name = best_case_name(item.docket_entry.docket)
    title = '%sDocument #%s%s in %s' % (
        description_prefix,
        document_number,
        attachment_suffix,
        case_name,
    )

    try:
        fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # No favorite yet, or an anonymous user: offer a pre-filled form.
        initial = {
            'recap_doc_id': item.pk,
            'name': trunc(title, 100, ellipsis='...'),
        }
        favorite_form = FavoriteForm(initial=initial)
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        'document': item,
        'title': title,
        'favorite_form': favorite_form,
        'private': True,  # Always True for RECAP docs.
    }
    return render(request, 'recap_document.html', context)
Ejemplo n.º 13
0
def make_pdf_path(instance, filename, thumbs=False):
    """Build the storage path for a PDF belonging to a supported model.

    :param instance: A RECAPDocument, ClaimHistory, or LASCPDF object.
    :param filename: The file name to store; for LASCPDF objects it is
    truncated and slugified into the generated name instead.
    :param thumbs: When true, "-thumbnails" is appended to the root folder.
    This flag has no effect for LASCPDF objects.
    :return: The joined path as a string.
    :raises ValueError: If the instance is of any other type.
    """
    from cl.search.models import ClaimHistory, RECAPDocument
    from cl.lasc.models import LASCPDF

    model = type(instance)

    # LASC PDFs get a self-contained path and never use the bucket layout.
    if model == LASCPDF:
        docket_number = instance.docket_number
        lasc_name = "gov.ca.lasc.%s.%s.%s.pdf" % (
            docket_number,
            instance.document_id,
            slugify(trunc(filename, 40)),
        )
        return os.path.join("/us/state/ca/lasc/%s/" % docket_number, lasc_name)

    if model == RECAPDocument:
        docket = instance.docket_entry.docket
        root = "recap"
        court_id = docket.court_id
        pacer_case_id = docket.pacer_case_id
    elif model == ClaimHistory:
        root = "claim"
        court_id = instance.claim.docket.court_id
        pacer_case_id = instance.pacer_case_id
    else:
        raise ValueError("Unknown model type in make_pdf_path "
                         "function: %s" % model)

    folder = root + "-thumbnails" if thumbs else root
    bucket = get_bucket_name(court_id, pacer_case_id)
    return os.path.join(folder, bucket, filename)
Ejemplo n.º 14
0
    def save(self, *args, **kwargs):
        """Refresh the slug and save, rejecting source-1 dockets without a
        pacer_case_id.

        :raises ValidationError: If ``source`` is 1 and ``pacer_case_id`` is
        empty. The slug has already been updated on the instance by then.
        """
        self.slug = slugify(trunc(best_case_name(self), 75))

        lacks_pacer_case_id = self.source == 1 and not self.pacer_case_id
        if lacks_pacer_case_id:
            raise ValidationError("pacer_case_id cannot be Null or empty in "
                                  "RECAP documents.")

        super(Docket, self).save(*args, **kwargs)
Ejemplo n.º 15
0
    def save(self, *args, **kwargs):
        """Refresh the slug and save, rejecting source-1 dockets without a
        pacer_case_id.

        :raises ValidationError: If ``source`` is 1 and ``pacer_case_id`` is
        empty. The slug has already been updated on the instance by then.
        """
        self.slug = slugify(trunc(best_case_name(self), 75))

        lacks_pacer_case_id = self.source == 1 and not self.pacer_case_id
        if lacks_pacer_case_id:
            raise ValidationError("pacer_case_id cannot be Null or empty in "
                                  "RECAP documents.")

        super(Docket, self).save(*args, **kwargs)
Ejemplo n.º 16
0
def view_docket(request, pk, slug):
    """Render a docket page with its filtered, paginated docket entries.

    :param request: The incoming request; its GET parameters drive the entry
    filter form and the page number.
    :param pk: Primary key of the docket to show.
    :param slug: URL slug; not used in the body of this view.
    :return: The rendered 'view_docket.html' response.
    """
    docket = get_object_or_404(Docket, pk=pk)
    if not is_bot(request):
        # Count the view for non-bot requests only.
        # NOTE(review): suppress_autotime presumably keeps date_modified from
        # being bumped by this save -- confirm against its definition.
        with suppress_autotime(docket, ['date_modified']):
            cached_count = docket.view_count
            # Increment via an F() expression so the addition is done by the
            # database, then put a plain number back on the instance in
            # place of the expression.
            docket.view_count = F('view_count') + 1
            docket.save()
            docket.view_count = cached_count + 1

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        favorite_form = FavoriteForm(
            initial={
                'docket_id': docket.pk,
                'name': trunc(best_case_name(docket), 100, ellipsis='...'),
            })
    else:
        favorite_form = FavoriteForm(instance=fave)

    # Narrow the entries by whichever filter fields validated; an invalid
    # form leaves the full list untouched.
    de_list = docket.docket_entries.all().prefetch_related('recap_documents')
    form = DocketEntryFilterForm(request.GET)
    if form.is_valid():
        cd = form.cleaned_data
        if cd.get('entry_gte'):
            de_list = de_list.filter(entry_number__gte=cd['entry_gte'])
        if cd.get('entry_lte'):
            de_list = de_list.filter(entry_number__lte=cd['entry_lte'])
        if cd.get('filed_after'):
            de_list = de_list.filter(date_filed__gte=cd['filed_after'])
        if cd.get('filed_before'):
            de_list = de_list.filter(date_filed__lte=cd['filed_before'])
        if cd.get('order_by') == DocketEntryFilterForm.DESCENDING:
            de_list = de_list.order_by('-entry_number')

    # 100 entries per page, with orphans=5.
    paginator = Paginator(de_list, 100, orphans=5)
    page = request.GET.get('page')
    try:
        docket_entries = paginator.page(page)
    except PageNotAnInteger:
        # Fall back to the first page.
        docket_entries = paginator.page(1)
    except EmptyPage:
        # Fall back to the last page.
        docket_entries = paginator.page(paginator.num_pages)

    return render(
        request,
        'view_docket.html',
        {
            'docket': docket,
            'parties':
            docket.parties.exists(),  # Needed to show/hide parties tab.
            'docket_entries': docket_entries,
            'form': form,
            'favorite_form': favorite_form,
            'get_string': make_get_string(request),
            'timezone': COURT_TIMEZONES.get(docket.court_id, 'US/Eastern'),
            'private': docket.blocked,
        })
Ejemplo n.º 17
0
    def make_objects(self, item, court, sha1_hash, content):
        """Takes the meta data from the scraper and associates it with objects.

        The Docket, OpinionCluster and Opinion are only constructed here;
        none of them is saved by this method.

        :param item: Dict of scraped meta data for one opinion.
        :param court: The court the item belongs to.
        :param sha1_hash: The SHA1 hash stored on the opinion.
        :param content: The binary content of the downloaded file.
        :return: A tuple of (docket, opinion, cluster, error), where error is
        True if the binary could not be attached to the opinion.
        """
        blocked = item['blocked_statuses']
        # Only stamp a block date on items that are actually blocked; any
        # falsy status (False as well as None) leaves the date unset.
        date_blocked = date.today() if blocked else None

        case_name_short = (item.get('case_name_shorts') or
                           self.cnt.make_case_name_short(item['case_names']))
        docket = Docket(
            docket_number=item.get('docket_numbers', ''),
            case_name=item['case_names'],
            case_name_short=case_name_short,
            court=court,
            blocked=blocked,
            date_blocked=date_blocked,
            source=Docket.SCRAPER,
        )

        cluster = OpinionCluster(
            judges=item.get('judges', ''),
            date_filed=item['case_dates'],
            case_name=item['case_names'],
            case_name_short=case_name_short,
            source='C',
            precedential_status=item['precedential_statuses'],
            nature_of_suit=item.get('nature_of_suit', ''),
            blocked=blocked,
            date_blocked=date_blocked,
            federal_cite_one=item.get('west_citations', ''),
            state_cite_one=item.get('west_state_citations', ''),
            neutral_cite=item.get('neutral_citations', ''),
        )
        opinion = Opinion(
            type='010combined',
            sha1=sha1_hash,
            download_url=item['download_urls'],
        )

        error = False
        try:
            cf = ContentFile(content)
            extension = get_extension(content)
            file_name = trunc(item['case_names'].lower(), 75) + extension
            opinion.file_with_date = cluster.date_filed
            opinion.local_path.save(file_name, cf, save=False)
        except Exception:
            # Best effort: log the failure and report it through `error`.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            msg = ('Unable to save binary to disk. Deleted '
                   'item: %s.\n %s' %
                   (item['case_names'], traceback.format_exc()))
            logger.critical(msg.encode('utf-8'))
            ErrorLog(log_level='CRITICAL', court=court, message=msg).save()
            error = True

        return docket, opinion, cluster, error
Ejemplo n.º 18
0
    def make_objects(self, item, court, sha1_hash, content):
        """Takes the meta data from the scraper and associates it with objects.

        The Docket, OpinionCluster and Opinion are only constructed here;
        none of them is saved by this method.

        :param item: Dict of scraped meta data for one opinion.
        :param court: The court the item belongs to.
        :param sha1_hash: The SHA1 hash stored on the opinion.
        :param content: The binary content of the downloaded file.
        :return: A tuple of (docket, opinion, cluster, error), where error is
        True if the binary could not be attached to the opinion.
        """
        blocked = item['blocked_statuses']
        # Only stamp a block date on items that are actually blocked; any
        # falsy status (False as well as None) leaves the date unset.
        date_blocked = date.today() if blocked else None

        case_name_short = (item.get('case_name_shorts') or
                           self.cnt.make_case_name_short(item['case_names']))
        docket = Docket(
            docket_number=item.get('docket_numbers', ''),
            case_name=item['case_names'],
            case_name_short=case_name_short,
            court=court,
            blocked=blocked,
            date_blocked=date_blocked,
            source=Docket.SCRAPER,
        )

        cluster = OpinionCluster(
            judges=item.get('judges', ''),
            date_filed=item['case_dates'],
            case_name=item['case_names'],
            case_name_short=case_name_short,
            source='C',
            precedential_status=item['precedential_statuses'],
            nature_of_suit=item.get('nature_of_suit', ''),
            blocked=blocked,
            date_blocked=date_blocked,
            federal_cite_one=item.get('west_citations', ''),
            state_cite_one=item.get('west_state_citations', ''),
            neutral_cite=item.get('neutral_citations', ''),
        )
        opinion = Opinion(
            type='010combined',
            sha1=sha1_hash,
            download_url=item['download_urls'],
        )

        error = False
        try:
            cf = ContentFile(content)
            extension = get_extension(content)
            file_name = trunc(item['case_names'].lower(), 75) + extension
            opinion.file_with_date = cluster.date_filed
            opinion.local_path.save(file_name, cf, save=False)
        except Exception:
            # Best effort: log the failure and report it through `error`.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            msg = ('Unable to save binary to disk. Deleted '
                   'item: %s.\n %s' %
                   (item['case_names'], traceback.format_exc()))
            logger.critical(msg.encode('utf-8'))
            ErrorLog(log_level='CRITICAL', court=court, message=msg).save()
            error = True

        return docket, opinion, cluster, error
Ejemplo n.º 19
0
def make_docket_title(docket):
    """Build a page title from a docket's case name and docket number.

    The case name is truncated to 100 characters; pieces that are blank
    after stripping are left out.

    :param docket: The docket to title.
    :return: The pieces joined with ", ".
    """
    candidates = (
        trunc(best_case_name(docket), 100, ellipsis="..."),
        docket.docket_number,
    )
    return ", ".join(part for part in candidates if part.strip())
    def make_objects(self, item, court, sha1_hash, content):
        """Build the docket and audio objects for one scraped oral argument.

        The Docket and Audio objects are only constructed here; neither is
        saved by this method.

        :param item: Dict of scraped meta data for one oral argument.
        :param court: The court the item belongs to.
        :param sha1_hash: The SHA1 hash stored on the audio file.
        :param content: The binary content of the downloaded file.
        :return: A tuple of (docket, audio_file, error), where error is True
        if the binary could not be attached to the audio file.
        """
        blocked = item["blocked_statuses"]
        if blocked:
            date_blocked = date.today()
        else:
            date_blocked = None

        case_name_short = item.get(
            "case_name_shorts"
        ) or self.cnt.make_case_name_short(item["case_names"])

        docket = Docket(
            docket_number=item.get("docket_numbers", ""),
            case_name=item["case_names"],
            case_name_short=case_name_short,
            court=court,
            blocked=blocked,
            date_blocked=date_blocked,
            date_argued=item["case_dates"],
            source=Docket.SCRAPER,
        )

        audio_file = Audio(
            judges=item.get("judges", ""),
            source="C",
            case_name=item["case_names"],
            case_name_short=case_name_short,
            sha1=sha1_hash,
            download_url=item["download_urls"],
            blocked=blocked,
            date_blocked=date_blocked,
        )

        error = False
        try:
            cf = ContentFile(content)
            extension = get_extension(content)
            if extension not in [".mp3", ".wma"]:
                # Fall back to the extension found in the download URL.
                extension = (
                    "." + item["download_urls"].lower().rsplit(".", 1)[1]
                )
            # See bitbucket issue #215 for why this must be
            # lower-cased.
            file_name = trunc(item["case_names"].lower(), 75) + extension
            audio_file.file_with_date = docket.date_argued
            audio_file.local_path_original_file.save(file_name, cf, save=False)
        except Exception:
            # Best effort: log the failure and report it through `error`.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            msg = (
                "Unable to save binary to disk. Deleted audio file: %s.\n "
                "%s" % (item["case_names"], traceback.format_exc())
            )
            logger.critical(msg.encode("utf-8"))
            ErrorLog(log_level="CRITICAL", court=court, message=msg).save()
            error = True

        return docket, audio_file, error
Ejemplo n.º 21
0
def cluster_visualizations(request, pk, slug):
    """Render the visualizations page for an opinion cluster.

    :param request: The incoming request.
    :param pk: Primary key of the OpinionCluster to show.
    :param slug: URL slug; not used in the body of this view.
    :return: The rendered 'view_opinion_visualizations.html' response.
    """
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = '%s, %s' % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string,
    )
    # Private if the cluster is blocked or has a private authority.
    is_private = cluster.blocked or cluster.has_private_authority
    context = {
        'title': title,
        'cluster': cluster,
        'private': is_private,
    }
    return render(request, 'view_opinion_visualizations.html', context)
Ejemplo n.º 22
0
def cluster_visualizations(request, pk, slug):
    """Render the visualizations page for an opinion cluster.

    :param request: The incoming request.
    :param pk: Primary key of the OpinionCluster to show.
    :param slug: URL slug; not used in the body of this view.
    :return: The rendered 'view_opinion_visualizations.html' response.
    """
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    title = '%s, %s' % (
        trunc(best_case_name(cluster), 100),
        cluster.citation_string,
    )
    # Private if the cluster is blocked or has a private authority.
    is_private = cluster.blocked or cluster.has_private_authority
    context = {
        'title': title,
        'cluster': cluster,
        'private': is_private,
    }
    return render(request, 'view_opinion_visualizations.html', context)
Ejemplo n.º 23
0
def send_docket_alert(d_pk, since):
    """Email every subscriber of a docket about its new docket entries.

    One message is built per subscriber address, and the first message is
    additionally bcc'd so that a copy is received. Whether or not anything
    was sent, the alert key for the docket is deleted from redis at the end.

    :param d_pk: The PK of the docket that was modified.
    :param since: Only docket entries created at or after this time are
    included in the alert.
    :return: None
    """
    recipients = (
        User.objects.filter(docket_alerts__docket_id=d_pk)
        .distinct()
        .values_list('email', flat=True)
    )
    if recipients:
        # At least one user has an alert for this docket. Proceed.
        docket = Docket.objects.get(pk=d_pk)
        entries = DocketEntry.objects.filter(
            date_created__gte=since,
            docket=docket,
        )

        if entries.count() > 0:
            case_name = trunc(best_case_name(docket), 100, ellipsis='...')
            subject_template = loader.get_template('docket_alert_subject.txt')
            subject_context = {
                'docket': docket,
                'count': entries.count(),
                'case_name': case_name,
            }
            # Strip the newlines that editors can insist on adding.
            subject = subject_template.render(subject_context).strip()

            email_context = {'new_des': entries, 'docket': docket}
            txt_template = loader.get_template('docket_alert_email.txt')
            html_template = loader.get_template('docket_alert_email.html')
            entity_headers = {'X-Entity-Ref-ID': 'docket.alert:%s' % d_pk}

            messages = []
            for address in recipients:
                text_body = txt_template.render(email_context)
                message = EmailMultiAlternatives(
                    subject=subject,
                    body=text_body,
                    from_email=settings.DEFAULT_ALERTS_EMAIL,
                    to=[address],
                    headers=entity_headers,
                )
                html_body = html_template.render(email_context)
                message.attach_alternative(html_body, "text/html")
                messages.append(message)

            # Bcc the first message only, so that exactly one copy comes back.
            messages[0].bcc = ['*****@*****.**']
            get_connection().send_messages(messages)
            tally_stat('alerts.docket.alerts.sent', inc=len(recipients))

        DocketAlert.objects.filter(docket=docket).update(date_last_hit=now())

    # Work completed, clear the semaphore.
    redis_conn = redis.StrictRedis(
        host=settings.REDIS_HOST,
        port=settings.REDIS_PORT,
        db=settings.REDIS_DATABASES['ALERTS'],
    )
    redis_conn.delete(make_alert_key(d_pk))
Ejemplo n.º 24
0
def process_audio_file(pk):
    """Given the key to an audio file, extract its content and add the related
    meta data to the database.

    The original file is converted to a temporary mp3 with the binary returned
    by get_audio_binary(). That mp3 is handed to set_mp3_meta_data(), saved to
    the item's local_path_mp3 field, and used to compute the duration. The
    temporary file is deleted when processing ends, whether it succeeded or
    not.

    :param pk: The primary key of the Audio item to process.
    :return: None
    :raises subprocess.CalledProcessError: If the conversion command fails.
    """
    af = Audio.objects.get(pk=pk)
    tmp_path = os.path.join("/tmp", "audio_" + uuid.uuid4().hex + ".mp3")
    av_path = get_audio_binary()
    av_command = [
        av_path,
        "-i",
        af.local_path_original_file.path,
        "-ar",
        "22050",  # sample rate (audio samples/s) of 22050Hz
        "-ab",
        "48k",  # constant bit rate (sample resolution) of 48kbps
        tmp_path,
    ]
    try:
        try:
            subprocess.check_output(av_command, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            print(
                "%s failed command: %s\nerror code: %s\noutput: %s\n%s"
                % (
                    av_path,
                    av_command,
                    e.returncode,
                    e.output,
                    traceback.format_exc(),
                )
            )
            raise

        set_mp3_meta_data(af, tmp_path)
        try:
            # mp3 data is binary, so it must not be read in text mode.
            with open(tmp_path, "rb") as mp3:
                cf = ContentFile(mp3.read())
            file_name = trunc(best_case_name(af).lower(), 72) + "_cl.mp3"
            af.file_with_date = af.docket.date_argued
            af.local_path_mp3.save(file_name, cf, save=False)
        except Exception:
            # Saving the mp3 is best-effort: log the failure and continue so
            # the remaining fields are still recorded.
            msg = (
                "Unable to save mp3 to audio_file in scraper.tasks."
                "process_audio_file for item: %s\n"
                "Traceback:\n"
                "%s" % (af.pk, traceback.format_exc())
            )
            print(msg)
            ErrorLog.objects.create(
                log_level="CRITICAL", court=af.docket.court, message=msg
            )

        af.duration = eyed3.load(tmp_path).info.time_secs
        af.processing_complete = True
        af.save()
    finally:
        # Don't leave converted files behind in /tmp.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
Ejemplo n.º 25
0
    def save(self, *args, **kwargs):
        """Fill in the derived fields, then save the object.

        Sets the title when it is empty, stamps the publication and deletion
        dates the first time those flags are switched on, and creates the slug
        on the first save. Positional and keyword arguments are passed through
        to the parent class's save().
        """
        # The title has to exist before the slug, which is derived from it.
        if not self.title:
            generated_title = self.make_title()
            self.title = trunc(generated_title, 200, ellipsis=u'…')

        # Published for the first time.
        first_publication = (self.published is True and
                             self.date_published is None)
        if first_publication:
            self.date_published = now()

        # The deleted flag changed to True since the last save.
        just_deleted = (self.deleted is True and
                        self.__original_deleted != self.deleted)
        if just_deleted:
            self.date_deleted = now()

        is_first_save = self.pk is None
        if is_first_save:
            # Clusters and json can't be added at this point; that has to
            # wait until the object has been saved once.
            self.slug = slugify(trunc(self.title, 75))

        super(SCOTUSMap, self).save(*args, **kwargs)
        # Remember the flag so the next save can detect a change.
        self.__original_deleted = self.deleted
Ejemplo n.º 26
0
    def save(self, *args, **kwargs):
        """Populate title, date stamps and slug before delegating to the
        parent class's save().

        All arguments are forwarded unchanged to the parent implementation.
        """
        # Order matters: the slug below is built from the title.
        if not self.title:
            self.title = trunc(self.make_title(), 200, ellipsis=u"…")

        newly_published = self.published is True and self.date_published is None
        if newly_published:
            self.date_published = now()

        newly_deleted = (
            self.deleted is True and self.__original_deleted != self.deleted
        )
        if newly_deleted:
            self.date_deleted = now()

        if self.pk is None:
            # First save. Clusters and json would ideally be added here too,
            # but that is not possible until the object has been saved.
            short_title = trunc(self.title, 75)
            self.slug = slugify(short_title)

        super(SCOTUSMap, self).save(*args, **kwargs)
        # Track the current value so a later change to `deleted` is noticed.
        self.__original_deleted = self.deleted
Ejemplo n.º 27
0
    def make_objects(self, item, court, sha1_hash, content):
        """Build unsaved Docket and Audio objects from a scraped item.

        :param item: A dict of scraped values. 'blocked_statuses',
        'case_names', 'case_dates' and 'download_urls' are required;
        'case_name_shorts', 'docket_numbers' and 'judges' are optional.
        :param court: The court assigned to the docket.
        :param sha1_hash: The hash stored on the Audio object.
        :param content: The downloaded binary content of the audio file.
        :return: A (docket, audio_file, error) tuple. `error` is True when the
        binary could not be attached to the Audio object.
        """
        blocked = item['blocked_statuses']
        date_blocked = date.today() if blocked else None

        case_name_short = (item.get('case_name_shorts') or
                           self.cnt.make_case_name_short(item['case_names']))

        docket = Docket(
            docket_number=item.get('docket_numbers', ''),
            case_name=item['case_names'],
            case_name_short=case_name_short,
            court=court,
            blocked=blocked,
            date_blocked=date_blocked,
            date_argued=item['case_dates'],
            source=Docket.SCRAPER,
        )

        audio_file = Audio(
            judges=item.get('judges', ''),
            source='C',
            case_name=item['case_names'],
            case_name_short=case_name_short,
            sha1=sha1_hash,
            download_url=item['download_urls'],
            blocked=blocked,
            date_blocked=date_blocked,
        )

        error = False
        try:
            cf = ContentFile(content)
            extension = get_extension(content)
            if extension not in ['.mp3', '.wma']:
                # Fall back to whatever follows the last dot in the URL.
                extension = '.' + item['download_urls'].lower().rsplit('.',
                                                                       1)[1]
            # See bitbucket issue #215 for why this must be
            # lower-cased.
            file_name = trunc(item['case_names'].lower(), 75) + extension
            audio_file.file_with_date = docket.date_argued
            audio_file.local_path_original_file.save(file_name, cf, save=False)
        except Exception:
            # Deliberately broad so any save failure is logged, but not bare:
            # KeyboardInterrupt and SystemExit must still propagate.
            msg = 'Unable to save binary to disk. Deleted audio file: %s.\n ' \
                  '%s' % (item['case_names'], traceback.format_exc())
            logger.critical(msg.encode('utf-8'))
            ErrorLog(log_level='CRITICAL', court=court, message=msg).save()
            error = True

        return docket, audio_file, error
Ejemplo n.º 28
0
def send_docket_alert(d_pk, since):
    """Send an alert for a given docket

    One email is built per subscribed user; the first message is also bcc'ed
    to a fixed address. After the work is done, the alert key for the docket
    is deleted from redis.

    :param d_pk: The docket PK that was modified
    :param since: If we run alerts, notify users about items *since* this time.
    :return: None
    """
    email_addresses = (User.objects.filter(
        docket_alerts__docket_id=d_pk).distinct().values_list("email",
                                                              flat=True))
    if email_addresses:
        # We have an alert for this docket. Proceed.
        docket = Docket.objects.get(pk=d_pk)
        new_des = DocketEntry.objects.filter(date_created__gte=since,
                                             docket=docket)

        # Count once: each call to count() is a separate query, and the
        # number in the subject should match the one that was checked.
        new_des_count = new_des.count()
        if new_des_count > 0:
            # Notify every user that's subscribed to this alert.
            case_name = trunc(best_case_name(docket), 100, ellipsis="...")
            subject_template = loader.get_template("docket_alert_subject.txt")
            subject = subject_template.render({
                "docket": docket,
                "count": new_des_count,
                "case_name": case_name,
            }).strip()  # Remove newlines that editors can insist on adding.
            email_context = {"new_des": new_des, "docket": docket}
            txt_template = loader.get_template("docket_alert_email.txt")
            html_template = loader.get_template("docket_alert_email.html")
            messages = []
            for email_address in email_addresses:
                msg = EmailMultiAlternatives(
                    subject=subject,
                    body=txt_template.render(email_context),
                    from_email=settings.DEFAULT_ALERTS_EMAIL,
                    to=[email_address],
                    headers={"X-Entity-Ref-ID": "docket.alert:%s" % d_pk},
                )
                html = html_template.render(email_context)
                msg.attach_alternative(html, "text/html")
                messages.append(msg)

            # Add a bcc to the first message in the list so that we get a copy.
            messages[0].bcc = ["*****@*****.**"]
            connection = get_connection()
            connection.send_messages(messages)
            tally_stat("alerts.docket.alerts.sent", inc=len(email_addresses))

        DocketAlert.objects.filter(docket=docket).update(date_last_hit=now())

    # Work completed, clear the semaphore
    r = make_redis_interface("ALERTS")
    r.delete(make_alert_key(d_pk))
Ejemplo n.º 29
0
def view_recap_document(request,
                        docket_id=None,
                        doc_num=None,
                        att_num=None,
                        slug=""):
    """Show a RECAP document page, for either an attachment or a regular
    document depending on which URL pattern matched.

    If the "redirectToDownload" GET parameter is set, the user is redirected
    to the document itself instead of the page.
    """
    item = get_object_or_404(
        RECAPDocument,
        docket_entry__docket__id=docket_id,
        document_number=doc_num,
        attachment_number=att_num,
    )

    # The user may ask to be sent straight to the document.
    wants_download = request.GET.get("redirectToDownload", False)
    if wants_download:
        # Use our own copy when we have one; otherwise send them to PACER.
        target = item.filepath_local if item.is_available else item.pacer_url
        return redirect(target)

    title = make_rd_title(item)
    if is_og_bot(request):
        make_thumb_if_needed(item)
        item.refresh_from_db()

    try:
        fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        initial_values = {
            "recap_doc_id": item.pk,
            "name": trunc(title, 100, ellipsis="..."),
        }
        favorite_form = FavoriteForm(initial=initial_values)
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        "document": item,
        "title": title,
        "favorite_form": favorite_form,
        "private": True,  # Always True for RECAP docs.
    }
    return render(request, "recap_document.html", context)
    def make_objects(self, item, court, sha1_hash, content):
        """Create unsaved Docket and Audio objects for a scraped item.

        :param item: A dict of scraped values. 'blocked_statuses',
        'case_names', 'case_dates' and 'download_urls' are required;
        'case_name_shorts', 'docket_numbers' and 'judges' are optional.
        :param court: The court assigned to the docket.
        :param sha1_hash: The hash stored on the Audio object.
        :param content: The downloaded binary content of the audio file.
        :return: A (docket, audio_file, error) tuple. `error` is True when the
        binary could not be attached to the Audio object.
        """
        blocked = item['blocked_statuses']
        date_blocked = date.today() if blocked else None

        case_name_short = (item.get('case_name_shorts') or
                           self.cnt.make_case_name_short(item['case_names']))

        docket = Docket(
            docket_number=item.get('docket_numbers', ''),
            case_name=item['case_names'],
            case_name_short=case_name_short,
            court=court,
            blocked=blocked,
            date_blocked=date_blocked,
            date_argued=item['case_dates'],
            source=Docket.SCRAPER,
        )

        audio_file = Audio(
            judges=item.get('judges', ''),
            source='C',
            case_name=item['case_names'],
            case_name_short=case_name_short,
            sha1=sha1_hash,
            download_url=item['download_urls'],
            blocked=blocked,
            date_blocked=date_blocked,
        )

        error = False
        try:
            cf = ContentFile(content)
            extension = get_extension(content)
            if extension not in ['.mp3', '.wma']:
                # Fall back to whatever follows the last dot in the URL.
                extension = '.' + item['download_urls'].lower().rsplit('.', 1)[1]
            # See bitbucket issue #215 for why this must be
            # lower-cased.
            file_name = trunc(item['case_names'].lower(), 75) + extension
            audio_file.file_with_date = docket.date_argued
            audio_file.local_path_original_file.save(file_name, cf, save=False)
        except Exception:
            # Deliberately broad so any save failure is logged, but not bare:
            # KeyboardInterrupt and SystemExit must still propagate.
            msg = 'Unable to save binary to disk. Deleted audio file: %s.\n ' \
                  '%s' % (item['case_names'], traceback.format_exc())
            logger.critical(msg.encode('utf-8'))
            ErrorLog(log_level='CRITICAL', court=court, message=msg).save()
            error = True

        return docket, audio_file, error
Ejemplo n.º 31
0
def view_docket(request, pk, _):
    """Render the page for a docket with its filtered, paginated entries.

    The view count is incremented unless the request comes from a bot. The
    third positional argument is unused.
    """
    docket = get_object_or_404(Docket, pk=pk)
    if not is_bot(request):
        docket.view_count = F('view_count') + 1
        docket.save()

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        initial_values = {
            'docket_id': docket.pk,
            'name': trunc(best_case_name(docket), 100, ellipsis='...'),
        }
        favorite_form = FavoriteForm(initial=initial_values)
    else:
        favorite_form = FavoriteForm(instance=fave)

    entries = docket.docket_entries.all()
    form = DocketEntryFilterForm(request.GET)
    if form.is_valid():
        cd = form.cleaned_data
        # Form field -> queryset lookup, applied in this order when set.
        field_lookups = (
            ('entry_gte', 'entry_number__gte'),
            ('entry_lte', 'entry_number__lte'),
            ('filed_after', 'date_filed__gte'),
            ('filed_before', 'date_filed__lte'),
        )
        for field_name, lookup in field_lookups:
            if cd.get(field_name):
                entries = entries.filter(**{lookup: cd[field_name]})
        if cd.get('order_by') == DocketEntryFilterForm.DESCENDING:
            entries = entries.order_by('-entry_number')

    paginator = Paginator(entries, 500, orphans=25)
    requested_page = request.GET.get('page')
    try:
        docket_entries = paginator.page(requested_page)
    except PageNotAnInteger:
        # Missing or non-numeric page: show the first one.
        docket_entries = paginator.page(1)
    except EmptyPage:
        # Out of range: show the last one.
        docket_entries = paginator.page(paginator.num_pages)

    context = {
        'docket': docket,
        'docket_entries': docket_entries,
        'form': form,
        'favorite_form': favorite_form,
        'get_string': make_get_string(request),
        'private': docket.blocked,
    }
    return render(request, 'view_docket.html', context)
Ejemplo n.º 32
0
def do_docket_number(data: Dict[str, Any]) -> str:
    """Extract the docket number

    :param data: The full json data dict
    :return: The docket number, truncated to at most 5000 characters, or an
    empty string when the value is empty.
    """
    docket_number = data["docket_number"]
    if not docket_number:
        return ""
    return trunc(docket_number, length=5000, ellipsis="...")
Ejemplo n.º 33
0
def cluster_visualizations(
    request: HttpRequest, pk: int, slug: str
) -> HttpResponse:
    """Render the visualizations page for an opinion cluster.

    :param request: The incoming request
    :param pk: The PK of the OpinionCluster; a 404 is raised if none matches
    :param slug: Not used by the view
    :return: The rendered response
    """
    cluster = get_object_or_404(OpinionCluster, pk=pk)
    case_name = trunc(best_case_name(cluster), 100)
    page_title = "%s, %s" % (case_name, cluster.citation_string)
    context = {
        "title": page_title,
        "cluster": cluster,
        "private": cluster.blocked or cluster.has_private_authority,
    }
    return render(request, "view_opinion_visualizations.html", context)
Ejemplo n.º 34
0
def view_docket(request, pk, _):
    """Show a docket along with a filtered, paginated list of its entries.

    Non-bot requests bump the docket's view count. The last positional
    argument is ignored.
    """
    docket = get_object_or_404(Docket, pk=pk)
    if not is_bot(request):
        docket.view_count = F('view_count') + 1
        docket.save()

    try:
        fave = Favorite.objects.get(docket_id=docket.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        default_name = trunc(best_case_name(docket), 100, ellipsis='...')
        favorite_form = FavoriteForm(initial={
            'docket_id': docket.pk,
            'name': default_name,
        })
    else:
        favorite_form = FavoriteForm(instance=fave)

    de_list = docket.docket_entries.all()
    form = DocketEntryFilterForm(request.GET)
    if form.is_valid():
        cleaned = form.cleaned_data
        # Each pair is (form field, queryset lookup); empty fields are skipped.
        for field, lookup in (('entry_gte', 'entry_number__gte'),
                              ('entry_lte', 'entry_number__lte'),
                              ('filed_after', 'date_filed__gte'),
                              ('filed_before', 'date_filed__lte')):
            if cleaned.get(field):
                de_list = de_list.filter(**{lookup: cleaned[field]})
        if cleaned.get('order_by') == DocketEntryFilterForm.DESCENDING:
            de_list = de_list.order_by('-entry_number')

    paginator = Paginator(de_list, 500, orphans=25)
    page_number = request.GET.get('page')
    try:
        docket_entries = paginator.page(page_number)
    except PageNotAnInteger:
        # No usable page number was given, so start at the beginning.
        docket_entries = paginator.page(1)
    except EmptyPage:
        # Past the end, so fall back to the final page.
        docket_entries = paginator.page(paginator.num_pages)

    template_context = {
        'docket': docket,
        'docket_entries': docket_entries,
        'form': form,
        'favorite_form': favorite_form,
        'get_string': make_get_string(request),
        'private': docket.blocked,
    }
    return render(request, 'view_docket.html', template_context)
Ejemplo n.º 35
0
def view_authorities(request, pk, slug):
    """Render the authorities page for an opinion cluster.

    A 404 is raised when no cluster has the given pk. `slug` is not used.
    """
    cluster = get_object_or_404(OpinionCluster, pk=pk)

    case_name = trunc(best_case_name(cluster), 100)
    context = {
        'title': '%s, %s' % (case_name, cluster.citation_string),
        'cluster': cluster,
        'private': cluster.blocked or cluster.has_private_authority,
        'authorities': cluster.authorities.order_by('case_name'),
    }
    return render_to_response('view_opinion_authorities.html', context,
                              RequestContext(request))
Ejemplo n.º 36
0
def view_authorities(request, pk, slug):
    """Show the authorities of the opinion cluster with the given pk.

    Responds with a 404 if the cluster does not exist. The slug is ignored.
    """
    cluster = get_object_or_404(OpinionCluster, pk=pk)

    title = '%s, %s' % (trunc(best_case_name(cluster), 100),
                        cluster.citation_string)
    is_private = cluster.blocked or cluster.has_private_authority
    authorities = cluster.authorities.order_by('case_name')
    template_values = {
        'title': title,
        'cluster': cluster,
        'private': is_private,
        'authorities': authorities,
    }
    return render_to_response(
        'view_opinion_authorities.html',
        template_values,
        RequestContext(request)
    )
def make_objects(
    item: Dict[str, Any],
    court: Court,
    sha1_hash: str,
    content: str,
) -> Tuple[Docket, Audio]:
    """Build unsaved Docket and Audio objects from a scraped item.

    :param item: The scraped values. "blocked_statuses", "case_names",
    "case_dates" and "download_urls" are required; the other keys read here
    are optional.
    :param court: The court assigned to the docket
    :param sha1_hash: The hash stored on the Audio object
    :param content: The downloaded content of the audio file
    :return: The docket and the audio file, with the content attached to the
    audio file's local_path_original_file field.
    """
    blocked = item["blocked_statuses"]
    date_blocked = date.today() if blocked else None

    case_name = item["case_names"]
    case_name_short = item.get("case_name_shorts")
    if not case_name_short:
        case_name_short = cnt.make_case_name_short(case_name)

    docket = Docket(
        docket_number=item.get("docket_numbers", ""),
        case_name=case_name,
        case_name_short=case_name_short,
        court=court,
        blocked=blocked,
        date_blocked=date_blocked,
        date_argued=item["case_dates"],
        source=item.get("source") or Docket.SCRAPER,
    )

    audio_file = Audio(
        judges=item.get("judges", ""),
        source=item.get("cluster_source") or "C",
        case_name=case_name,
        case_name_short=case_name_short,
        sha1=sha1_hash,
        download_url=item["download_urls"],
        blocked=blocked,
        date_blocked=date_blocked,
    )

    binary = ContentFile(content)
    extension = get_extension(content)
    if extension not in [".mp3", ".wma"]:
        # Fall back to whatever follows the last dot in the download URL.
        url_parts = item["download_urls"].lower().rsplit(".", 1)
        extension = "." + url_parts[1]
    file_name = trunc(case_name.lower(), 75) + extension
    audio_file.file_with_date = docket.date_argued
    audio_file.local_path_original_file.save(file_name, binary, save=False)

    return docket, audio_file
Ejemplo n.º 38
0
    def test_trunc(self) -> None:
        """Check trunc against a table of lengths and ellipsis settings."""

        class TruncCase(TypedDict, total=False):
            length: int
            result: str
            ellipsis: str

        s = "Henry wants apple."
        cases: Tuple[TruncCase, ...] = (
            # Simple case
            {"length": 13, "result": "Henry wants"},
            # Off by one cases
            {"length": 4, "result": "Henr"},
            {"length": 5, "result": "Henry"},
            {"length": 6, "result": "Henry"},
            # Do we include the length of the ellipsis when measuring?
            {"length": 12, "ellipsis": "...", "result": "Henry..."},
            # What happens when an alternate ellipsis is used instead?
            {"length": 15, "ellipsis": "....", "result": "Henry wants...."},
            # Do we cut properly when no spaces are found?
            {"length": 2, "result": "He"},
            # Do we cut properly when ellipsizing if no spaces found?
            {"length": 6, "ellipsis": "...", "result": "Hen..."},
            # Do we return the whole s when length >= s?
            {"length": 50, "result": s},
        )
        for case in cases:
            max_length = case["length"]
            expected = case["result"]
            actual = trunc(
                s=s,
                length=max_length,
                ellipsis=case.get("ellipsis", None),
            )
            # The output must match exactly...
            mismatch_msg = "Failed with dict: %s.\n%s != %s" % (
                case,
                actual,
                expected,
            )
            self.assertEqual(actual, expected, msg=mismatch_msg)
            # ...and must never exceed the requested length.
            too_long_msg = "Failed with dict: %s.\n%s is longer than %s" % (
                case,
                actual,
                max_length,
            )
            self.assertTrue(len(actual) <= max_length, msg=too_long_msg)
Ejemplo n.º 39
0
def view_authorities(request, pk, slug):
    """Render the authorities page for the opinion cluster with the given pk.

    A 404 is raised when the cluster does not exist. `slug` is unused.
    """
    cluster = get_object_or_404(OpinionCluster, pk=pk)

    case_name = trunc(best_case_name(cluster), 100)
    context = {
        "title": "%s, %s" % (case_name, cluster.citation_string),
        "cluster": cluster,
        "private": cluster.blocked or cluster.has_private_authority,
        "authorities_with_data": cluster.authorities_with_data,
    }
    return render(request, "view_opinion_authorities.html", context)
Ejemplo n.º 40
0
def view_recap_document(
    request: HttpRequest,
    docket_id: Optional[int] = None,
    doc_num: Optional[int] = None,
    att_num: Optional[int] = None,
    slug: str = "",
) -> HttpResponse:
    """Show the page for a RECAP document, which may be an attachment or a
    regular document depending on the URL pattern that matched.

    When several documents match, the one with the lowest pk is shown; when
    none match, Http404 is raised.
    """
    matches = RECAPDocument.objects.filter(
        docket_entry__docket__id=docket_id,
        document_number=doc_num,
        attachment_number=att_num,
    ).order_by("pk")
    try:
        rd = matches[0]
    except IndexError:
        raise Http404("No RECAPDocument matches the given query.")

    title = make_rd_title(rd)
    rd = make_thumb_if_needed(request, rd)

    try:
        fave = Favorite.objects.get(recap_doc_id=rd.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        initial_values = {
            "recap_doc_id": rd.pk,
            "name": trunc(title, 100, ellipsis="..."),
        }
        favorite_form = FavoriteForm(initial=initial_values)
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        "rd": rd,
        "title": title,
        "favorite_form": favorite_form,
        "private": True,  # Always True for RECAP docs.
    }
    return render(request, "recap_document.html", context)
Ejemplo n.º 41
0
def process_audio_file(self, pk) -> None:
    """Convert an audio file and store the resulting mp3 and its duration.

    The conversion is done by convert_and_clean_audio(); its JSON response
    supplies the base64-encoded mp3 ("audio_b64") and the "duration".

    :param self: A Celery task object
    :param pk: Audio file pk
    :return: None
    """
    audio = Audio.objects.get(pk=pk)

    response = convert_and_clean_audio(audio)
    response.raise_for_status()
    payload = response.json()

    mp3_content = ContentFile(base64.b64decode(payload["audio_b64"]))
    mp3_name = trunc(best_case_name(audio).lower(), 72) + "_cl.mp3"

    audio.file_with_date = audio.docket.date_argued
    # save=False: the model itself is saved once, at the end.
    audio.local_path_mp3.save(mp3_name, mp3_content, save=False)
    audio.duration = payload["duration"]
    audio.processing_complete = True
    audio.save()
Ejemplo n.º 42
0
def view_recap_document(request,
                        docket_id=None,
                        doc_num=None,
                        att_num=None,
                        slug=''):
    """Show the page for a RECAP document, which may be an attachment or a
    regular document depending on the URL pattern that matched.
    """
    item = get_object_or_404(
        RECAPDocument,
        docket_entry__docket__id=docket_id,
        document_number=doc_num,
        attachment_number=att_num,
    )

    # The title is "<description> - Document #N[, Attachment #M] in <case>",
    # with the description and attachment parts left out when not applicable.
    description_part = ''
    if item.description:
        description_part = '%s &ndash; ' % item.description
    attachment_part = ''
    if item.document_type == RECAPDocument.ATTACHMENT:
        attachment_part = ', Attachment #%s' % item.attachment_number
    title = '%sDocument #%s%s in %s' % (
        description_part,
        item.document_number,
        attachment_part,
        best_case_name(item.docket_entry.docket),
    )

    try:
        fave = Favorite.objects.get(recap_doc_id=item.pk, user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        initial_values = {
            'recap_doc_id': item.pk,
            'name': trunc(title, 100, ellipsis='...'),
        }
        favorite_form = FavoriteForm(initial=initial_values)
    else:
        favorite_form = FavoriteForm(instance=fave)

    context = {
        'document': item,
        'title': title,
        'favorite_form': favorite_form,
        'private': True,  # Always True for RECAP docs.
    }
    return render(request, 'recap_document.html', context)
Ejemplo n.º 43
0
def process_audio_file(pk):
    """Given the key to an audio file, extract its content and add the related
    meta data to the database.

    The original file is converted to a temporary mp3 with avconv. That mp3 is
    handed to set_mp3_meta_data(), saved to the item's local_path_mp3 field,
    and used to compute the duration. The temporary file is deleted when
    processing ends, whether it succeeded or not.

    :param pk: The primary key of the Audio item to process.
    :return: None
    :raises subprocess.CalledProcessError: If the avconv command fails.
    """
    af = Audio.objects.get(pk=pk)
    tmp_path = os.path.join('/tmp', 'audio_' + uuid.uuid4().hex + '.mp3')
    avconv_command = [
        'avconv', '-i', af.local_path_original_file.path,
        '-ar', '22050',  # sample rate (audio samples/s) of 22050Hz
        '-ab', '48k',    # constant bit rate (sample resolution) of 48kbps
        tmp_path
    ]
    try:
        try:
            subprocess.check_output(
                avconv_command,
                stderr=subprocess.STDOUT
            )
        except subprocess.CalledProcessError as e:
            print('avconv failed command: %s\nerror code: %s\noutput: %s\n%s' %
                  (avconv_command, e.returncode, e.output,
                   traceback.format_exc()))
            raise

        set_mp3_meta_data(af, tmp_path)
        try:
            # mp3 data is binary, so it must not be read in text mode.
            with open(tmp_path, 'rb') as mp3:
                cf = ContentFile(mp3.read())
            file_name = trunc(best_case_name(af).lower(), 72) + '_cl.mp3'
            af.file_with_date = af.docket.date_argued
            af.local_path_mp3.save(file_name, cf, save=False)
        except Exception:
            # Saving the mp3 is best-effort: log the failure and continue so
            # the remaining fields are still recorded.
            msg = ("Unable to save mp3 to audio_file in scraper.tasks."
                   "process_audio_file for item: %s\n"
                   "Traceback:\n"
                   "%s" % (af.pk, traceback.format_exc()))
            print(msg)
            ErrorLog.objects.create(log_level='CRITICAL',
                                    court=af.docket.court,
                                    message=msg)

        af.duration = eyed3.load(tmp_path).info.time_secs
        af.processing_complete = True
        af.save()
    finally:
        # Don't leave converted files behind in /tmp.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
Ejemplo n.º 44
0
 def test_trunc(self):
     """Check trunc against a table of lengths and ellipsis settings."""
     s = 'Henry wants apple.'
     cases = (
         # Simple case
         {'length': 13, 'result': 'Henry wants'},
         # Off by one cases
         {'length': 4, 'result': 'Henr'},
         {'length': 5, 'result': 'Henry'},
         {'length': 6, 'result': 'Henry'},
         # Do we include the length of the ellipsis when measuring?
         {'length': 12, 'ellipsis': '...', 'result': 'Henry...'},
         # What happens when an alternate ellipsis is used instead?
         {'length': 15, 'ellipsis': '....', 'result': 'Henry wants....'},
         # Do we cut properly when no spaces are found?
         {'length': 2, 'result': 'He'},
         # Do we cut properly when ellipsizing if no spaces found?
         {'length': 6, 'ellipsis': '...', 'result': 'Hen...'},
         # Do we return the whole s when length >= s?
         {'length': 50, 'result': s}
     )
     for case in cases:
         max_length = case['length']
         expected = case['result']
         actual = trunc(
             s=s,
             length=max_length,
             ellipsis=case.get('ellipsis', None),
         )
         # The output must match exactly...
         mismatch_msg = 'Failed with dict: %s.\n%s != %s' % (
             case, actual, expected)
         self.assertEqual(actual, expected, msg=mismatch_msg)
         # ...and must never exceed the requested length.
         too_long_msg = "Failed with dict: %s.\n%s is longer than %s" % (
             case, actual, max_length)
         self.assertTrue(len(actual) <= max_length, msg=too_long_msg)
Ejemplo n.º 45
0
 def save(self, *args, **kwargs):
     """Regenerate the slug from the full name, validate, then save.

     All arguments are forwarded to the parent class's save().
     """
     slug_source = trunc(self.name_full, 158)
     self.slug = slugify(slug_source)
     # Run model validation before anything is written.
     self.full_clean()
     super(Person, self).save(*args, **kwargs)
Ejemplo n.º 46
0
            stderr=subprocess.STDOUT
        )
    except subprocess.CalledProcessError, e:
        print 'avconv failed command: %s\nerror code: %s\noutput: %s\n' % \
              (avconv_command, e.returncode, e.output)
        print traceback.format_exc()
        raise

    # Have to do this last because otherwise the mp3 hasn't yet been generated.
    set_mp3_meta_data(af, path_to_tmp_location)

    af.duration = eyed3.load(path_to_tmp_location).info.time_secs

    with open(path_to_tmp_location, 'r') as mp3:
        try:
            cf = ContentFile(mp3.read())
            file_name = trunc(best_case_name(af).lower(), 72) + '_cl.mp3'
            af.file_with_date = af.docket.date_argued
            af.local_path_mp3.save(file_name, cf, save=False)
        except:
            msg = "Unable to save mp3 to audio_file in scraper.tasks.process_" \
                  "audio_file for item: %s\nTraceback:\n%s" % \
                  (af.pk, traceback.format_exc())
            print msg
            ErrorLog(log_level='CRITICAL', court=af.docket.court,
                     message=msg).save()

    af.processing_complete = True
    af.save()
    os.remove(path_to_tmp_location)
Ejemplo n.º 47
0
def download_and_save():
    """This function is run in many threads simultaneously. Each thread
    runs so long as there are items in the queue. Once an item is found, it's
    downloaded and saved.

    The number of items that can be concurrently saved is determined by the
    number of threads that are running this function.

    Every item taken from the queue is marked as done exactly once, whether
    it was saved, skipped, or failed.
    """
    while True:
        item = queue.get()
        try:
            _download_and_save_item(item)
        finally:
            # Exactly one task_done() per get(), or queue.join() either
            # never returns (too few) or raises ValueError (too many).
            queue.task_done()


def _download_and_save_item(item):
    """Download one queued item and save it as a Docket and an Audio object.

    Nothing is saved when the download fails, when the item is already in the
    database (matched by the sha1 of its content), or when the binary can't
    be attached to the Audio object.

    :param item: A dict with the keys 'url', 'case_name', 'judges',
    'docket_number', 'court_code' and 'date_argued'.
    :return: None
    """
    thread_name = threading.current_thread().name
    logger.info("%s: Attempting to add item at: %s" %
                (thread_name, item['url']))
    try:
        msg, r = get_binary_content(
            item['url'],
            {},
        )
    except Exception:
        # Without a response there is nothing further to do for this item.
        logger.info("%s: Unable to get item at: %s" %
                    (thread_name, item['url']))
        return

    if msg:
        logger.warning(msg)
        return

    sha1_hash = hashlib.sha1(r.content).hexdigest()
    if Audio.objects.filter(sha1=sha1_hash).exists():
        # Simpsons did it! Try the next one.
        logger.info("%s: Item already exists, moving to next item." %
                    thread_name)
        return

    # New item, onwards!
    logger.info('%s: Adding new document found at: %s' %
                (thread_name, item['url']))
    court = Court.objects.get(pk=item['court_code'])

    docket = Docket(
        case_name=item['case_name'],
        court=court,
        date_argued=item['date_argued'],
    )
    if item['docket_number']:
        # The docket number belongs on the docket. The audio file has no
        # docket attached yet, so it can't be set through it.
        docket.docket_number = item['docket_number']

    audio_file = Audio(
        source='H',
        sha1=sha1_hash,
        case_name=item['case_name'],
        download_url=item['url'],
        processing_complete=False,
    )
    if item['judges']:
        audio_file.judges = item['judges']

    # Make and associate the file object
    try:
        cf = ContentFile(r.content)
        extension = get_extension(r.content)
        if extension not in ['.mp3', '.wma']:
            extension = '.' + item['url'].rsplit('.', 1)[1]
        # See bitbucket issue #215 for why this must be
        # lower-cased.
        file_name = trunc(item['case_name'].lower(), 75) + extension
        audio_file.local_path_original_file.save(file_name, cf,
                                                 save=False)
    except Exception:
        msg = 'Unable to save binary. Deleted document: %s.\n%s' % \
              (item['case_name'], traceback.format_exc())
        logger.critical(msg)
        # Don't save database records for a file we couldn't store.
        return

    docket.save()
    audio_file.docket = docket
    audio_file.save(index=False)

    # Spread the processing tasks out over the next hour.
    random_delay = random.randint(0, 3600)
    process_audio_file.apply_async(
        (audio_file.pk,),
        countdown=random_delay
    )

    logger.info("%s: Successfully added audio file %s: %s" %
                (thread_name,
                 audio_file.pk,
                 audio_file.case_name))
Ejemplo n.º 48
0
def view_opinion(request, pk, _):
    """Render the detail page for the opinion cluster with primary key `pk`.

    The page context carries a favorite form: bound to the user's existing
    Favorite for this cluster when one is found, otherwise unbound and
    pre-filled with the cluster ID and a truncated case name.

    :param request: The incoming request; `request.user` is used for the
        favorite lookup.
    :param pk: Primary key of the OpinionCluster (404 if it does not exist).
    :param _: Accepted but never read by this function.
    :return: The rendered 'view_opinion.html' response.
    """
    cluster = get_object_or_404(OpinionCluster, pk=pk)

    # Title: truncated case name then citation string, dropping blank parts.
    title_parts = [
        trunc(best_case_name(cluster), 100, ellipsis="..."),
        cluster.citation_string,
    ]
    title = ', '.join(part for part in title_parts if part.strip())

    # True as soon as one sub-opinion has a local file or a download URL.
    has_downloads = any(
        sub_opinion.local_path or sub_opinion.download_url
        for sub_opinion in cluster.sub_opinions.all()
    )
    get_string = search_utils.make_get_string(request)

    try:
        existing_fave = Favorite.objects.get(cluster_id=cluster.pk,
                                             user=request.user)
    except (ObjectDoesNotExist, TypeError):
        # Not favorited or anonymous user
        existing_fave = None

    if existing_fave is None:
        favorite_form = FavoriteForm(initial={
            'cluster_id': cluster.pk,
            'name': trunc(best_case_name(cluster), 100, ellipsis='...'),
        })
    else:
        favorite_form = FavoriteForm(instance=existing_fave)

    # Get the citing results from Solr for speed. Only do this for humans
    # to save on disk usage; bots get None.
    citing_clusters = None
    if not is_bot(request):
        conn = sunburnt.SolrInterface(settings.SOLR_OPINION_URL, mode='r')
        sub_opinion_ids = cluster.sub_opinions.values_list('pk', flat=True)
        cites_clause = ' OR '.join(
            str(sub_opinion_id) for sub_opinion_id in sub_opinion_ids
        )
        citing_clusters = conn.raw_query(
            q='cites:({ids})'.format(ids=cites_clause),
            rows=5,
            start=0,
            sort='citeCount desc',
            caller='view_opinion',
        ).execute()

    context = {
        'title': title,
        'cluster': cluster,
        'has_downloads': has_downloads,
        'favorite_form': favorite_form,
        'get_string': get_string,
        'private': cluster.blocked,
        'citing_clusters': citing_clusters,
        'top_authorities': cluster.authorities[:5],
    }
    return render(request, 'view_opinion.html', context)
Ejemplo n.º 49
0
def merge_cases_simple(new, target_id):
    """Merge the fields of `new` into the stored cluster `target_id`.

    For each field, either the value from `new` or the existing value on the
    target is kept, as described by the comments below.

    :param new: The incoming item whose case name, citations, docket number,
        judges, download URL and HTML may be copied onto the target.
    :param target_id: Primary key of the OpinionCluster to merge into.
    """
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # !!  THIS CODE IS OUT OF DATE AND UNMAINTAINED. FEEL FREE TO FIX IT, BUT !!
    # !!                 DO NOT TRUST IT IN ITS CURRENT STATE.                !!
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    target = OpinionCluster.objects.get(pk=target_id)
    print("Merging %s with" % new.case_name)
    print("        %s" % target.case_name)

    # Remember the pre-merge source; it also decides the URL handling below.
    cached_source = target.source
    merged_sources = {'C': 'LC', 'R': 'LR', 'CR': 'LCR'}
    if cached_source in merged_sources:
        target.source = merged_sources[cached_source]

    # Add the URL if it's not a court one, replacing public.resource.org's
    # info in some cases.
    if cached_source == 'R':
        target.download_url = new.download_url

    # Recreate the slug from the new case name (this changes the URL, but the
    # old will continue working), then take the new case name itself; they
    # tend to be pretty good.
    target.slug = slugify(trunc(new.case_name, 75))
    target.case_name = new.case_name

    # Keep an existing docket number; only fill it in when the target has none.
    if not target.docket.docket_number:
        target.docket.docket_number = new.docket.docket_number

    # Get the citations from the new item (ditch the old).
    citation_fields = (
        'federal_cite_one',
        'federal_cite_two',
        'federal_cite_three',
        'state_cite_one',
        'state_cite_two',
        'state_cite_three',
        'state_cite_regional',
        'specialty_cite_one',
        'scotus_early_cite',
        'lexis_cite',
        'westlaw_cite',
        'neutral_cite',
    )
    for field_name in citation_fields:
        setattr(target, field_name, getattr(new, field_name))

    # Add judge information if lacking. New is dirty, but better than none.
    if not target.judges:
        target.judges = new.judges

    # Add the text; anonymize() also reports whether the item must be blocked.
    anonymized_html, blocked = anonymize(new.html)
    target.html_lawbox = anonymized_html
    if blocked:
        target.blocked = True
        target.date_blocked = now()

    target.extracted_by_ocr = False  # No longer true for any LB case.
Ejemplo n.º 50
0
            af.judges = item['judges']
        if item['docket_number']:
            af.docket.docket_number = item['docket_number']

        court = Court.objects.get(pk=item['court_code'])
        docket.court = court

        # Fix the files. First save the location of the old files.
        original_local_path = af.local_path_original_file.path
        original_mp3_path = af.local_path_mp3.path

        # Create a new file with the contents of the old and a corrected
        # name. This is only in memory for the moment.
        cf = ContentFile(af.local_path_original_file.read())
        extension = '.' + af.local_path_original_file.path.rsplit('.', 1)[1]
        file_name = trunc(item['case_name'].lower(), 75) + extension
        af.local_path_original_file.save(file_name, cf, save=False)

        # Create a new mp3 file with the new contents
        cf = ContentFile(af.local_path_mp3.read())
        file_name = trunc(af.case_name.lower(), 72) + '_cl.mp3'
        af.local_path_mp3.save(file_name, cf, save=False)

        # Save things so they can be referenced in a sec.
        docket.save()
        af.save(index=False)

        # Update the ID3 information and duration data.
        new_mp3_path = af.local_path_mp3.path
        logger.info("Updating mpr at: %s" % new_mp3_path)
        set_mp3_meta_data(af, new_mp3_path)
Ejemplo n.º 51
0
 def __unicode__(self):
     """Return a short label built from the pk and a shortened description.

     The description is passed through trunc() with a limit of 50 and "..."
     as the ellipsis.
     """
     entry_pk = self.pk
     short_description = trunc(self.description, 50, ellipsis="...")
     return "<DocketEntry:%s ---> %s >" % (entry_pk, short_description)