Example #1
0
def _cron_process_PDF(obj, ppentry):
    """Screen one pickled PDF upload and dispatch its PUT if it passes.

    The document is rejected when ``_is_invalid_pdf``, ``_has_ssn`` or
    ``_in_blacklist`` flags it.  For a rejected document a docket with
    ``available=0`` is uploaded, ``ppentry`` is deleted and the pickle is
    handed to ``_quarantine_pickle`` together with the three flags.

    Otherwise the PUT is dispatched through ``_dispatch_put``.  When that
    reports success the document is marked available and a docket with
    ``available=1`` is uploaded.  The PUT message is printed either way.

    Returns None.
    """
    filename = ppentry.filename
    meta = IACommon.get_meta_from_filename(filename)
    court, casenum, docnum, subdocnum = [
        meta[key] for key in ("court", "casenum", "docnum", "subdocnum")
    ]

    def upload_docket(available):
        # Build the docket for this PDF with the given flag and upload it.
        docket = DocketXML.make_docket_for_pdf(
            "", court, casenum, docnum, subdocnum, available=available)
        UploadHandler.do_me_up(docket)

    pdf_is_invalid = _is_invalid_pdf(obj, filename)

    # SSN privacy check.  It only runs on valid PDFs because
    # PyPdf doesn't deal well with bad input.
    has_ssn = False if pdf_is_invalid else _has_ssn(obj, filename)

    # Blacklist file check
    in_blacklist = _in_blacklist(filename)

    if pdf_is_invalid or has_ssn or in_blacklist:
        upload_docket(0)

        # Delete the entry from the DB, then quarantine the pickle file
        # for analysis.
        ppentry.delete()
        _quarantine_pickle(
            filename,
            ssn=has_ssn,
            blacklist_file=in_blacklist,
            invalid_PDF=pdf_is_invalid,
        )
        return

    put_result, put_msg = _dispatch_put(obj, ppentry)

    if put_result:
        # Put success-- mark this document as available in the DB
        DocumentManager.mark_as_available(filename)
        upload_docket(1)

    print("  %s %s" % (filename, put_msg))
Example #2
0
def _cron_process_PDF(obj, ppentry):
    """Vet a pickled PDF and either quarantine it or dispatch its PUT.

    A PDF flagged by ``_is_invalid_pdf``, ``_has_ssn`` or ``_in_blacklist``
    is not uploaded: a docket with ``available=0`` is sent instead, the
    ``ppentry`` row is deleted and ``_quarantine_pickle`` is called with the
    flags.  An unflagged PDF goes to ``_dispatch_put``; on a truthy result
    the document is marked available and a docket with ``available=1`` is
    sent.  The message returned by ``_dispatch_put`` is always printed.

    Returns None.
    """
    filename = ppentry.filename
    meta = IACommon.get_meta_from_filename(filename)

    # Positional arguments shared by both docket uploads below.
    docket_args = (
        "",
        meta["court"],
        meta["casenum"],
        meta["docnum"],
        meta["subdocnum"],
    )

    bad_pdf = _is_invalid_pdf(obj, filename)

    # We only want to check for ssns on valid PDFs:
    # PyPdf doesn't deal well with bad input.
    if bad_pdf:
        has_ssn = False
    else:
        has_ssn = _has_ssn(obj, filename)

    # Blacklist file check
    blacklisted = _in_blacklist(filename)

    if not (bad_pdf or has_ssn or blacklisted):
        put_result, put_msg = _dispatch_put(obj, ppentry)

        if put_result:
            # Put success-- mark this document as available in the DB
            DocumentManager.mark_as_available(filename)
            UploadHandler.do_me_up(
                DocketXML.make_docket_for_pdf(*docket_args, available=1))

        print("  %s %s" % (filename, put_msg))
        return

    # Rejected document: publish it as unavailable.
    UploadHandler.do_me_up(
        DocketXML.make_docket_for_pdf(*docket_args, available=0))

    # Delete the entry from the DB
    ppentry.delete()
    # Quarantine the pickle file for analysis
    _quarantine_pickle(filename,
                       ssn=has_ssn,
                       blacklist_file=blacklisted,
                       invalid_PDF=bad_pdf)
Example #3
0
def mark_as_available(filename):
    """Set ``available = 1`` on the Document row matching *filename*.

    The court, casenum, docnum and subdocnum parsed from the filename select
    the row; only the first match is updated.  A missing row, or an
    ``IntegrityError`` raised while saving, is logged as an error rather
    than raised.

    Returns None.
    """
    meta = IACommon.get_meta_from_filename(filename)

    matches = Document.objects.filter(
        court=meta["court"],
        casenum=meta["casenum"],
        docnum=meta["docnum"],
        subdocnum=meta["subdocnum"],
    )

    try:
        document = matches[0]
    except IndexError:
        # Unexpected case.  No Document entry
        logging.error("mark_as_available: no entry for %s." % (filename))
        return

    document.available = 1
    try:
        document.save()
    except IntegrityError:
        logging.error("mark_as_available: could not save %s." % (filename))
Example #4
0
def mark_as_available(filename):
    """Flag the Document identified by *filename* as available.

    The filename is parsed into court / casenum / docnum / subdocnum, which
    are used to filter ``Document``.  The first matching row gets
    ``available = 1`` and is saved.  If no row matches, or saving raises
    ``IntegrityError``, an error is logged and nothing is raised.

    Returns None.
    """
    docmeta = IACommon.get_meta_from_filename(filename)

    # Build the lookup once, in the same field order the filter used before.
    lookup = {}
    for field in ("court", "casenum", "docnum", "subdocnum"):
        lookup[field] = docmeta[field]

    candidates = Document.objects.filter(**lookup)

    try:
        entry = candidates[0]
    except IndexError:
        # Unexpected case.  No Document entry
        logging.error("mark_as_available: no entry for %s." % (filename))
        return

    entry.available = 1
    try:
        entry.save()
    except IntegrityError:
        logging.error("mark_as_available: could not save %s." % (filename))
Example #5
0
def _cron_put_pickles():
    """Process every ready pickled PUT, taking one bucket lock per case.

    Looks up the uploader whose key is ``AUTH_KEY`` and returns early, after
    printing a message, when there is none.  Otherwise iterates over the
    ``PickledPut`` rows with ``ready=1`` and ``processing=0`` in descending
    filename order.  Consecutive entries for the same (court, casenum) share
    a single ``BucketLockManager`` lock; it is dropped when the case changes
    and once more after the loop.  Entries of a case whose lock cannot be
    obtained are passed over.

    Each locked entry is unpickled and handed to ``_cron_process_docketXML``
    when ``ppentry.docket`` is truthy, otherwise to ``_cron_process_PDF``.
    If unpickling yields nothing, the DB entry and the pickle file are
    deleted.

    Returns None; progress and problems are reported with ``print``.
    """
    # Get uploader credentials.
    uploader_query = Uploader.objects.filter(key=AUTH_KEY)
    try:
        RECAP_UPLOADER_ID = uploader_query[0].id
    except IndexError:
        print("  could not find uploader with key=%s" % AUTH_KEY)
        return

    # Get all ready pickles
    query = PickledPut.objects.filter(ready=1, processing=0) \
                              .order_by('-filename')

    # NOTE(review): the step that set processing=1 on each entry before the
    # loop is disabled, so entries stay processing=0 while handled here.

    # Keep track of court, casenum.  Only lock and unlock once for each case.
    curr_court = None
    curr_casenum = None
    lock_nonce = None

    # Process pickles one at a time.
    for ppentry in query:

        filename = ppentry.filename

        ppmeta = IACommon.get_meta_from_filename(filename)

        court = ppmeta["court"]
        casenum = ppmeta["casenum"]

        # Make sure we have the lock for this case.

        if curr_court == court and curr_casenum == casenum:
            # Same case as the previous ppentry.

            if not lock_nonce:
                # Skip if we don't have the lock already.
                continue

            # Otherwise, we already have the lock, so carry on.

        else:
            # Switching to a new case.

            # Drop the current lock (from previous case), if necessary.
            if curr_court and curr_casenum:
                dropped, errmsg = BucketLockManager.drop_lock(
                    curr_court, curr_casenum, RECAP_UPLOADER_ID, nolocaldb=1)
                if not dropped:
                    # Report the case whose lock could not be dropped (the
                    # previous one), not the case we are switching to.
                    print("  %s.%s someone stole my lock?" %
                          (curr_court, unicode(curr_casenum)))

            # Grab new lock
            curr_court = court
            curr_casenum = casenum

            lock_nonce, errmsg = BucketLockManager.get_lock(
                court, casenum, RECAP_UPLOADER_ID, one_per_uploader=1)

            if not lock_nonce:
                print("  Passing on %s.%s: %s" % (court, casenum, errmsg))

                # We don't have a lock, so don't drop the lock in the next loop
                curr_court = None
                curr_casenum = None
                continue

        # We'll always have the lock here.

        # Unpickle the object
        obj, unpickle_msg = unpickle_object(filename)

        # Two cases for the unpickled object: Request or DocketXML
        if obj and ppentry.docket:
            _cron_process_docketXML(obj, ppentry)

        elif obj:
            # Dispatch the PUT request
            _cron_process_PDF(obj, ppentry)

        else:
            # Unpickling failed
            # If unpickling fails, it could mean that another cron job
            # has already finished this PP - not sure how to distinguish this
            print("  %s %s (Another cron job completed?)" %
                  (filename, unpickle_msg))

            # Delete the entry from the DB
            ppentry.delete()
            # Delete the pickle file
            delete_pickle(filename)

    # Drop last lock
    if curr_court and curr_casenum:
        dropped, errmsg = BucketLockManager.drop_lock(
            curr_court, curr_casenum, RECAP_UPLOADER_ID, nolocaldb=1)
        if not dropped:
            # Name the locked case explicitly instead of relying on the
            # loop variables left over from the last iteration.
            print("  %s.%s someone stole my lock??" %
                  (curr_court, unicode(curr_casenum)))
Example #6
0
def _cron_put_pickles():
    """Process every ready pickled PUT, taking one bucket lock per case.

    Looks up the uploader whose key is ``AUTH_KEY`` and returns early, after
    printing a message, when there is none.  Otherwise iterates over the
    ``PickledPut`` rows with ``ready=1`` and ``processing=0`` in descending
    filename order.  Consecutive entries for the same (court, casenum) share
    a single ``BucketLockManager`` lock; it is dropped when the case changes
    and once more after the loop.  Entries are passed over when no lock
    nonce is returned or when the nonce equals ``'bigdoc'``.

    Each locked entry is unpickled and handed to ``_cron_process_docketXML``
    when ``ppentry.docket`` is truthy, otherwise to ``_cron_process_PDF``.
    If unpickling yields nothing, the DB entry and the pickle file are
    deleted.

    Returns None; progress and problems are reported with ``print``.
    """
    # Get uploader credentials.
    uploader_query = Uploader.objects.filter(key=AUTH_KEY)
    try:
        RECAP_UPLOADER_ID = uploader_query[0].id
    except IndexError:
        print("  could not find uploader with key=%s" % AUTH_KEY)
        return

    # Get all ready pickles
    query = PickledPut.objects.filter(ready=1, processing=0) \
        .order_by('-filename')

    # NOTE(review): the step that set processing=1 on each entry before the
    # loop is disabled, so entries stay processing=0 while handled here.

    # Keep track of court, casenum.  Only lock and unlock once for each case.
    curr_court = None
    curr_casenum = None
    lock_nonce = None

    # Process pickles one at a time.
    for ppentry in query:

        filename = ppentry.filename

        ppmeta = IACommon.get_meta_from_filename(filename)

        court = ppmeta["court"]
        casenum = ppmeta["casenum"]

        # Make sure we have the lock for this case.

        if curr_court == court and curr_casenum == casenum:
            # Same case as the previous ppentry.

            if not lock_nonce:
                # Skip if we don't have the lock already.
                continue

            # Otherwise, we already have the lock, so carry on.

        else:
            # Switching to a new case.

            # Drop the current lock (from previous case), if necessary.
            if curr_court and curr_casenum:
                dropped, errmsg = BucketLockManager.drop_lock(
                    curr_court, curr_casenum, RECAP_UPLOADER_ID, nolocaldb=1)
                if not dropped:
                    # Report the case whose lock could not be dropped (the
                    # previous one), not the case we are switching to.
                    print("  %s.%s someone stole my lock?" %
                          (curr_court, unicode(curr_casenum)))

            # Grab new lock
            curr_court = court
            curr_casenum = casenum

            lock_nonce, errmsg = BucketLockManager.get_lock(
                court, casenum, RECAP_UPLOADER_ID, one_per_uploader=1)
            if not lock_nonce:
                print("  Passing on %s.%s: %s" % (court, casenum, errmsg))

            # NOTE(review): a 'bigdoc' nonce is skipped silently, exactly like
            # a missing lock; its meaning is defined by BucketLockManager and
            # is not visible here -- confirm no lock is left held in that case.
            if not lock_nonce or lock_nonce == 'bigdoc':
                # We don't have a lock, so don't drop the lock in the next loop
                curr_court = None
                curr_casenum = None
                continue

        # We'll always have the lock here.

        # Unpickle the object
        obj, unpickle_msg = unpickle_object(filename)

        # Two cases for the unpickled object: Request or DocketXML
        if obj and ppentry.docket:
            print("Processing docket: %s" % filename)
            _cron_process_docketXML(obj, ppentry)

        elif obj:
            # Dispatch the PUT request
            _cron_process_PDF(obj, ppentry)

        else:
            # Unpickling failed
            # If unpickling fails, it could mean that another cron job
            # has already finished this PP - not sure how to distinguish this
            print("  %s %s (Another cron job completed?)" %
                  (filename, unpickle_msg))

            # Delete the entry from the DB
            ppentry.delete()
            # Delete the pickle file
            delete_pickle(filename)

    # Drop last lock
    if curr_court and curr_casenum:
        dropped, errmsg = BucketLockManager.drop_lock(
            curr_court, curr_casenum, RECAP_UPLOADER_ID, nolocaldb=1)
        if not dropped:
            # Name the locked case explicitly instead of relying on the
            # loop variables left over from the last iteration.
            print("  %s.%s someone stole my lock??" %
                  (curr_court, unicode(curr_casenum)))