Exemple #1
0
def query(request):
    """Query the database to check which PDF documents we have.

    The json input is {"court": <court>,
                       "urls": <list of PACER doc1 urls>}

    The json output is a set of mappings:
                      {<pacer url>: { "filename": <public url>,
                                      "timestamp": <last time seen> },
                       <pacer url>: ... }

    Only urls with a matching available document appear in the output.
    When the matched document has subdocnum 0, its entry also carries a
    "subDocuments" mapping (keyed by subdocnum, each value shaped like the
    entry above) for the available documents sharing its court, casenum
    and docnum that have a nonzero subdocnum.

    A non-POST request, or a missing/malformed 'json', 'court' or 'urls'
    argument, is logged and answered with a plain HttpResponse carrying
    the message.  A show_doc style url whose arguments are incomplete or
    whose dm_id is not an integer is logged and skipped, so that one bad
    url does not fail the lookup of the others.
    """

    response = {}

    if request.method != "POST":
        message = "query: Not a POST request."
        logging.error(message)
        return HttpResponse(message)

    try:
        jsonin = simplejson.loads(request.POST["json"])
    except KeyError:
        message = "query: no 'json' POST argument"
        logging.warning(message)
        return HttpResponse(message)
    except ValueError:
        message = "query: malformed 'json' POST argument"
        logging.warning(message)
        return HttpResponse(message)
    except IOError:
        # Not something we can fix I don't think.  Client fails to send data.
        message = "query: Client read error (Timeout?)"
        logging.warning(message)
        return HttpResponse(message)

    try:
        court = jsonin["court"].strip()
    except KeyError:
        message = "query: missing json 'court' argument."
        logging.warning(message)
        return HttpResponse(message)

    try:
        urls = jsonin["urls"]
    except KeyError:
        message = "query: missing json 'urls' argument."
        logging.warning(message)
        return HttpResponse(message)

    # Compiled once, outside the loop; raw string so the regex escapes are
    # not treated as (invalid) string escapes.
    show_doc_re = re.compile(r"show_doc\.pl\?(.*)")

    for url in urls:

        # detect show_doc style document links
        show_doc = show_doc_re.search(url)

        if show_doc:
            argsdict = {}

            for arg in show_doc.group(1).split("&"):
                # partition() never raises: pieces without "=" (e.g. the
                # empty piece left by a trailing "&") are ignored, and a
                # value may itself contain "=".
                key, sep, val = arg.partition("=")
                if sep:
                    argsdict[key] = val

            try:
                docnum = argsdict["doc_num"]
                casenum = argsdict["caseid"]
                dm_id = int(argsdict["dm_id"])
            except (KeyError, ValueError):
                # Required argument missing, or dm_id not numeric.
                logging.warning("query: skipping malformed show_doc url: %s",
                                url)
                continue

            matches = (Document.objects.filter(court=court)
                       .filter(docnum=docnum)
                       .filter(casenum=casenum)
                       .filter(dm_id=dm_id)
                       .filter(available=1))

        else:
            # otherwise, assume it's a normal doc1 style url
            docid = UploadHandler.docid_from_url_name(url)
            matches = (Document.objects.filter(docid=docid)
                       .filter(available=1))

        if not matches:
            continue

        doc = matches[0]

        entry = {
            "filename": IACommon.get_pdf_url(court,
                                             doc.casenum,
                                             doc.docnum,
                                             doc.subdocnum),
            "timestamp": doc.lastdate.strftime("%m/%d/%y")}
        response[url] = entry

        if doc.subdocnum != 0:
            continue

        # The match is a main document: also report its sub-documents.
        subdocs = Document.objects.filter(court=court,
                                          casenum=doc.casenum,
                                          docnum=doc.docnum,
                                          available=1).exclude(
                                          subdocnum=0)

        if subdocs:
            entry["subDocuments"] = {}

            for subdoc in subdocs:
                entry["subDocuments"][subdoc.subdocnum] = {
                    "filename": IACommon.get_pdf_url(court,
                                                     subdoc.casenum,
                                                     subdoc.docnum,
                                                     subdoc.subdocnum),
                    "timestamp": subdoc.lastdate.strftime("%m/%d/%y")}

    jsonout = simplejson.dumps(response)

    return HttpResponse(jsonout, mimetype="application/json")
Exemple #2
0
def query(request):
    """Query the database to check which PDF documents we have.

    The json input is {"court": <court>,
                       "urls": <list of PACER doc1 urls>}

    The json output is a set of mappings:
                      {<pacer url>: { "filename": <public url>,
                                      "timestamp": <last time seen> },
                       <pacer url>: ... }

    Only urls with a matching available document appear in the output.
    When the matched document has subdocnum 0, its entry also carries a
    "subDocuments" mapping (keyed by subdocnum, each value shaped like the
    entry above) for the available documents sharing its court, casenum
    and docnum that have a nonzero subdocnum.

    A non-POST request, or a missing/malformed 'json', 'court' or 'urls'
    argument, is logged and answered with a plain HttpResponse carrying
    the message.  A show_doc style url whose arguments are incomplete or
    whose dm_id is not an integer is logged and skipped, so that one bad
    url does not fail the lookup of the others.
    """
    response = {}

    if request.method != "POST":
        message = "query: Not a POST request."
        logging.error(message)
        return HttpResponse(message)

    try:
        jsonin = simplejson.loads(request.POST["json"])
    except KeyError:
        message = "query: no 'json' POST argument"
        logging.warning(message)
        return HttpResponse(message)
    except ValueError:
        message = "query: malformed 'json' POST argument"
        logging.warning(message)
        return HttpResponse(message)
    except IOError:
        # Not something we can fix I don't think.  Client fails to send data.
        message = "query: Client read error (Timeout?)"
        logging.warning(message)
        return HttpResponse(message)

    try:
        court = jsonin["court"].strip()
    except KeyError:
        message = "query: missing json 'court' argument."
        logging.warning(message)
        return HttpResponse(message)

    try:
        urls = jsonin["urls"]
    except KeyError:
        message = "query: missing json 'urls' argument."
        logging.warning(message)
        return HttpResponse(message)

    # Compiled once, outside the loop; raw string so the regex escapes are
    # not treated as (invalid) string escapes.
    show_doc_re = re.compile(r"show_doc\.pl\?(.*)")

    for url in urls:
        # detect show_doc style document links
        show_doc = show_doc_re.search(url)

        if show_doc:
            argsdict = _show_doc_args(show_doc.group(1))

            try:
                docnum = argsdict["doc_num"]
                casenum = argsdict["caseid"]
                dm_id = int(argsdict["dm_id"])
            except (KeyError, ValueError):
                # Required argument missing, or dm_id not numeric.
                logging.warning("query: skipping malformed show_doc url: %s",
                                url)
                continue

            matches = (Document.objects.filter(court=court)
                       .filter(docnum=docnum)
                       .filter(casenum=casenum)
                       .filter(dm_id=dm_id)
                       .filter(available=1))
        else:
            # otherwise, assume it's a normal doc1 style url
            docid = UploadHandler.docid_from_url_name(url)
            matches = (Document.objects.filter(docid=docid)
                       .filter(available=1))

        if not matches:
            continue

        doc = matches[0]

        entry = {
            "filename":
            IACommon.get_pdf_url(court, doc.casenum, doc.docnum,
                                 doc.subdocnum),
            "timestamp":
            doc.lastdate.strftime("%m/%d/%y")
        }
        response[url] = entry

        if doc.subdocnum != 0:
            continue

        # The match is a main document: also report its sub-documents.
        subdocs = Document.objects.filter(
            court=court,
            casenum=doc.casenum,
            docnum=doc.docnum,
            available=1).exclude(subdocnum=0)

        if subdocs:
            entry["subDocuments"] = {}

            for subdoc in subdocs:
                entry["subDocuments"][subdoc.subdocnum] = {
                    "filename":
                    IACommon.get_pdf_url(court, subdoc.casenum,
                                         subdoc.docnum,
                                         subdoc.subdocnum),
                    "timestamp":
                    subdoc.lastdate.strftime("%m/%d/%y")
                }

    jsonout = simplejson.dumps(response)

    return HttpResponse(jsonout, mimetype="application/json")


def _show_doc_args(argsstring):
    """Split a show_doc query string ("a=1&b=2") into a {name: value} dict.

    Pieces without "=" (such as the empty piece left by a trailing "&")
    are ignored; only the first "=" of a piece separates name from value.
    """
    argsdict = {}

    for arg in argsstring.split("&"):
        # partition() never raises, unlike unpacking the result of split().
        key, sep, val = arg.partition("=")
        if sep:
            argsdict[key] = val

    return argsdict