# Number of transcript-derived features fed to the network; the case's
# decision direction is appended afterwards as one additional input.
_EXPECTED_FEATURE_COUNT = 774


def _advocate_feature_vector(advocate, advocate_argument):
    """Return the feature vector for one advocate's part of the argument."""
    statements = transcript.get_statements_in_argument(advocate_argument, advocate)
    features = scores.get_features_from_statements(statements)
    flat_features = scores.flatten_features(features)
    normalized = scores.normalize_feature_list(flat_features)
    return scores.get_feature_vector(normalized)


def predict_case(docket_number, leaning=None):
    """Run the network on the oral-argument transcript of one case.

    The transcript is read from ``transcripts/<docket_number>.pdf``. A feature
    vector is built for every petitioner advocate and then every respondent
    advocate, and the vectors are concatenated in that order. The decision
    direction is appended as the final input before the network is queried.

    Args:
        docket_number: Docket number as a string; it is also the transcript's
            file name without the ``.pdf`` extension.
        leaning: Decision direction of the case. When falsy (including the
            default ``None``) it is taken from the ``"decisionDirection"``
            entry of ``scdb.get_case_info(docket_number)``.

    Returns:
        The first element of ``net.activate(inputs)``, or ``0`` when fewer
        than 774 features could be extracted from the transcript.
    """
    log = logging.getLogger('PREDICT_CASE')
    if not leaning:
        leaning = scdb.get_case_info(docket_number)["decisionDirection"]

    the_transcript = transcript.get_transcript_from_PDF("transcripts/" + docket_number + ".pdf")
    petitioners, respondents = transcript.get_petitioners_and_respondents(the_transcript)
    argument = transcript.get_argument(the_transcript)
    arguments_by_advocate = transcript.get_arguments_by_advocate(petitioners, respondents, argument)

    # Petitioner features must come before respondent features: the network
    # input is positional.
    inputs = []
    for side in ("petitioner", "respondent"):
        for advocate, advocate_argument in arguments_by_advocate[side].items():
            inputs.extend(_advocate_feature_vector(advocate, advocate_argument))

    if len(inputs) < _EXPECTED_FEATURE_COUNT:
        # Not every argument could be matched to an advocate, so the vector
        # is too short to feed to the network.
        print("Parsing error? Did not create enough features.")
        return 0
    if len(inputs) > _EXPECTED_FEATURE_COUNT:
        # More than one argument per advocate: keep only the leading features.
        inputs = inputs[:_EXPECTED_FEATURE_COUNT]
        print("Parsing error? Had to truncate features.")

    inputs.append(leaning)
    log.info(inputs[:25])
    log.info("Querying network. . .")
    return net.activate(inputs)[0]
def inline_predict(docket_number):
    """Render the inline prediction page for one docket.

    Fetches the case record, runs ``predict_case`` and compares the predicted
    winner with the recorded one.

    Template context (``inline_predict.html``):
        docket_number: The requested docket number.
        info: Record returned by ``scdb.get_case_info``.
        result: Raw value returned by ``predict_case``.
        resultint: ``result`` rounded to an integer and capped at 1.
        winner: ``"Petitioner"`` when ``resultint`` is 1, else ``"Respondent"``.
        actual: ``"Petitioner"``, ``"Respondent"`` or ``"Unclear"``, derived
            from ``info["partyWinning"]`` (``"1"``, ``"0"``, anything else).
        correct: True when ``winner`` equals ``actual``.
        confidence_level: Distance of ``result`` from 0.5, scaled by 100 and
            truncated to an int (0-50 while ``result`` lies in [0, 1]).
        low_confidence: True when ``confidence_level`` is below 40. Always
            present in the context.
    """
    info = scdb.get_case_info(docket_number)
    # Hand over the direction we already fetched so predict_case does not
    # repeat the lookup; a falsy value still triggers its own fallback.
    result = predict_case(docket_number, info["decisionDirection"])
    # NOTE(review): predict_case returns 0 when feature extraction fails,
    # which appears here as a "Respondent" prediction with confidence 50.
    # Confirm whether that case should be surfaced differently.

    # Cap at 1 so a score that rounds above 1 still counts as a petitioner win.
    resultint = min(int(round(result)), 1)
    winner = "Petitioner" if resultint == 1 else "Respondent"

    party_winning = info["partyWinning"]
    if party_winning == "1":
        actual = "Petitioner"
    elif party_winning == "0":
        actual = "Respondent"
    else:
        actual = "Unclear"

    correct = winner == actual
    confidence_level = int(abs(result - 0.5) * 100)
    low_confidence = confidence_level < 40

    return render_template(
        'inline_predict.html',
        docket_number=docket_number,
        info=info,
        result=result,
        resultint=resultint,
        winner=winner,
        actual=actual,
        correct=correct,
        confidence_level=confidence_level,
        low_confidence=low_confidence
    )
# Network with layer sizes 775, 9 and 1.
# NOTE(review): presumably PyBrain's buildNetwork (775 inputs, 9 hidden
# units, 1 output) -- confirm against the import at the top of the file.
net = buildNetwork(775, 9, 1)

# Walk every transcript path, look the case up in SCDB and parse the PDF.
# NOTE(review): `file` shadows the Python 2 builtin of the same name.
for file in files:
    print "Now processing " + file

    # The docket number is the text between the first '/' and the first '_'
    # (or ".pdf" when there is no underscore). If "q<digit>" occurs anywhere
    # in the path, the docket number ends there instead.
    # NOTE(review): find('/') locates the FIRST slash, so a nested directory
    # would end up inside docket_number. If none of '_', ".pdf" or "q<digit>"
    # is present, end_docket stays -1 and the slice silently drops the last
    # character.
    slash = file.find('/')
    end_docket = file.find('_')
    if end_docket == -1:
        end_docket = file.find(".pdf")
    questionnumber = re.search(r"q\d", file)
    if questionnumber:
        end_docket = questionnumber.start()
    docket_number = file[slash+1:end_docket]

    print "Checking database for docket #%s" % docket_number
    info = scdb.get_case_info(docket_number)
    if not info:
        # No usable record for this docket: nothing to label the case with.
        print "Couldn't find case in SCDB. Will skip."
        continue

    print "Now processing transcript for ", info["caseName"]
    winner = scdb.get_winning_party(docket_number)
    if winner == "unclear":
        # Cases without a clear winner are skipped.
        print "The winner of this case is unclear. Will skip."
        continue

    print "Winner identified: %s" % winner
    print "Decision direction: %s" % info["decisionDirection"]

    # Parse the transcript PDF into its advocates and the argument text.
    the_transcript = transcript.get_transcript_from_PDF(file)
    petitioners, respondents = transcript.get_petitioners_and_respondents(the_transcript)
    argument = transcript.get_argument(the_transcript)
def predict(docket_number):
    """Render ``predict.html`` for the given docket.

    The template receives the docket number as ``docket_number`` and the
    record returned by ``scdb.get_case_info`` as ``info``.
    """
    case_record = scdb.get_case_info(docket_number)
    return render_template(
        'predict.html',
        docket_number=docket_number,
        info=case_record
    )
def get_case_info(docket_number):
    """Return the ``scdb.get_case_info`` record for a docket via ``jsonify``."""
    case_record = scdb.get_case_info(docket_number)
    return jsonify(case_record)