# Script section: reset the "scores" output file, then walk every transcript,
# look its docket number up in the SCDB helper module and compute per-advocate
# features for each petitioner argument.
import re  # needed for the question-number match in the docket parsing below

# Names are listed from mypath but prefixed with "transcripts/" -- presumably
# mypath points at that directory; confirm against where mypath is defined.
files = ["transcripts/" + f for f in listdir(mypath) if isfile(join(mypath, f))]

# Truncate "scores" and write only its header line; the context manager
# guarantees the handle is closed even if the write fails.
with open("scores", "w") as f:
    f.write("Scores:\n")

for file in files:
    # Single-argument parenthesised print produces identical output under the
    # Python 2 print statement and also parses on Python 3.
    print("Now processing " + file)

    # The docket number is the part of the path after the first '/' and before
    # whichever marker terminates it: '_', else ".pdf", else end of string.
    slash = file.find('/')
    end_docket = file.find('_')
    if end_docket == -1:
        end_docket = file.find(".pdf")
    if end_docket == -1:
        # No terminator at all: keep the whole remainder instead of letting a
        # -1 slice bound silently drop the last character.
        end_docket = len(file)
    # A question suffix ("q" followed by a digit) ends the docket number
    # earlier.  The previous code stored this position in docket_number, where
    # it was immediately overwritten, so the suffix was never trimmed.
    questionnumber = re.search(r"q\d", file)
    if questionnumber:
        end_docket = questionnumber.start()
    docket_number = file[slash+1:end_docket]

    print("Checking database for docket #%s" % docket_number)
    print("Winner identified: %s" % scdb.get_winning_party(docket_number))
    # Blocks until a line is read from stdin, i.e. the script pauses after
    # each lookup until Enter is pressed.
    raw_input()

    the_transcript = transcript.get_transcript_from_PDF(file)
    petitioners, respondents = transcript.get_petitioners_and_respondents(the_transcript)
    argument = transcript.get_argument(the_transcript)
    arguments_by_advocate = transcript.get_arguments_by_advocate(petitioners, respondents, argument)

    # NOTE: the loop variable below rebinds `argument`; the full argument is
    # not needed again in this iteration because arguments_by_advocate has
    # already been built from it.
    for petitioner, argument in arguments_by_advocate["petitioner"].iteritems():
        statements = transcript.get_statements_in_argument(argument, petitioner)
        #scores.get_statistics_from_statements(statements)
        number_of_words_per_speaker = scores.get_number_of_words_per_speaker(statements)
        features = scores.get_features_from_statements(statements)
        flat_features = scores.flatten_features(features)
        #import matplotlib.pyplot as plt
slash = file.find('/') end_docket = file.find('_') if end_docket == -1: end_docket = file.find(".pdf") questionnumber = re.search(r"q\d", file) if questionnumber: end_docket = questionnumber.start() docket_number = file[slash+1:end_docket] print "Checking database for docket #%s" % docket_number info = scdb.get_case_info(docket_number) if not info: print "Couldn't find case in SCDB. Will skip." continue print "Now processing transcript for ", info["caseName"] winner = scdb.get_winning_party(docket_number) if winner == "unclear": print "The winner of this case is unclear. Will skip." continue print "Winner identified: %s" % winner print "Decision direction: %s" % info["decisionDirection"] the_transcript = transcript.get_transcript_from_PDF(file) petitioners, respondents = transcript.get_petitioners_and_respondents(the_transcript) argument = transcript.get_argument(the_transcript) arguments_by_advocate = transcript.get_arguments_by_advocate(petitioners, respondents, argument) inputs = [] for petitioner, argument in arguments_by_advocate["petitioner"].iteritems():