Esempio n. 1
0
# Batch driver: walk every transcript file, look up the case outcome for its
# docket number, and compute per-petitioner features from the oral argument.
#
# Single-argument print(...) calls are used throughout; they behave the same
# under the Python 2 print statement this script otherwise relies on.

# One path per regular file found in `mypath`.
# NOTE(review): the prefix is hard-coded to "transcripts/" rather than derived
# from `mypath`; presumably `mypath` points at that directory -- confirm.
files = ["transcripts/" + f for f in listdir(mypath) if isfile(join(mypath, f))]

# Start every run with a fresh "scores" file holding only the header line.
# The context manager closes the file even if the write raises.
with open("scores", "w") as f:
    f.write("Scores:\n")

for file in files:
    print("Now processing " + file)

    # The docket number is the part of the file name that sits between the
    # directory separator and the first end marker. The end marker is an
    # underscore, else the ".pdf" extension, else the end of the string; a
    # 'q' (question marker) that appears before that point ends the docket
    # number earlier.
    slash = file.find('/')
    name_start = slash + 1
    end_docket = file.find('_', name_start)
    if end_docket == -1:
        end_docket = file.find(".pdf", name_start)
    if end_docket == -1:
        # No marker at all: keep the whole name instead of letting a -1 slice
        # bound silently drop the last character.
        end_docket = len(file)
    # Only look for 'q' inside the candidate docket span so that a 'q' later
    # in the file name cannot move the cut point.
    question_marker = file.find('q', name_start, end_docket)
    if question_marker != -1:
        end_docket = question_marker
    docket_number = file[name_start:end_docket]
    print("Checking database for docket #%s" % docket_number)
    print("Winner identified: %s" % scdb.get_winning_party(docket_number))
    # Block until the operator presses Enter before continuing with this file.
    raw_input()

    the_transcript = transcript.get_transcript_from_PDF(file)
    petitioners, respondents = transcript.get_petitioners_and_respondents(the_transcript)
    argument = transcript.get_argument(the_transcript)

    arguments_by_advocate = transcript.get_arguments_by_advocate(petitioners, respondents, argument)

    # NOTE: the loop variable `argument` rebinds the whole-case `argument`
    # above; that value has already been consumed at this point.
    for petitioner, argument in arguments_by_advocate["petitioner"].iteritems():
        statements = transcript.get_statements_in_argument(argument, petitioner)
        #scores.get_statistics_from_statements(statements)
        number_of_words_per_speaker = scores.get_number_of_words_per_speaker(statements)
        features = scores.get_features_from_statements(statements)
        flat_features = scores.flatten_features(features)
        #import matplotlib.pyplot as plt
Esempio n. 2
0
    # Derive the docket number from the path in `file`: it is the text that
    # starts right after the first '/' and stops at `end_docket`.
    slash = file.find('/')
    end_docket = file.find('_')
    if end_docket == -1:
        # No underscore in the path: cut at the ".pdf" extension instead.
        # If that is missing too, end_docket stays -1 and the slice below
        # drops the final character.
        end_docket = file.find(".pdf")
    # A 'q' followed by a digit overrides the cut point found above.
    # NOTE(review): the pattern is searched over the whole path, so a match
    # located after the underscore would also move the cut point -- confirm
    # against the actual file naming scheme.
    questionnumber = re.search(r"q\d", file)
    if questionnumber:
        end_docket = questionnumber.start()
    docket_number = file[slash+1:end_docket]
    print "Checking database for docket #%s" % docket_number
    info = scdb.get_case_info(docket_number)
    if not info:
        # Falsy lookup result: nothing to work with, move on to the next
        # iteration of the enclosing loop (its header is outside this excerpt).
        print "Couldn't find case in SCDB. Will skip."
        continue
    print "Now processing transcript for ", info["caseName"]
    winner = scdb.get_winning_party(docket_number)
    if winner == "unclear":
        # The string "unclear" is treated as "no usable winner"; skip the case.
        print "The winner of this case is unclear. Will skip."
        continue
    print "Winner identified: %s" % winner
    print "Decision direction: %s" % info["decisionDirection"]

    # Load the transcript, then extract the two advocate groups and the
    # argument from it via the `transcript` helpers.
    the_transcript = transcript.get_transcript_from_PDF(file)
    petitioners, respondents = transcript.get_petitioners_and_respondents(the_transcript)
    argument = transcript.get_argument(the_transcript)

    # Result is indexed by side below (the "petitioner" key is used).
    arguments_by_advocate = transcript.get_arguments_by_advocate(petitioners, respondents, argument)

    # Starts empty for each file; presumably filled by the loop that follows
    # (its body is not visible in this excerpt).
    inputs = []

    for petitioner, argument in arguments_by_advocate["petitioner"].iteritems():