コード例 #1
0
def readTheData(inFileName):
    """Read sentence records from a file and trash unwanted entries.

    Every line of the file is wrapped in a ``HistoSentence`` and stored in
    a dictionary under a key built from whitespace-separated fields 1, 2,
    7 and 12 of the line.  Field 2 must be ``DRAFT`` or ``FINAL``; for
    ``FINAL`` lines fields 12 and 7 are swapped in the key so that each
    kind sorts by its own sentence sub first.

    After loading, the entries are walked in sorted key order and:

    * the entry just before each DRAFT/FINAL switch is removed,
    * an entry whose third and fourth key fields are both ``'0'`` is
      removed,
    * the last entry in sorted order is removed.

    Progress messages are printed to standard output for every entry
    visited and every entry removed.

    Args:
        inFileName: path of the text file to read.

    Returns:
        The ``defaultdict`` (created without a default factory) mapping
        the formatted key strings to the remaining ``HistoSentence``
        objects.

    Raises:
        SystemExit: if a line is neither DRAFT nor FINAL, or if an entry
            has ``'0'`` in the third key field but not in the fourth.
        IndexError: if a line has fewer than 13 whitespace-separated
            fields.
    """
    theData = defaultdict()

    # The context manager closes the file even when a bad line makes us
    # leave through sys.exit().
    with open(inFileName) as dataFile:
        for line in dataFile:
            sent = HistoSentence(line)
            lineSplit = line.split()

            # ok, here's a hiccup
            # we want both draft and final to sort in order of their
            #      own sentence subs
            if 'DRAFT' == lineSplit[2]:
                key = '%6s %5s %4s %4s' % (lineSplit[1], lineSplit[2],
                                           lineSplit[7], lineSplit[12])
            elif 'FINAL' == lineSplit[2]:
                key = '%6s %5s %4s %4s' % (lineSplit[1], lineSplit[2],
                                           lineSplit[12], lineSplit[7])
            else:
                print('ERROR: sentence is neither DRAFT nor FINAL %s' % (sent,))
                sys.exit()

            sent.checkInternalCorrectness()

            theData[key] = sent

    # Sentinel "previous" key: its second field matches neither DRAFT nor
    # FINAL, so the first real entry never triggers a switch removal.
    oldkey = 'A B C D'
    oldkeysplit = oldkey.split()

    # sorted() snapshots the items, so removing from theData inside the
    # loop is safe.
    for key, value in sorted(theData.items()):
        keysplit = key.split()
        print('FOR TRASHING %s %s' % (keysplit, value))

        # The previous entry may already be gone (it was a zero entry), so
        # use pop() with a default instead of del to avoid a KeyError.
        if ('DRAFT' == keysplit[1]) and ('FINAL' == oldkeysplit[1]):
            print('YES TRASH FINAL TO DRAFT %s' % (oldkeysplit))
            theData.pop(oldkey, None)
        if ('FINAL' == keysplit[1]) and ('DRAFT' == oldkeysplit[1]):
            print('YES TRASH DRAFT TO FINAL %s' % (oldkeysplit))
            theData.pop(oldkey, None)

        if '0' == keysplit[2]:
            if '0' == keysplit[3]:
                print('YES TRASH ZERO %s' % (keysplit))
                del theData[key]
            else:
                print('ERROR TRASH ZERO %s %s' % (keysplit, value))
                sys.exit()

        oldkey = key
        oldkeysplit = keysplit

    # don't forget the last one -- but only if it is still there: the file
    # may have been empty, or the last entry may already have been trashed
    # as a zero entry.
    if oldkey in theData:
        print('YES TRASH LAST ENTRY %s' % (oldkeysplit))
        del theData[oldkey]

    return theData
コード例 #2
0
def readTheData(inFileName):
    """Read sentence records from a file into a dictionary.

    Every line of the file is wrapped in a ``HistoSentence``, checked with
    its ``checkInternalCorrectness()`` method, and stored under a key
    built from whitespace-separated fields 1, 2, 7 and 12 of the line.
    Field 2 must be ``DRAFT`` or ``FINAL``; for ``FINAL`` lines fields 12
    and 7 are swapped in the key so that each kind sorts by its own
    sentence sub first.  A later line with the same key replaces an
    earlier one.

    Args:
        inFileName: path of the text file to read.

    Returns:
        The ``defaultdict`` (created without a default factory) mapping
        the formatted key strings to ``HistoSentence`` objects.

    Raises:
        SystemExit: if a line is neither DRAFT nor FINAL.
        IndexError: if a line has fewer than 13 whitespace-separated
            fields.
    """
    theData = defaultdict()

    # The context manager closes the file even when a bad line makes us
    # leave through sys.exit().
    with open(inFileName) as dataFile:
        for line in dataFile:
            sent = HistoSentence(line)
            lineSplit = line.split()
            # ok, here's a hiccup
            # we want both draft and final to sort in order of their
            #      own sentence subs
            if "DRAFT" == lineSplit[2]:
                key = "%6s %5s %4s %4s" % (lineSplit[1], lineSplit[2], lineSplit[7], lineSplit[12])
            elif "FINAL" == lineSplit[2]:
                key = "%6s %5s %4s %4s" % (lineSplit[1], lineSplit[2], lineSplit[12], lineSplit[7])
            else:
                print("ERROR: sentence is neither DRAFT nor FINAL %s" % (sent,))
                sys.exit()

            sent.checkInternalCorrectness()

            theData[key] = sent

    return theData