Example #1
def train_speaker(train_env, tok, n_iters, log_every=500, val_envs={}):
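    # Trains the speaker for n_iters iterations, pausing every log_every steps to
    # log BLEU, loss, and word/sentence accuracy on each validation environment
    # and to checkpoint the best-BLEU and best-loss models per environment.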
    writer = SummaryWriter(logdir=log_dir)
    listner = Seq2SeqAgent(train_env, "", tok, args.maxAction)
    speaker = Speaker(train_env, listner, tok)

    if args.fast_train:
        log_every = 40

    best_bleu = defaultdict(lambda: 0)
    best_loss = defaultdict(lambda: 1232)
    for idx in range(0, n_iters, log_every):
        interval = min(log_every, n_iters - idx)

        # Train for log_every interval
        speaker.env = train_env
        speaker.train(interval)   # Train interval iters

        print()
        print("Iter: %d" % idx)

        # Evaluation
        for env_name, (env, evaluator) in val_envs.items():
            if 'train' in env_name:  # Skip the large training split for efficiency
                continue

            print("............ Evaluating %s ............." % env_name)
            speaker.env = env
            path2inst, loss, word_accu, sent_accu = speaker.valid()
            path_id = next(iter(path2inst.keys()))
            print("Inference: ", tok.decode_sentence(path2inst[path_id]))
            print("GT: ", evaluator.gt[str(path_id)]['instructions'])
            bleu_score, precisions = evaluator.bleu_score(path2inst)

            # Tensorboard log
            writer.add_scalar("bleu/%s" % (env_name), bleu_score, idx)
            writer.add_scalar("loss/%s" % (env_name), loss, idx)
            writer.add_scalar("word_accu/%s" % (env_name), word_accu, idx)
            writer.add_scalar("sent_accu/%s" % (env_name), sent_accu, idx)
            writer.add_scalar("bleu4/%s" % (env_name), precisions[3], idx)

            # Save the model according to the bleu score
            if bleu_score > best_bleu[env_name]:
                best_bleu[env_name] = bleu_score
                print('Save the model with %s BEST env bleu %0.4f' % (env_name, bleu_score))
                speaker.save(idx, os.path.join(log_dir, 'state_dict', 'best_%s_bleu' % env_name))

            if loss < best_loss[env_name]:
                best_loss[env_name] = loss
                print('Save the model with %s BEST env loss %0.4f' % (env_name, loss))
                speaker.save(idx, os.path.join(log_dir, 'state_dict', 'best_%s_loss' % env_name))

            # Screen print out
            print("Bleu 1: %0.4f Bleu 2: %0.4f, Bleu 3 :%0.4f,  Bleu 4: %0.4f" % tuple(precisions))

Example #2

def main(argv):
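    # Builds a corpus from a plain, delimiter-separated description file: the
    # first line names the fields, each following row describes one recording.
    # Writes a .corpus file that includes a .recordings and a .speaker file.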
    defaultEncoding = "utf8"

    usage = "usage: %prog [options] <corpus name> <corpus outfile>\n " + __doc__
    optionParser = OptionParser(usage=usage)
    optionParser.add_option("-E",
                            "--encoding",
                            default=defaultEncoding,
                            dest="encoding",
                            help="encoding [" + defaultEncoding + "]")
    optionParser.add_option(
        "-f",
        "--file",
        dest="sourcefilename",
        help="source filename with at least sentence IDs and orthographies specified in a file")
    optionParser.add_option("-F",
                            "--Field",
                            default="orth",
                            dest="orthfieldname",
                            help="orth field name, e.g. orth or zip [orth]")
    optionParser.add_option("-d",
                            "--delimiter",
                            default=";",
                            dest="delimiter",
                            help="field delimiter in the plain file [;]")
    optionParser.add_option(
        "-a",
        "--all",
        dest="splitall",
        action="store_true",
        help="split every record into a single corpus file")
    optionParser.add_option("-w",
                            "--wordcount",
                            dest="wordlist",
                            action="store_true",
                            help="print word list")
    optionParser.add_option("-v",
                            "--verbose",
                            dest="verbose",
                            action="store_true")
    (options, args) = optionParser.parse_args()

    if len(args) != 2:
        optionParser.error("incorrect number of arguments %d" % len(args))
        sys.exit()

    #set filenames
    corpusname = args[0]
    corpusFilename = args[1]

    if not corpusFilename.endswith(".corpus"):
        corpusFilename += ".corpus"
    recordingsFilename = corpusFilename[:corpusFilename.rfind('.')] + ".recordings"
    speakerDescriptionFilename = corpusFilename[:corpusFilename.rfind('.')] + ".speaker"

    if options.verbose:
        print "corpusFilename =", corpusFilename
        print "speakerDescriptionFilename =", speakerDescriptionFilename
        print "recordingsFilename =", recordingsFilename

    #create corpus include structure
    newCorpus = Corpus(corpusname, encoding="utf8")
    newCorpus.includeFile(os.path.abspath(speakerDescriptionFilename))
    newCorpus.includeFile(os.path.abspath(recordingsFilename))
    newCorpus.save(corpusFilename)

    # create default speaker description file
    speakers = []
    speakerDescription = Speaker(corpusname, encoding="utf8")

    #read sentence IDs and structure information
    sentenceIDsFile = uopen(options.sourcefilename, options.encoding, 'r')
    # sentenceIDsFile = open(options.sourcefilename,'r')
    firstLine = sentenceIDsFile.readline()[:-1]
    #   fieldList = unicode(firstLine, options.encoding).split(options.delimiter)
    fieldList = firstLine.split(options.delimiter)
    if options.verbose:
        print "structure:", firstLine, fieldList
    fieldMap = {}
    fieldId = 0
    for field in fieldList:
        fieldMap[field] = fieldId
        if options.verbose:
            print field, fieldId
        fieldId += 1

    #check required fields
    if not fieldMap.has_key('name') or not fieldMap.has_key('video') \
            or not fieldMap.has_key('orth'):
        print "ERROR: one or more required fields [name, video, and/or orth] are missing."
        keys = fieldMap.keys()
        keys.sort()
        for key in keys:
            print key, fieldMap[key]
        sys.exit()

    #create recordings from data info file
    newRecordings = Corpus(corpusname, encoding="utf-8")
    sentenceCnt = 0
    for line in sentenceIDsFile:
        sentenceCnt += 1
        #       splitlist = unicode(line, options.encoding).strip().split(options.delimiter)
        splitlist = line.strip().split(options.delimiter)
        if options.verbose:
            for i in range(0, len(splitlist)):
                print i, splitlist[i]
        if len(splitlist) < len(fieldMap):
            if options.verbose:
                print "ERROR: data row '%s' is invalid and will be discarded." % (
                    splitlist)
        else:
            start = -1
            end = -1
            speakerName = "default"
            speakerGender = "male"
            recordOrth = ""
            recordTranslation = ""

            if (fieldMap.get("start") != None):
                start = splitlist[fieldMap['start']]
            if (fieldMap.get("end") != None):
                end = splitlist[fieldMap['end']]
            if (fieldMap.get("speaker") != None):
                speakerName = splitlist[fieldMap['speaker']]
            if (fieldMap.get("gender") != None):
                speakerGender = splitlist[fieldMap['gender']]
            if (fieldMap.get("translation") != None):
                recordTranslation = splitlist[fieldMap['translation']]

            # update speaker names
            if speakerName not in speakers:
                speakerDescription.addSpeakerDescription(
                    speakerName, speakerGender)
                speakers.append(speakerName)

            # add recording
            newRecordings.addRecording(
                splitlist[fieldMap['name']], splitlist[fieldMap['video']],
                start, end, speakerName,
                splitlist[fieldMap[options.orthfieldname]], False,
                recordTranslation)

    # close corpus and write to xml file
    uclose(sentenceIDsFile)
    newRecordings.save(recordingsFilename)
    speakerDescription.save(speakerDescriptionFilename)

    print "\n----------------------------------------------------------"
    print "corpus file               :'" + corpusFilename + "'"
    print "speaker description file  :'" + speakerDescriptionFilename + "'"
    print "recordings file           :'" + recordingsFilename + "'"
    print
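
For reference, a sketch of an input file and an invocation this script accepts; the file and script names below are hypothetical, and only the -f/--file option, the corpus-name argument, and the corpus-outfile argument come from the code above.

# sentences.csv -- the header line names the fields (';' is the default delimiter);
# name, video, and orth are required, the remaining fields are optional.
name;video;start;end;speaker;gender;orth;translation
rec001;clips/rec001.mp4;0.0;3.2;spk1;female;HELLO WORLD;hello world

python createCorpus.py -f sentences.csv -v MyCorpus myCorpus

With these arguments the script writes myCorpus.corpus together with the derived myCorpus.recordings and myCorpus.speaker files.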