Code Example #1
def main(argv):
    # Debug defaults: these hard-coded arguments override the caller-supplied argv.
    argv = []
    argv.append('--dataset')
    argv.append('dstc5_dev')
    argv.append('--dataroot')
    argv.append('../data')
    argv.append('--trackfile')
    argv.append('baseline_dev.json')
    argv.append('--ontology')
    argv.append('config/ontology_dstc5.json')
    argv.append('--method')
    argv.append('1')
    print argv

    parser = argparse.ArgumentParser(description='Simple hand-crafted dialog state tracker baseline.')
    parser.add_argument('--dataset', dest='dataset', action='store', metavar='DATASET', required=True, help='The dataset to analyze')
    parser.add_argument('--dataroot',dest='dataroot',action='store',required=True,metavar='PATH', help='Will look for corpus in <destroot>/<dataset>/...')
    parser.add_argument('--trackfile',dest='trackfile',action='store',required=True,metavar='JSON_FILE', help='File to write with tracker output')
    parser.add_argument('--ontology',dest='ontology',action='store',metavar='JSON_FILE',required=True,help='JSON Ontology file')
    parser.add_argument('--method',dest='method',action='store',choices=['1', '2'],required=True,help='Baseline mode')

    args = parser.parse_args(argv)
    dataset = dataset_walker.dataset_walker(args.dataset, dataroot=args.dataroot, labels=False, translations = True)

    track_file = open(args.trackfile, "wb")
    track = {"sessions":[]}
    track["dataset"]  = args.dataset
    start_time = time.time()

    if args.method == '1':
        tagsets = ontology_reader.OntologyReader(args.ontology).get_tagsets()
        tracker = BaselineMethod1(tagsets)
    elif args.method == '2':
        translated_tagsets = ontology_reader.OntologyReader(args.ontology).get_translated_tagsets()
        tracker = BaselineMethod2(translated_tagsets)

    for call in dataset:
        this_session = {"session_id":call.log["session_id"], "utterances":[]}
        tracker.reset()
        for (utter, translations, _) in call:
            print utter, translations  # debug output for each utterance/translation pair
            sys.stderr.write('%d:%d      \r'%(call.log['session_id'], utter['utter_index']))
            tracker_result = tracker.addUtter(utter, translations)
            if tracker_result is not None:
                this_session["utterances"].append(copy.deepcopy(tracker_result))
        track["sessions"].append(this_session)
    end_time = time.time()
    elapsed_time = end_time - start_time
    track['wall_time'] = elapsed_time

    json.dump(track, track_file, indent=4)

    track_file.close()
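For reference, the `track` object that this script serializes to the `--trackfile` path has roughly the following shape. This is a minimal sketch inferred from the code above; the field values are illustrative only, and the per-utterance entries are whatever `tracker.addUtter()` returns.

example_track = {
    "dataset": "dstc5_dev",    # copied from the --dataset argument
    "wall_time": 12.3,         # elapsed seconds measured around the tracking loop
    "sessions": [
        {
            "session_id": 1,
            "utterances": [
                # one deep-copied addUtter() result per utterance
            ],
        },
    ],
}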
Code Example #2
def main(argv):
	install_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
	utils_dirname = os.path.join(install_path,'lib')

	sys.path.append(utils_dirname)
	from dataset_walker import dataset_walker
	
	parser = argparse.ArgumentParser(description='Check the validity of a tracker output object.')
	parser.add_argument('--dataset', dest='dataset', action='store', metavar='DATASET', required=True,
						help='The dataset to analyze')
	parser.add_argument('--dataroot',dest='dataroot',action='store', metavar='PATH', required=True,
						help='Will look for corpus in <destroot>/<dataset>/...')
	parser.add_argument('--trackfile',dest='scorefile',action='store',metavar='JSON_FILE',required=True,
						help='File containing score JSON')
	parser.add_argument('--ontology',dest='ontology',action='store',metavar='JSON_FILE',required=True,
						help='JSON Ontology file')

	args = parser.parse_args()

	sessions = dataset_walker(args.dataset,dataroot=args.dataroot,labels=False)
	tracker_output = json.load(open(args.scorefile))

	tagsets = ontology_reader.OntologyReader(args.ontology).get_tagsets()

	checker = TrackChecker(sessions, tracker_output, tagsets)
	checker.check()
	checker.print_errors()
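`TrackChecker` itself is defined elsewhere in the scripts. As orientation only, a hypothetical minimal skeleton consistent with the constructor and methods used above might look like the sketch below; the class name, fields, and the single check are illustrative, not the real implementation.

class MinimalTrackChecker(object):
    def __init__(self, sessions, tracker_output, tagsets):
        self.sessions = list(sessions)
        self.tracker_output = tracker_output
        self.tagsets = tagsets
        self.errors = []

    def check(self):
        # Illustrative check: the output must contain one entry per session in the corpus.
        if len(self.tracker_output.get("sessions", [])) != len(self.sessions):
            self.errors.append("session count mismatch")

    def print_errors(self):
        if not self.errors:
            print "No errors found"
        for error in self.errors:
            print error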
Code Example #3
def main(argv):
    parser = argparse.ArgumentParser(
        description='Simple hand-crafted dialog state tracker baseline.')
    parser.add_argument('--dataset',
                        dest='dataset',
                        action='store',
                        metavar='DATASET',
                        required=True,
                        help='The dataset to analyze')
    parser.add_argument(
        '--dataroot',
        dest='dataroot',
        action='store',
        required=True,
        metavar='PATH',
        help='Will look for corpus in <destroot>/<dataset>/...')
    parser.add_argument('--trackfile',
                        dest='trackfile',
                        action='store',
                        required=True,
                        metavar='JSON_FILE',
                        help='File to write with tracker output')
    parser.add_argument('--ontology',
                        dest='ontology',
                        action='store',
                        metavar='JSON_FILE',
                        required=True,
                        help='JSON Ontology file')

    args = parser.parse_args()
    dataset = dataset_walker.dataset_walker(args.dataset,
                                            dataroot=args.dataroot,
                                            labels=False)
    tagsets = ontology_reader.OntologyReader(args.ontology).get_tagsets()

    track_file = open(args.trackfile, "wb")
    track = {"sessions": []}
    track["dataset"] = args.dataset
    start_time = time.time()

    tracker = BaselineTracker(tagsets)
    for call in dataset:
        this_session = {"session_id": call.log["session_id"], "utterances": []}
        tracker.reset()
        for (utter, _) in call:
            sys.stderr.write('%d:%d\n' %
                             (call.log['session_id'], utter['utter_index']))
            tracker_result = tracker.addUtter(utter)
            if tracker_result is not None:
                this_session["utterances"].append(
                    copy.deepcopy(tracker_result))
        track["sessions"].append(this_session)

    end_time = time.time()
    elapsed_time = end_time - start_time
    track['wall_time'] = elapsed_time

    json.dump(track, track_file, indent=4)

    track_file.close()
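Snippets like this one are normally run as standalone scripts. Since `parse_args()` here reads `sys.argv` directly, the usual (assumed) entry-point guard at the bottom of the file would simply be the following, assuming `sys` is already imported as the body above implies:

if __name__ == '__main__':
    main(sys.argv)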
Code Example #4
def main(argv):
    install_path = os.path.abspath(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    utils_dirname = os.path.join(install_path, 'lib')

    sys.path.append(utils_dirname)
    from dataset_walker import dataset_walker

    parser = argparse.ArgumentParser(
        description='Check the validity of a system output for SLU task.')
    parser.add_argument('--dataset',
                        dest='dataset',
                        action='store',
                        metavar='DATASET',
                        required=True,
                        help='The dataset to analyze')
    parser.add_argument(
        '--dataroot',
        dest='dataroot',
        action='store',
        metavar='PATH',
        required=True,
        help='Will look for corpus in <destroot>/<dataset>/...')
    parser.add_argument('--jsonfile',
                        dest='jsonfile',
                        action='store',
                        metavar='JSON_FILE',
                        required=True,
                        help='File containing JSON output')
    parser.add_argument('--ontology',
                        dest='ontology',
                        action='store',
                        metavar='JSON_FILE',
                        required=True,
                        help='JSON Ontology file')
    parser.add_argument('--roletype',
                        dest='roletype',
                        action='store',
                        choices=['GUIDE', 'TOURIST'],
                        required=True,
                        help='Target role')

    args = parser.parse_args()

    sessions = dataset_walker(args.dataset,
                              dataroot=args.dataroot,
                              labels=False)
    system_output = json.load(open(args.jsonfile))

    tagsets = ontology_reader.OntologyReader(args.ontology).get_pilot_tagsets()

    checker = TrackChecker(sessions, system_output, tagsets, args.roletype)
    checker.check()
    checker.print_errors()
Code Example #5
    def __init__(self, tagsets=None, nameOfODictPickle=None):
        if self.isIgnoreIlligalSlotValueInEstimation:
            print "Naive Ensembler ignores the following values:"
            print self.listIlligalSlotValues
        # Load the output dictionary if a pickle file is given
        if nameOfODictPickle is not None:
            print "Load output dictionary file from " + nameOfODictPickle
            self.dictOut = pickle.load(open(nameOfODictPickle, "r"))
            assert self.dictOut is not None, "Failed to read output dictionary"
        if tagsets is None:
            self.tagsets = ontology_reader.OntologyReader("scripts/config/ontology_dstc4.json").get_tagsets()
        else:
            self.tagsets = tagsets
        # Initialize the underlying baseline trackers
        self.__initBaseTracker()
        # Variables for tracking state
        self.frame = {}
        self.reset()
Code Example #6
def errorAnalysis(argv):
    print "ERROR ANALYSIS OF NAIVEENSEMBLER"
    print argv

    parser = argparse.ArgumentParser(
        description='Simple hand-crafted dialog state tracker baseline.')
    parser.add_argument('--dataset',
                        dest='dataset',
                        action='store',
                        metavar='DATASET',
                        required=True,
                        help='The dataset to analyze')
    parser.add_argument(
        '--dataroot',
        dest='dataroot',
        action='store',
        required=True,
        metavar='PATH',
        help='Will look for corpus in <destroot>/<dataset>/...')
    parser.add_argument('--trackfile',
                        dest='trackfile',
                        action='store',
                        required=True,
                        metavar='JSON_FILE',
                        help='File to write with tracker output')
    parser.add_argument('--ontology',
                        dest='ontology',
                        action='store',
                        metavar='JSON_FILE',
                        required=True,
                        help='JSON Ontology file')

    #args = parser.parse_args()
    args = parser.parse_args(argv)
    dataset = dataset_walker.dataset_walker(args.dataset,
                                            dataroot=args.dataroot,
                                            labels=True)
    tagsets = ontology_reader.OntologyReader(args.ontology).get_tagsets()

    track = {"sessions": []}
    track["dataset"] = args.dataset
    start_time = time.time()

    tracker = NaiveEnsembleBasedTrackerWithNBest(
        tagsets, nameOfODictPickle="dictOutput.pic")
    for call in dataset:
        this_session = {"session_id": call.log["session_id"], "utterances": []}
        tracker.reset()
        for (utter, label) in call:
            # Preprocessing 2
            if utter['segment_info']['target_bio'] == 'B':
                print "\n -----New sub-dialogue----------------------------------------------------"
            print "s:" + str(call.log['session_id']) + " u:" + str(
                utter['utter_index'])
            print "Input=" + utter["transcript"]
            tracker_result = tracker.addUtter(utter, call)
            if tracker_result is not None:
                this_session["utterances"].append(tracker_result)
                #
                print "Tracker's output:"
                print tracker_result
                if "frame_label" in label:
                    for slot in label["frame_label"].keys():
                        if (slot not in tracker_result["frame_label"]):
                            print "-slot [" + slot + "] is not exsisted in output"
                            for value in label["frame_label"][slot]:
                                print "-value [" + value + "] of slot [" + slot + "] is not exsisted in output"
                        else:
                            if len(label["frame_label"][slot]) != len(
                                    tracker_result["frame_label"][slot]):
                                #In case value in output, but repudant
                                print "-slot [" + slot + "] include repudant values"
                            for value in label["frame_label"][slot]:
                                #In case value not in output
                                if (value not in tracker_result["frame_label"]
                                    [slot]):
                                    print "-value [" + value + "] of slot [" + slot + "] is not exsisted in output"
        track["sessions"].append(this_session)
    end_time = time.time()
    elapsed_time = end_time - start_time
    track['wall_time'] = elapsed_time
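In the DSTC4 annotations consumed above, `label["frame_label"]` (and likewise `tracker_result["frame_label"]`) maps each slot name to a list of values, which is what the comparison loops walk over. A tiny illustration with made-up slot and value strings:

example_frame_label = {
    "INFO": ["Pricerange"],
    "CUISINE": ["Chinese", "Seafood"],  # a slot may hold multiple values
}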
Code Example #7
def main(argv):
    # TODO: implementation
    # Confirm hypotheses about the data
    tagsets = ontology_reader.OntologyReader("scripts/config/ontology_dstc4.json").get_tagsets()
    datasetTrain = dataset_walker.dataset_walker("dstc4_train", dataroot="data", labels=True)
    datasetDev = dataset_walker.dataset_walker("dstc4_dev", dataroot="data", labels=True)
    print "Calculate statistics of the dialogs."
    # Q: Is the number of values in each slot always 1 when the slot is present,
    #    i.e., does a slot never contain multiple values?
    # A: No, there are many multiple-value cases.
    isEnumerateMultiValueCase = True
    isEnumerateMultiSlotCase = True
    countMultipleValueInOneSlot = 0
    #
    maxSlotValueTrain = {}
    countMultipleSlot = 0
    for call in datasetTrain:
        for (uttr,label) in call:
            if "frame_label" in label:
                if isEnumerateMultiSlotCase:
                    if len(label["frame_label"].keys()) > 1:
                        print label["frame_label"].keys()
                        countMultipleSlot+=1
                for slot in label["frame_label"].keys():
                    if isEnumerateMultiValueCase:
                        if slot not in maxSlotValueTrain:
                            maxSlotValueTrain[slot]=len(label["frame_label"][slot])
                        else:
                            if maxSlotValueTrain[slot] < len(label["frame_label"][slot]):
                                maxSlotValueTrain[slot] = len(label["frame_label"][slot])
                        if len(label["frame_label"][slot]) > 1:
                            print "slot=" + slot + ":",
                            print label["frame_label"][slot]
                            countMultipleValueInOneSlot+=1                            
    
    for call in datasetDev:
        for (uttr,label) in call:
            if "frame_label" in label:
                if isEnumerateMultiSlotCase:
                    if len(label["frame_label"].keys()) > 1:
                        print label["frame_label"].keys()
                        countMultipleSlot+=1
                for slot in label["frame_label"].keys():
                    if isEnumerateMultiValueCase:
                        if slot not in maxSlotValueTrain:
                            maxSlotValueTrain[slot]=len(label["frame_label"][slot])
                        else:
                            if maxSlotValueTrain[slot] < len(label["frame_label"][slot]):
                                maxSlotValueTrain[slot] = len(label["frame_label"][slot])
                        if len(label["frame_label"][slot]) > 1:
                            print "slot=" + slot + ":",
                            print label["frame_label"][slot]
                            countMultipleValueInOneSlot+=1
    if isEnumerateMultiValueCase:
        print "Number of multiple-value situations = " + str(countMultipleValueInOneSlot)
        avr = 0.0
        for slot in maxSlotValueTrain.keys():
            avr += float(maxSlotValueTrain[slot])
        avr /= float(len(maxSlotValueTrain.keys()))
        maxSlotValueTrain["AverageNumber"] = int(round(avr))
        print "Maximum number of values observed per slot:"
        print maxSlotValueTrain

    if isEnumerateMultiSlotCase:
        print "Number of multiple-slot situations = " + str(countMultipleSlot)
    # Q: How many OOV cases are there?
    # A: Train -> Dev: 1195, Dev -> Train: 4789
    #    With additional text normalization, Train -> Dev: 937, Dev -> Train: 3643
    #    With further normalization, Train -> Dev: 831, Dev -> Train: 3237
    isCountNumberofOOVCase = False
    dictVocabInTrain = {}
    dictVocabInDev = {}
    numberOfOOVCaseInTrain2Dev = 0
    numberOfOOVCaseInDev2Train = 0
    if isCountNumberofOOVCase:
        for call in datasetTrain:
            for (uttr,label) in call:
                trans=uttr["transcript"]
                transt=re.sub("\,","",trans)
                transt=re.sub("\?","",transt)
                transt=re.sub("\.","",transt)
                transt=re.sub("(%.+ )?","",transt)
                #Additional normalize
                transt=re.sub("(%.+$)?","",transt)
                transt=re.sub("%","",transt)
                transt=re.sub("(-|~)"," ",transt)
                transt=re.sub("\!","",transt)
                transt=re.sub("'"," ",transt)
                transt=re.sub("\"","",transt)
                #
                transt=re.sub("/","",transt)
                transt=re.sub("[1-9]+","Replacedval",transt)
                transt=transt.lower()
                                    
                words=transt.split(" ")
                for word in words:
                    #Additional normalization
                    lmtr=nltk.stem.wordnet.WordNetLemmatizer()
                    word=lmtr.lemmatize(word)

                    dictVocabInTrain[word]=0
        for call in datasetDev:
            for (uttr,label) in call:
                trans=uttr["transcript"]
                transt=re.sub("\,","",trans)
                transt=re.sub("\?","",transt)
                transt=re.sub("\.","",transt)
                transt=re.sub("(%.+ )?","",transt)
                #Additional normalize
                transt=re.sub("(%.+$)?","",transt)
                transt=re.sub("%","",transt)
                transt=re.sub("(-|~)"," ",transt)
                transt=re.sub("\!","",transt)
                transt=re.sub("'"," ",transt)
                transt=re.sub("\"","",transt)
                #
                transt=re.sub("/","",transt)
                transt=re.sub("[1-9]+","Replacedval",transt)
                transt=transt.lower()
                
                words=transt.split(" ")
                for word in words:
                    #Additional normalization
                    lmtr=nltk.stem.wordnet.WordNetLemmatizer()
                    word=lmtr.lemmatize(word)

                    if word not in dictVocabInTrain:
                        print word.encode("utf-8")
                        numberOfOOVCaseInTrain2Dev+=1
        print "Number of OOV case in Train -> Dev situation = " + str(numberOfOOVCaseInTrain2Dev)
        print "\n\n\n\n\n"
        for call in datasetDev:
            for (uttr,label) in call:
                trans=uttr["transcript"]
                transt=re.sub("\,","",trans)
                transt=re.sub("\?","",transt)
                transt=re.sub("\.","",transt)
                transt=re.sub("(%.+ )?","",transt)
                #Additional normalize
                transt=re.sub("(%.+$)?","",transt)
                transt=re.sub("%","",transt)
                transt=re.sub("(-|~)"," ",transt)
                transt=re.sub("\!","",transt)
                transt=re.sub("'"," ",transt)
                transt=re.sub("\"","",transt)
                #
                transt=re.sub("/","",transt)
                transt=re.sub("[1-9]+","Replacedval",transt)                    
                transt=transt.lower()

                words=transt.split(" ")
                for word in words:
                    #Additional normalization
                    lmtr=nltk.stem.wordnet.WordNetLemmatizer()
                    word=lmtr.lemmatize(word)
                    
                    dictVocabInDev[word]=0
        for call in datasetTrain:
            for (uttr,label) in call:
                trans=uttr["transcript"]
                transt=re.sub("\,","",trans)
                transt=re.sub("\?","",transt)
                transt=re.sub("\.","",transt)
                transt=re.sub("(%.+ )?","",transt)
                #Additional normalize
                transt=re.sub("(%.+$)?","",transt)
                transt=re.sub("%","",transt)
                transt=re.sub("(-|~)"," ",transt)
                transt=re.sub("\!","",transt)
                transt=re.sub("'"," ",transt)
                transt=re.sub("\"","",transt)
                #
                transt=re.sub("/","",transt)
                transt=re.sub("[1-9]+","Replacedval",transt)
                transt=transt.lower()

                words=transt.split(" ")
                for word in words:
                    #Additional normalization
                    lmtr=nltk.stem.wordnet.WordNetLemmatizer()
                    word=lmtr.lemmatize(word)

                    if word not in dictVocabInDev:
                        print word.encode("utf-8")
                        numberOfOOVCaseInDev2Train+=1            
        print "Number of OOV case in Dev -> Train situation = " + str(numberOfOOVCaseInDev2Train)
        
    # Q: How many frame_labels are unseen between the train and dev data?
    # A: Quite a few: train -> dev 96/313 (unseen/all in dev), dev -> train 346/563 (unseen/all in train)
    isCountUnseenframeLabel = False
    dictTopicSlotValueTrain = []
    numUnseenframeLabel = 0
    alreadychecked = []
    dictTopicSlotValueDev = {}
    if isCountUnseenframeLabel:
        for call in datasetTrain:
            for (uttr,label) in call:
                if "frame_label" in label:
                    for slot in label["frame_label"].keys():
                        for value in label["frame_label"][slot]:
                            dictTopicSlotValueTrain.append(slot+value)
        for call in datasetDev:
            for (uttr,label) in call:
                if "frame_label" in label:
                    for slot in label["frame_label"].keys():
                        for value in label["frame_label"][slot]:
                            dictTopicSlotValueDev[(slot+value)]=0
                            if (slot+value) not in dictTopicSlotValueTrain:
                                if (slot+value) not in alreadychecked:
                                    numUnseenframeLabel+=1
                                    alreadychecked.append((slot+value))
        print "Number of Unseen label train -> dev = " + str(numUnseenframeLabel)
        print "Ratio (unseen/all in dev) = " + str(numUnseenframeLabel) + "/" + str(len(dictTopicSlotValueDev.keys()))

        dictTopicSlotValueDev=[]
        numUnseenframeLabel=0
        alreadychecked=[]
        dictTopicSlotValueTrain={}
        for call in datasetDev:
            for (uttr,label) in call:
                if "frame_label" in label:
                    for slot in label["frame_label"].keys():
                        for value in label["frame_label"][slot]:
                            dictTopicSlotValueDev.append(slot+value)
        for call in datasetTrain:
            for (uttr,label) in call:
                if "frame_label" in label:
                    for slot in label["frame_label"].keys():
                        for value in label["frame_label"][slot]:
                            dictTopicSlotValueTrain[(slot+value)]=0
                            if (slot+value) not in dictTopicSlotValueDev:
                                if (slot+value) not in alreadychecked:
                                    numUnseenframeLabel+=1
                                    alreadychecked.append((slot+value))
        print "Number of Unseen label dev -> train = " + str(numUnseenframeLabel)
        print "Ratio (unseen/all in train) = " + str(numUnseenframeLabel) + "/" + str(len(dictTopicSlotValueTrain.keys()))
Code Example #8
    def __init__(self,
                 tagsets=None,
                 nameOfODictPickle=None,
                 nameOfIDictPickle=None,
                 nameOfLSTMFile=None,
                 NameOfLearnedD2VFile="LearnedDoc2Vec.d2v"):
        # Print out the experimental setup
        print "In both learning and tracking,"
        if self.isStoreFrameOutputedByLSTMAtEachTurnInSubDialog:
            print "the tracker stores the LSTM output at each turn in the sub-dialogue,"
        else:
            print "the tracker does not store the LSTM output at each turn in the sub-dialogue,"
        if self.isIgnoreUtterancesNotRelatedToMainTask:
            print "and the tracker ignores utterances that are not related to the main task."
        else:
            print "and the tracker does not ignore utterances that are not related to the main task."
        if self.isSeparateDialogIntoSubDialog:
            print "and the tracker treats one sub-dialogue as one sequence."
        else:
            print "and the tracker treats one dialogue as one sequence."
        if self.isCombineResultWithBaseline:
            print "and the LSTM output is combined with that of the baseline."
        else:
            print "and the LSTM output is not combined with that of the baseline."
        if self.isEnableToUseM1sFeature:
            print "and features made by M1s are used in the input."
        else:
            print "and features made by M1s are not used in the input."
        if self.isUseSentenceRepresentationInsteadofBOW:
            print "and a distributed sentence representation is used instead of BOW and meta info."
        else:
            print "and BOW and meta info are used as the sentence features; the distributed sentence representation is not used."

        #Variables for tracking state
        self.LSTM = None
        self.dictOut = None
        self.dictIn = None
        if nameOfLSTMFile is not None:
            print "Load LSTM network file from " + nameOfLSTMFile
            self.LSTM = NetworkReader.readFrom(nameOfLSTMFile)
            assert self.LSTM is not None, "Failed to read LSTM"
        if nameOfIDictPickle is not None:
            print "Load input dictionary file from " + nameOfIDictPickle
            f = open(nameOfIDictPickle, "r")
            self.dictIn = pickle.load(f)
            f.close()
            assert self.dictIn is not None, "Failed to read Input dictionary"

        if nameOfODictPickle is not None:
            print "Load output dictionary file from " + nameOfODictPickle
            f = open(nameOfODictPickle, "r")
            self.dictOut = pickle.load(f)
            f.close()
            assert self.dictOut is not None, "Failed to read Output dictionary"
        if tagsets is None:
            self.tagsets = ontology_reader.OntologyReader(
                "scripts/config/ontology_dstc4.json").get_tagsets()
        else:
            self.tagsets = tagsets
        # Variables for fast processing
        # -1: cached fuzzy-matching results
        if LSTMWithBOWTracker.dictFuzzyMatchingResult is None:
            try:
                f = gzip.open(self.FileNameofdictFuzzyMatchingResult, "rb")
            except Exception:
                print "FuzzyMatchingResult.pic was not found. A new dictionary will be created."
                LSTMWithBOWTracker.dictFuzzyMatchingResult = {}
            else:
                print "FuzzyMatchingResult.pic dictionary was loaded."
                LSTMWithBOWTracker.dictFuzzyMatchingResult = pickle.load(f)
                f.close()
        # -2: cached M1 feature vectors
        if self.isEnableToUseM1sFeature and self.isUtilizeM1VectorDictionary:
            try:
                f = gzip.open(self.FileNameofM1Vector, "rb")
            except Exception:
                print self.FileNameofM1Vector + " was not found. A new dictionary will be created."
                self.dictM1Vector = {}
            else:
                print self.FileNameofM1Vector + " dictionary was loaded."
                self.dictM1Vector = pickle.load(f)
                f.close()

        #
        try:
            f = open(self.FileNameofNumClassFeature, "rb")
            self.TOTALSIZEOFCLASSFeature = pickle.load(f)
            f.close()
            print "TSizeClasssFeature=" + str(self.TOTALSIZEOFCLASSFeature)
        except Exception:
            print self.FileNameofNumClassFeature + " was not found. learn() is required before tracking. "
        try:
            f = open(self.FileNameofNumSentenceFeature, "rb")
            self.TOTALSIZEOFSENTENCEFeature = pickle.load(f)
            f.close()
            print "TSizeSentenceFeature=" + str(
                self.TOTALSIZEOFSENTENCEFeature)
        except Exception:
            print self.FileNameofNumSentenceFeature + " was not found. learn() is required before tracking. "
        try:
            f = open(self.FileNameofNumM1Feature, "rb")
            self.TOTALSIZEOFM1DEFINEDFeature = pickle.load(f)
            f.close()
            print "TSizeM1Feature=" + str(self.TOTALSIZEOFM1DEFINEDFeature)
        except Exception:
            print self.FileNameofNumM1Feature + " was not found. learn() is required before tracking. "

        #
        if self.isUseSentenceRepresentationInsteadofBOW:
            self.d2v = LSTMWithBOWTracker.loadDoc2VecAndCheckAppropriateness(
                NameOfLearnedD2VFile)
        #
        self.frame = {}
        self.reset()
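The constructor above repeats the same open / pickle.load / close / assert pattern for the LSTM dictionaries and cached feature sizes. A small helper along these lines (an assumed refactoring sketch, not part of the original tracker) would keep that logic in one place:

import pickle

def load_pickle_or_none(path, description):
    # Load a pickled object, mirroring the status messages printed above.
    if path is None:
        return None
    print "Load " + description + " file from " + path
    f = open(path, "r")
    try:
        obj = pickle.load(f)
    finally:
        f.close()
    assert obj is not None, "Failed to read " + description
    return obj

# e.g. self.dictIn = load_pickle_or_none(nameOfIDictPickle, "input dictionary")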
Code Example #9
def main(argv):
    install_path = os.path.abspath(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    utils_dirname = os.path.join(install_path, 'lib')

    sys.path.append(utils_dirname)
    from dataset_walker import dataset_walker

    parser = argparse.ArgumentParser(
        description='Evaluate output from an SLU system.')
    parser.add_argument('--dataset',
                        dest='dataset',
                        action='store',
                        metavar='DATASET',
                        required=True,
                        help='The dataset to analyze')
    parser.add_argument(
        '--dataroot',
        dest='dataroot',
        action='store',
        metavar='PATH',
        required=True,
        help='Will look for corpus in <destroot>/<dataset>/...')
    parser.add_argument('--pilotfile',
                        dest='pilotfile',
                        action='store',
                        metavar='JSON_FILE',
                        required=True,
                        help='File containing JSON output')
    parser.add_argument('--ontology',
                        dest='ontology',
                        action='store',
                        metavar='JSON_FILE',
                        required=True,
                        help='JSON Ontology file')
    parser.add_argument('--pilottask',
                        dest='pilottask',
                        action='store',
                        choices=['SLU', 'SAP', 'SLG', 'EES'],
                        required=True,
                        help='Target task')
    parser.add_argument('--roletype',
                        dest='roletype',
                        action='store',
                        choices=['GUIDE', 'TOURIST'],
                        required=True,
                        help='Target role')
    parser.add_argument('--scorefile',
                        dest='scorefile',
                        action='store',
                        metavar='JSON_FILE',
                        required=True,
                        help='File to write with CSV scoring data')

    args = parser.parse_args()

    sessions = dataset_walker(args.dataset,
                              dataroot=args.dataroot,
                              labels=True)

    system_output = json.load(open(args.pilotfile))

    tagsets = ontology_reader.OntologyReader(args.ontology).get_tagsets()

    stats = {}
    if args.pilottask == 'SLU':
        stats['semantic_tagged'] = {}
        stats['semantic_tagged']['detection'] = Stat_Precision_Recall()
        stats['semantic_tagged']['class'] = Stat_Precision_Recall()
        stats['semantic_tagged']['all'] = Stat_Precision_Recall()

    if args.pilottask == 'SLU' or args.pilottask == 'SAP':
        stats['speech_act'] = {}
        stats['speech_act']['act'] = Stat_Precision_Recall()
        stats['speech_act']['all'] = Stat_Precision_Recall()

    if args.pilottask == 'SLG' or args.pilottask == 'EES':
        stats['utt_transcriptions'] = {}
        stats['utt_transcriptions']['all'] = Stat_BLEU_AM_FM()

    for session, track_session in zip(sessions, system_output["sessions"]):
        session_id = session.log['session_id']

        log_utter_list = []
        label_utter_list = []

        for log_utter, label_utter in session:
            if (args.roletype == 'GUIDE' and log_utter['speaker']
                    == 'Guide') or (args.roletype == 'TOURIST'
                                    and log_utter['speaker'] == 'Tourist'):
                log_utter_list.append(log_utter)
                label_utter_list.append(label_utter)

        # now iterate through turns
        for log_utter, label_utter, track_utter in zip(
                log_utter_list, label_utter_list, track_session["utterances"]):
            for subtask in stats:
                if subtask == 'speech_act':
                    ref_sa_list = label_utter['speech_act']
                    pred_sa_list = track_utter['speech_act']
                    eval_acts(ref_sa_list, pred_sa_list, stats[subtask])
                elif subtask == 'semantic_tagged':
                    ref_tagged = ' '.join(label_utter['semantic_tagged'])
                    pred_tagged = track_utter['semantic_tagged']
                    eval_semantics(ref_tagged, pred_tagged, stats[subtask])
                elif subtask == 'utt_transcriptions':
                    ref = log_utter['transcript']
                    pred = track_utter['generated_sentence']
                    eval_utt(ref, pred, stats[subtask])

    csvfile = open(args.scorefile, 'w')
    print >> csvfile, ("task, subtask, schedule, stat, N, result")

    for subtask in stats:
        for schedule in stats[subtask]:
            for measure, N, result in stats[subtask][schedule].results():
                print >> csvfile, (
                    "%s, %s, %s, %s, %i, %s" %
                    (args.pilottask, subtask, schedule, measure, N, result))
    csvfile.close()
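`Stat_Precision_Recall`, `Stat_BLEU_AM_FM`, and the `eval_*` helpers are defined elsewhere in the scoring scripts. Purely as a hypothetical orientation for the final loop, an accumulator compatible with the `(measure, N, result)` tuples consumed from `results()` could look like this; the real scoring logic differs:

class SimplePrecisionRecall(object):
    def __init__(self):
        self.tp = 0
        self.fp = 0
        self.fn = 0
        self.n = 0

    def add(self, ref_items, pred_items):
        # Accumulate set-based true/false positives and false negatives for one utterance.
        ref, pred = set(ref_items), set(pred_items)
        self.tp += len(ref & pred)
        self.fp += len(pred - ref)
        self.fn += len(ref - pred)
        self.n += 1

    def results(self):
        precision = float(self.tp) / (self.tp + self.fp) if (self.tp + self.fp) else 0.0
        recall = float(self.tp) / (self.tp + self.fn) if (self.tp + self.fn) else 0.0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
        return [("precision", self.n, precision),
                ("recall", self.n, recall),
                ("f1", self.n, f1)]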