def extractPhrasePaireFeature(phrasedir):
    """Compute and save pairwise phrase features for every lecture and prompt.

    For each lecture in ``annotation.Lectures`` and each prompt (``q1``,
    ``q2``) this loads the phrase list from
    ``<phrasedir><lec>/<prompt>.<method>.key``, calls
    ``Similarity.get_features`` on every ordered pair of phrases (a phrase is
    also paired with itself), and writes the result with
    ``fio.SaveDict2Json`` to ``<phrasedir><lec>/<prompt><sim_exe>``.

    Every saved entry is a tuple ``(features, 0.0, {'p1': p1, 'p2': p2})``;
    the score in this function is always ``0.0``.

    Args:
        phrasedir: Directory prefix. The lecture id and ``'/'`` are appended
            by plain string concatenation, so it must already end with a
            path separator.
    """
    for lec in annotation.Lectures:
        path = phrasedir + str(lec) + '/'
        # presumably ensures the per-lecture directory exists -- TODO confirm
        fio.NewPath(path)

        for prompt in ['q1', 'q2']:
            prefix = os.path.join(path, '%s.%s.' % (prompt, method))
            filename = path + prompt + sim_exe
            # Single-argument call form prints the same text on Python 2 and 3.
            print(filename)

            feature_extractor = Similarity(prefix)

            phrasefile = os.path.join(path, '%s.%s.key' % (prompt, method))
            phrases = fio.LoadList(phrasefile)

            # Full cross product, in the same order as the nested loops:
            # p1 varies slowest, p2 fastest.
            featureset = [
                (feature_extractor.get_features(p1, p2), 0.0,
                 {'p1': p1, 'p2': p2})
                for p1 in phrases
                for p2 in phrases
            ]

            fio.SaveDict2Json(featureset, filename)

            # Called once per prompt, after all pairs have been processed.
            feature_extractor.save()
# Example #2
# 0
def extractPhrasePaireFeature(phrasedir):
    """Compute and save pairwise phrase features for every lecture and prompt.

    For each lecture in ``annotation.Lectures`` and each prompt (``q1``,
    ``q2``) this loads the phrase list from
    ``<phrasedir><lec>/<prompt>.<method>.key``, calls
    ``Similarity.get_features`` on every ordered pair of phrases (a phrase is
    also paired with itself), and writes the result with
    ``fio.SaveDict2Json`` to ``<phrasedir><lec>/<prompt><sim_exe>``.

    Every saved entry is a tuple ``(features, 0.0, {'p1': p1, 'p2': p2})``;
    the score in this function is always ``0.0``.

    Args:
        phrasedir: Directory prefix. The lecture id and ``'/'`` are appended
            by plain string concatenation, so it must already end with a
            path separator.
    """
    for lec in annotation.Lectures:
        path = phrasedir + str(lec) + '/'
        # presumably ensures the per-lecture directory exists -- TODO confirm
        fio.NewPath(path)

        for prompt in ['q1', 'q2']:
            prefix = os.path.join(path, '%s.%s.' % (prompt, method))
            filename = path + prompt + sim_exe
            # Single-argument call form prints the same text on Python 2 and 3.
            print(filename)

            feature_extractor = Similarity(prefix)

            phrasefile = os.path.join(path, '%s.%s.key' % (prompt, method))
            phrases = fio.LoadList(phrasefile)

            # Full cross product, in the same order as the nested loops:
            # p1 varies slowest, p2 fastest.
            featureset = [
                (feature_extractor.get_features(p1, p2), 0.0,
                 {'p1': p1, 'p2': p2})
                for p1 in phrases
                for p2 in phrases
            ]

            fio.SaveDict2Json(featureset, filename)

            # Called once per prompt, after all pairs have been processed.
            feature_extractor.save()
# Example #3
# 0
def extractPhrasePaireFromAnnotation(phrasedir, annotators, id):
    """Build labelled phrase-pair features from annotation files.

    Iterates over the files yielded by ``annotation.generate_all_files`` for
    the given annotators and ``annotation.Lectures``. Each file is loaded
    into an ``annotation.Task``; for each prompt (``q1``, ``q2``) the phrase
    annotation returned by ``task.get_phrase_annotation`` is read as a
    mapping from rank to a list of dicts carrying a ``'phrase'`` entry.

    Every ordered pair of phrases (across all ranks, self-pairs included) is
    passed to ``Similarity.get_features``. A pair is scored ``1.0`` when both
    phrases come from the same rank and ``0.0`` otherwise. Phrases are
    lower-cased and stripped before use. The resulting list of
    ``(features, score, {'p1': p1, 'p2': p2})`` tuples is written with
    ``fio.SaveDict2Json`` to ``<phrasedir><lec>/<prompt><sim_exe>``.

    NOTE(review): the output file name depends only on the lecture and the
    prompt, not on the annotator, so if several annotation files map to the
    same lecture the later one overwrites the earlier -- confirm intended.

    Args:
        phrasedir: Directory prefix. The lecture id and ``'/'`` are appended
            by plain string concatenation, so it must already end with a
            path separator.
        annotators: Forwarded to ``annotation.generate_all_files``.
        id: Unused in this function; kept so existing callers keep working.
    """
    # ``anotators`` is the keyword spelling used in the original call; it is
    # kept as-is because it must match the callee's parameter name.
    # The yielded ``annotator`` value is not used below.
    for doc, lec, annotator in annotation.generate_all_files(
            annotation.datadir + 'json/',
            '.json',
            anotators=annotators,
            lectures=annotation.Lectures):
        # Single-argument call form prints the same text on Python 2 and 3.
        print(doc)

        # load task
        task = annotation.Task()
        task.loadjson(doc)

        path = phrasedir + str(lec) + '/'
        # presumably ensures the per-lecture directory exists -- TODO confirm
        fio.NewPath(path)

        for prompt in ['q1', 'q2']:
            prefix = os.path.join(path, '%s.%s.' % (prompt, method))
            filename = path + prompt + sim_exe
            print(filename)

            featureset = []

            feature_extractor = Similarity(prefix)

            phrase_annotation = task.get_phrase_annotation(prompt)

            # The annotation is not modified below, so sort the ranks and
            # normalise each phrase once instead of once per pair.
            ranks = sorted(phrase_annotation)
            normalized = {}
            for rank in ranks:
                normalized[rank] = [
                    entry['phrase'].lower().strip()
                    for entry in phrase_annotation[rank]
                ]

            # Same rank -> positive example (1.0); different ranks ->
            # negative example (0.0).
            for rank1 in ranks:
                for rank2 in ranks:
                    score = 1.0 if rank1 == rank2 else 0.0

                    for p1 in normalized[rank1]:
                        for p2 in normalized[rank2]:
                            featureset.append(
                                (feature_extractor.get_features(p1, p2),
                                 score,
                                 {'p1': p1, 'p2': p2}))

            fio.SaveDict2Json(featureset, filename)

            # Called once per prompt, after all pairs have been processed.
            feature_extractor.save()
def extractPhrasePaireFromAnnotation(phrasedir, annotators, id):
    """Build labelled phrase-pair features from annotation files.

    Iterates over the files yielded by ``annotation.generate_all_files`` for
    the given annotators and ``annotation.Lectures``. Each file is loaded
    into an ``annotation.Task``; for each prompt (``q1``, ``q2``) the phrase
    annotation returned by ``task.get_phrase_annotation`` is read as a
    mapping from rank to a list of dicts carrying a ``'phrase'`` entry.

    Every ordered pair of phrases (across all ranks, self-pairs included) is
    passed to ``Similarity.get_features``. A pair is scored ``1.0`` when both
    phrases come from the same rank and ``0.0`` otherwise. Phrases are
    lower-cased and stripped before use. The resulting list of
    ``(features, score, {'p1': p1, 'p2': p2})`` tuples is written with
    ``fio.SaveDict2Json`` to ``<phrasedir><lec>/<prompt><sim_exe>``.

    NOTE(review): the output file name depends only on the lecture and the
    prompt, not on the annotator, so if several annotation files map to the
    same lecture the later one overwrites the earlier -- confirm intended.

    Args:
        phrasedir: Directory prefix. The lecture id and ``'/'`` are appended
            by plain string concatenation, so it must already end with a
            path separator.
        annotators: Forwarded to ``annotation.generate_all_files``.
        id: Unused in this function; kept so existing callers keep working.
    """
    # ``anotators`` is the keyword spelling used in the original call; it is
    # kept as-is because it must match the callee's parameter name.
    # The yielded ``annotator`` value is not used below.
    for doc, lec, annotator in annotation.generate_all_files(
            annotation.datadir + 'json/',
            '.json',
            anotators=annotators,
            lectures=annotation.Lectures):
        # Single-argument call form prints the same text on Python 2 and 3.
        print(doc)

        # load task
        task = annotation.Task()
        task.loadjson(doc)

        path = phrasedir + str(lec) + '/'
        # presumably ensures the per-lecture directory exists -- TODO confirm
        fio.NewPath(path)

        for prompt in ['q1', 'q2']:
            prefix = os.path.join(path, '%s.%s.' % (prompt, method))
            filename = path + prompt + sim_exe
            print(filename)

            featureset = []

            feature_extractor = Similarity(prefix)

            phrase_annotation = task.get_phrase_annotation(prompt)

            # The annotation is not modified below, so sort the ranks and
            # normalise each phrase once instead of once per pair.
            ranks = sorted(phrase_annotation)
            normalized = {}
            for rank in ranks:
                normalized[rank] = [
                    entry['phrase'].lower().strip()
                    for entry in phrase_annotation[rank]
                ]

            # Same rank -> positive example (1.0); different ranks ->
            # negative example (0.0).
            for rank1 in ranks:
                for rank2 in ranks:
                    score = 1.0 if rank1 == rank2 else 0.0

                    for p1 in normalized[rank1]:
                        for p2 in normalized[rank2]:
                            featureset.append(
                                (feature_extractor.get_features(p1, p2),
                                 score,
                                 {'p1': p1, 'p2': p2}))

            fio.SaveDict2Json(featureset, filename)

            # Called once per prompt, after all pairs have been processed.
            feature_extractor.save()