def extractPhrasePaireFeature(phrasedir):
    """Extract similarity features for every ordered pair of key phrases.

    For each lecture in ``annotation.Lectures`` and each prompt in
    ``['q1', 'q2']``, the phrase list is loaded from
    ``<phrasedir><lecture>/<prompt>.<method>.key``, features are computed for
    every ordered pair ``(p1, p2)`` (self-pairs included), and the result is
    written with ``fio.SaveDict2Json`` to
    ``<phrasedir><lecture>/<prompt><sim_exe>``.

    Each saved entry is a tuple ``(features, 0.0, {'p1': p1, 'p2': p2})``;
    the score slot is always ``0.0`` here.

    Args:
        phrasedir: Directory prefix; it is concatenated directly with the
            lecture number, so it is expected to end with a path separator.

    NOTE(review): the nesting was reconstructed from a flattened source line;
    the two save calls are placed once per prompt -- confirm.
    """
    for lecture in annotation.Lectures:
        path = phrasedir + str(lecture) + '/'
        fio.NewPath(path)

        for prompt in ['q1', 'q2']:
            prefix = os.path.join(path, '%s.%s.' % (prompt, method))
            filename = path + prompt + sim_exe
            print(filename)

            feature_extractor = Similarity(prefix)

            key_file = os.path.join(path, "%s.%s.key" % (prompt, method))
            phrases = fio.LoadList(key_file)

            # Full cross product, in the same row-major order as nested loops.
            featureset = [
                (feature_extractor.get_features(left, right),
                 0.0,
                 {'p1': left, 'p2': right})
                for left in phrases
                for right in phrases
            ]

            fio.SaveDict2Json(featureset, filename)
            feature_extractor.save()
def extractPhrasePaireFeature(phrasedir):
    """Build and save pairwise phrase features for each lecture and prompt.

    Walks ``annotation.Lectures``; for prompts ``'q1'`` and ``'q2'`` it loads
    the phrases stored in ``<prompt>.<method>.key`` under the lecture folder,
    asks a ``Similarity`` object for the features of every ordered phrase
    pair, and saves the collected list via ``fio.SaveDict2Json`` to
    ``<lecture folder><prompt><sim_exe>``. ``feature_extractor.save()`` is
    called afterwards.

    Every stored item has the form
    ``(features, 0.0, {'p1': p1, 'p2': p2})``.

    Args:
        phrasedir: Directory prefix that the lecture number is appended to
            by plain string concatenation.

    NOTE(review): block structure was rebuilt from a single flattened line;
    saving is assumed to happen once per prompt -- confirm.
    """
    prompts = ['q1', 'q2']

    for lec in annotation.Lectures:
        lecture_dir = phrasedir + str(lec) + '/'
        fio.NewPath(lecture_dir)

        for prompt in prompts:
            prefix = os.path.join(lecture_dir, '%s.%s.' % (prompt, method))
            out_file = lecture_dir + prompt + sim_exe
            print(out_file)

            rows = []
            feature_extractor = Similarity(prefix)

            phrasefile = os.path.join(lecture_dir,
                                      "%s.%s.key" % (prompt, method))
            phrases = fio.LoadList(phrasefile)

            for first in phrases:
                for second in phrases:
                    features = feature_extractor.get_features(first, second)
                    pair = {'p1': first, 'p2': second}
                    rows.append((features, 0.0, pair))

            fio.SaveDict2Json(rows, out_file)
            feature_extractor.save()
def extractPhrasePaireFromAnnotation(phrasedir, annotators, id):
    """Create labelled phrase-pair features from annotated task files.

    Iterates the ``.json`` files yielded by ``annotation.generate_all_files``
    for the given annotators and ``annotation.Lectures``. Each file is loaded
    into an ``annotation.Task``; for prompts ``'q1'`` and ``'q2'`` the phrase
    annotation is read with ``task.get_phrase_annotation(prompt)`` and every
    ordered pair of phrases across all rank groups is turned into a tuple
    ``(features, score, {'p1': p1, 'p2': p2})``. ``score`` is ``1.0`` when
    both phrases come from the same rank key and ``0.0`` otherwise. Phrases
    are lower-cased and stripped before feature extraction.

    The list is saved with ``fio.SaveDict2Json`` to
    ``<phrasedir><lecture>/<prompt><sim_exe>``.

    Args:
        phrasedir: Directory prefix that the lecture number is appended to.
        annotators: Forwarded to ``generate_all_files`` through its
            ``anotators`` keyword (spelling as defined by that function).
        id: Not referenced anywhere in this function body.

    NOTE(review): the output file name depends only on lecture and prompt,
    so files from different annotators for the same lecture write to the
    same path -- confirm this is intended.
    NOTE(review): nesting was reconstructed from a flattened source line;
    the save calls are placed once per prompt -- confirm.
    """
    task_files = annotation.generate_all_files(
        annotation.datadir + 'json/',
        '.json',
        anotators=annotators,
        lectures=annotation.Lectures)

    for doc, lec, annotator in task_files:
        print(doc)

        # Load the annotated task.
        task = annotation.Task()
        task.loadjson(doc)

        path = phrasedir + str(lec) + '/'
        fio.NewPath(path)

        for prompt in ['q1', 'q2']:
            prefix = os.path.join(path, '%s.%s.' % (prompt, method))
            filename = path + prompt + sim_exe
            print(filename)

            featureset = []
            feature_extractor = Similarity(prefix)
            phrase_annotation = task.get_phrase_annotation(prompt)

            # Same rank -> positive example (1.0); different rank -> 0.0.
            ranks = sorted(phrase_annotation)
            for rank_a in ranks:
                for rank_b in ranks:
                    score = 1.0 if rank_a == rank_b else 0.0

                    group_a = phrase_annotation[rank_a]
                    group_b = phrase_annotation[rank_b]

                    for entry_a in group_a:
                        p1 = entry_a['phrase'].lower().strip()

                        for entry_b in group_b:
                            p2 = entry_b['phrase'].lower().strip()
                            features = feature_extractor.get_features(p1, p2)
                            featureset.append(
                                (features, score, {'p1': p1, 'p2': p2}))

            fio.SaveDict2Json(featureset, filename)
            feature_extractor.save()
def extractPhrasePaireFromAnnotation(phrasedir, annotators, id):
    """Extract scored phrase-pair features from annotation JSON files.

    For each ``(doc, lec, annotator)`` produced by
    ``annotation.generate_all_files`` the task is loaded from ``doc`` and,
    for prompts ``'q1'`` and ``'q2'``, all ordered phrase pairs drawn from
    the rank groups of ``task.get_phrase_annotation(prompt)`` are converted
    to ``(features, score, {'p1': p1, 'p2': p2})``. Pairs whose rank keys are
    equal receive ``score = 1.0``; all others receive ``0.0``. Phrase text is
    lower-cased and stripped first.

    Results go to ``<phrasedir><lecture>/<prompt><sim_exe>`` through
    ``fio.SaveDict2Json``, followed by ``feature_extractor.save()``.

    Args:
        phrasedir: Directory prefix; concatenated directly with the lecture
            number.
        annotators: Passed on as the ``anotators`` keyword of
            ``generate_all_files``.
        id: Unused in this function body.

    NOTE(review): indentation was rebuilt from a single flattened line;
    saving is assumed to happen once per prompt -- confirm.
    """
    json_dir = annotation.datadir + 'json/'

    for doc, lec, annotator in annotation.generate_all_files(
            json_dir, '.json',
            anotators=annotators,
            lectures=annotation.Lectures):
        print(doc)

        # Load the task described by this annotation file.
        task = annotation.Task()
        task.loadjson(doc)

        lecture_dir = phrasedir + str(lec) + '/'
        fio.NewPath(lecture_dir)

        for prompt in ['q1', 'q2']:
            prefix = os.path.join(lecture_dir, '%s.%s.' % (prompt, method))
            out_file = lecture_dir + prompt + sim_exe
            print(out_file)

            pairs = []
            feature_extractor = Similarity(prefix)
            phrase_annotation = task.get_phrase_annotation(prompt)

            # Positive examples are pairs taken from the same rank.
            for rank1 in sorted(phrase_annotation):
                for rank2 in sorted(phrase_annotation):
                    score = 0.0
                    if rank1 == rank2:
                        score = 1.0

                    phrases1 = phrase_annotation[rank1]
                    phrases2 = phrase_annotation[rank2]

                    for item1 in phrases1:
                        p1 = item1['phrase'].lower().strip()

                        for item2 in phrases2:
                            p2 = item2['phrase'].lower().strip()
                            pairs.append((
                                feature_extractor.get_features(p1, p2),
                                score,
                                {'p1': p1, 'p2': p2},
                            ))

            fio.SaveDict2Json(pairs, out_file)
            feature_extractor.save()