Exemple #1
0
def solvegraph_leave_one_lecture_out(phrasedir, modelname='svr'):
    """Run OSLOM community detection on each lecture/prompt phrase graph.

    phrasedir: root directory with one sub-directory per lecture.
    modelname: model that produced the graph ('svr', 'svm' or 'lsa');
        only 'svr' graphs carry edge weights, the others are binary.
    """
    lectures = annotation.Lectures

    oslom = OSLOM(oslom_parms)

    # The original branching set identical flags for 'svm' and 'lsa'
    # and carried a misleading "#svr, lsa" comment on the else branch;
    # collapsed into one expression. All graphs are undirected.
    weighted = modelname not in ('svm', 'lsa')
    undirect = True

    for i, lec in enumerate(lectures):
        path = os.path.join(phrasedir, str(lec))

        for q in ['q1', 'q2']:
            # Load the phrase list (kept for its file-existence side
            # effect; the solver itself only needs the graph file).
            phrasefile = os.path.join(path, "%s.%s.key" % (q, method))
            phrases = fio.LoadList(phrasefile)

            netgraphfile = os.path.join(
                path, "%s.%s.%s%s" % (q, method, modelname, net_exe))

            oslom.solve_graph(netgraphfile, undirect, weighted)
Exemple #2
0
def extractPhrasePaireFeature(phrasedir):
    for lec in annotation.Lectures:
        path = phrasedir + str(lec) + '/'
        fio.NewPath(path)

        for prompt in ['q1', 'q2']:
            prefix = os.path.join(path, '%s.%s.' % (prompt, method))
            filename = path + prompt + sim_exe
            print filename

            featureset = []

            feature_extractor = Similarity(prefix)

            phrasefile = os.path.join(path, "%s.%s.key" % (prompt, method))

            phrases = fio.LoadList(phrasefile)

            for p1 in phrases:
                for p2 in phrases:
                    featureset.append(
                        (feature_extractor.get_features(p1, p2), 0.0, {
                            'p1': p1,
                            'p2': p2
                        }))

            fio.SaveDict2Json(featureset, filename)

            feature_extractor.save()
Exemple #3
0
 def __init__(self, key_prefix, sum_prefix, N):
     '''
     Load phrase/summary color data for one prompt.

     N is number of annotators
     '''
     self.key_prefix = key_prefix
     self.sum_prefix = sum_prefix
     self.N = N

     # phrase -> color mapping
     all_phrases = fio.LoadList(key_prefix + phrase_exe)
     colors = fio.LoadDictJson(key_prefix + color_exe)
     phrase2color = self.combine_phrase_color(all_phrases, colors)

     # machine-summary -> color mapping
     machine_summaries = fio.LoadList(sum_prefix + sum_exe)
     self.summary_color = self.get_summary_color(machine_summaries,
                                                 phrase2color)

     # per-summary student counts; must align with the color list
     self.summary_no = [int(c)
                        for c in fio.LoadList(sum_prefix + sum_count_exe)]
     assert(len(self.summary_color) == len(self.summary_no))

     # one {color: count} dict per human (reference) annotator
     self.ref_color = []
     for annotator in range(N):
         color_file = '%s%s.%d.color' % (sum_prefix, ref_exe, annotator)
         no_file = '%s%s.%d.no' % (sum_prefix, ref_exe, annotator)

         mapping = dict((int(c), int(cnt))
                        for c, cnt in zip(fio.LoadList(color_file),
                                          fio.LoadList(no_file)))

         self.ref_color.append(mapping)
Exemple #4
0
def writegraph_leave_one_lecture_out_lsa(model_dir,
                                         phrasedir,
                                         modelname='lsa'):
    """Write the undirected phrase-similarity edge list for each
    lecture/prompt from a precomputed similarity matrix.

    model_dir: unused here (kept for signature compatibility).
    phrasedir: root directory with one sub-directory per lecture.
    modelname: 'lsa' or 'svm' — selects which similarity matrix to read.

    Raises ValueError for any other modelname (previously this fell
    through with `similarties_results` unbound and crashed with a
    NameError at fio.ReadMatrix).
    """
    lectures = annotation.Lectures

    for i, lec in enumerate(lectures):
        test = [lec]

        path = os.path.join(phrasedir, str(lec))

        for q in ['q1', 'q2']:
            #write the output
            phrasefile = os.path.join(path, "%s.%s.key" % (q, method))
            phrases = fio.LoadList(phrasefile)

            if modelname == 'lsa':
                similarties_results = os.path.join(
                    path, "%s.%s.optimumComparerLSATasa" % (q, method))
            elif modelname == 'svm':
                similarties_results = os.path.join(path,
                                                   "%s.%s.svm" % (q, method))
            else:
                raise ValueError("unsupported modelname: %s" % modelname)

            simhead, simbody = fio.ReadMatrix(similarties_results,
                                              hasHead=True)

            assert (len(simhead) == len(phrases))

            # Upper triangle only: undirected graph, no self-loops.
            # (Loop indices renamed so they no longer shadow the outer
            # lecture index `i`.)
            body = []
            n = len(phrases)
            for r in range(n):
                for c in range(r + 1, n):
                    score = simbody[r][c]

                    score = float(score) if score != 'NaN' else 0.0

                    if score == 0.0:
                        continue  # zero similarity -> no edge

                    body.append([r, c])

            output = os.path.join(
                path, "%s.%s.%s%s" % (q, method, modelname, net_exe))
            fio.WriteMatrix(output, body)
Exemple #5
0
def predict_IE256(train_course, model_dir, phrasedir, modelname='svm'):
    sim_extractor = Similarity()
    allfeatures = sorted(sim_extractor.features.keys())

    features = allfeatures

    name = '_'.join(features)

    lectures = annotation.Lectures

    for i, lec in enumerate(lectures):
        test = [lec]

        print test
        model_file = os.path.join(model_dir,
                                  '%s_%s.model' % (train_course, name))

        with open(model_file, 'rb') as handle:
            clf = pickle.load(handle)

        path = os.path.join(phrasedir, str(lec))

        for q in ['q1', 'q2']:
            test_X, test_Y = combine_files_test(phrasedir,
                                                test,
                                                features,
                                                prompts=[q])
            predict_Y = clf.predict(test_X)

            #write the output
            phrasefile = os.path.join(path, "%s.%s.key" % (q, method))
            phrases = fio.LoadList(phrasefile)

            assert (len(predict_Y) == len(phrases) * len(phrases))

            k = 0
            body = []
            for p1 in phrases:
                row = []
                for p2 in phrases:
                    row.append(predict_Y[k])
                    k += 1
                body.append(row)

            output = os.path.join(path, "%s.%s.%s" % (q, method, modelname))
            fio.WriteMatrix(output, body, phrases)
Exemple #6
0
def readgraph_leave_one_lecture_out(phrasedir, modelname='svr'):
    lectures = annotation.Lectures

    oslom = OSLOM()

    if modelname == 'svr':
        weighted = True
        undirect = True
    else:
        weighted = False
        undirect = True

    for i, lec in enumerate(lectures):
        path = os.path.join(phrasedir, str(lec))

        for q in ['q1', 'q2']:
            #write the output
            phrasefile = os.path.join(path, "%s.%s.key" % (q, method))
            phrases = fio.LoadList(phrasefile)

            netgraphfile = os.path.join(
                path,
                "%s.%s.%s%s_oslo_files" % (q, method, modelname, net_exe),
                'tp')

            if not fio.IsExist(netgraphfile):  #no communities
                print netgraphfile
                communites = [[x] for x in range(len(phrases))]
            else:
                communites = oslom.readgraph_partitions(netgraphfile)

                #if len(communites) == 1:#break it
                #    communites = [[x] for x in range(len(phrases))]

            name = 'ct.%s.%s' % (modelname, 'default')
            output = os.path.join(
                path, "%s.cluster.kmedoids.sqrt.%s.%s" % (q, name, method))
            write_communite_to_clusters(communites, phrases, output)

            print "%d\t%s\t%d" % (lec, q, len(communites))
Exemple #7
0
def writegraph_leave_one_lecture_out(model_dir,
                                     phrasedir,
                                     modelname='svr',
                                     traincourse=None):
    from sklearn import svm
    from sklearn.metrics import mean_squared_error, precision_recall_fscore_support, accuracy_score
    import QPS_simlearning

    sim_extractor = Similarity()
    allfeatures = sorted(sim_extractor.features.keys())

    features = allfeatures

    name = '_'.join(features)

    lectures = annotation.Lectures

    for i, lec in enumerate(lectures):
        test = [lec]

        print test
        model_file = os.path.join(model_dir, '%d_%s.model' % (lec, name))
        #model_file = os.path.join(model_dir, '%s_%s.model'%('IE256_2016', name))

        with open(model_file, 'rb') as handle:
            clf = pickle.load(handle)

        path = os.path.join(phrasedir, str(lec))

        for q in ['q1', 'q2']:
            test_X, test_Y = QPS_simlearning.combine_files_test(phrasedir,
                                                                test,
                                                                features,
                                                                prompts=[q])
            predict_Y = clf.predict(test_X)

            #write the output
            phrasefile = os.path.join(path, "%s.%s.key" % (q, method))
            phrases = fio.LoadList(phrasefile)

            assert (len(predict_Y) == len(phrases) * len(phrases))

            k = 0
            body = []
            for i, p1 in enumerate(phrases):
                for j, p2 in enumerate(phrases):
                    if j <= i:
                        k += 1
                        continue  #undirect graph

                    if modelname == 'svm':
                        if predict_Y[k] == 1.0:
                            #row = [i,j, '%.1f'%predict_Y[k]]
                            row = [i, j]
                            body.append(row)
                    else:
                        row = [i, j, '%.2f' % predict_Y[k]]
                        body.append(row)

                    k += 1

            output = os.path.join(
                path, "%s.%s.%s%s" % (q, method, modelname, net_exe))
            fio.WriteMatrix(output, body)