예제 #1
0
                                                   test_set_parse)
        # Build the ground-truth labels for the test set: the instances are
        # filtered by the classes listed in handLbl.classes_ and then converted
        # with ct.labelsToMultilabelT (the result exposes .shape, used below).
        true_labels_tst = ct.labelsToMultilabelT(
            test_set_parse.getInstancesTextsCategoriesFiltered(
                list(handLbl.classes_)), handLbl)
        # `v` gates every diagnostic print in this section.
        if v:
            print "test matrix shape:" + str(tiMat_tst.shape)
            print "objects with labels: " + str(true_labels_tst.shape)
        # Test the predictor with the test set. Only predictorWa is active
        # here; the call on the plain `predictor` is kept commented out.
        # lbls_predicted=predictor.predict(tiMat_tst)
        lbls_predictedWa = predictorWa.predict(tiMat_tst)
        if v:
            # print "labels predicted shape:"+str(lbls_predicted.shape)
            print "labels predicted shape:" + str(lbls_predictedWa.shape)
        # Evaluate. The helper's four return values are unpacked as accuracy,
        # precision, recall and F1, with the ground truth passed first and
        # the predictions second.
        # acc_score,prec_score,rec_score,f1_score=ct.evaluateMultilabelPrediction(true_labels_tst,lbls_predicted)
        acc_scoreWa, prec_scoreWa, rec_scoreWa, f1_scoreWa = ct.evaluateMultilabelPrediction(
            true_labels_tst, lbls_predictedWa)
        if v:
            # print "scores[accuracy,precision,recall,f1-measure]:"
            # print "s--"+str(acc_score)+","+str(prec_score)+","+str(rec_score)+","+str(f1_score)
            print "scoresWa[accuracy,precision,recall,f1-measure]:"
            print "sw--" + str(acc_scoreWa) + "," + str(
                prec_scoreWa) + "," + str(rec_scoreWa) + "," + str(f1_scoreWa)
        # Register the results: each curve receives one
        # [curr_year, year_tst, score] row (presumably training year and
        # test year -- confirm against the enclosing loops).
        # acc_curve.append([curr_year,year_tst,acc_score])
        # prec_curve.append([curr_year,year_tst,prec_score])
        # rec_curve.append([curr_year,year_tst,rec_score])
        # f1_curve.append([curr_year,year_tst,f1_score])

        acc_curveWA.append([curr_year, year_tst, acc_scoreWa])
        prec_curveWA.append([curr_year, year_tst, prec_scoreWa])
        rec_curveWA.append([curr_year, year_tst, rec_scoreWa])
        # NOTE(review): f1_scoreWa is computed above but no F1 curve append is
        # visible in this excerpt -- confirm it is recorded elsewhere.
        tiMat_tst, cntMat_tst = ct.transformToFeat(featC, featTI,
                                                   test_set_parse)
        true_labels_tst = ct.labelsToMultilabelT(
            test_set_parse.getInstancesTextsCategoriesFiltered(
                list(handLbl.classes_)), handLbl)
        if v:
            print "test matrix shape:" + str(tiMat_tst.shape)
            print "objects with labels: " + str(true_labels_tst.shape)
    #test predictor with test_set
        lbls_predicted = predictor.predict(tiMat_tst)
        #        lbls_predictedWa=predictorWa.predict(tiMat_tst)
        if v:
            print "labels predicted shape:" + str(lbls_predicted.shape)
#            print "labels predicted shape:"+str(lbls_predictedWa.shape)
#evaluate
        acc_score, prec_score, rec_score, f1_score = ct.evaluateMultilabelPrediction(
            true_labels_tst, lbls_predicted)
        #        acc_scoreWa,prec_scoreWa,rec_scoreWa,f1_scoreWa=ct.evaluateMultilabelPrediction(true_labels_tst,lbls_predictedWa)

        output = open(
            args.output + "/" + blog + '_' + str(curr_year) + '_' +
            str(year_tst) + '_predictions.pkl', 'wb')
        print list(handLbl.classes_)
        cPickle.dump([true_labels_tst, lbls_predicted], output)
        output.close()

        if v:
            print "scores[accuracy,precision,recall,f1-measure]:"
            print "s--" + str(acc_score) + "," + str(prec_score) + "," + str(
                rec_score) + "," + str(f1_score)
#            print "scoresWa[accuracy,precision,recall,f1-measure]:"
#            print "sw--"+str(acc_scoreWa)+","+str(prec_scoreWa)+","+str(rec_scoreWa)+","+str(f1_scoreWa)
예제 #3
0
        tiMat_tst, cntMat_tst = ct.transformToFeatYr(featC, featTI,
                                                     test_set_parse, yrr)
        true_labels_tst = ct.labelsToMultilabelT(
            test_set_parse.getInstancesTextsCategoriesFiltered(
                list(handLbl.classes_)), handLbl)
        if v:
            print "test matrix shape:" + str(tiMat_tst.shape)
            print "objects with labels: " + str(true_labels_tst.shape)
    #test predictor with test_set
        lbls_predicted = predictor.predict(tiMat_tst)
        lbls_predictedWa = predictorWa.predict(tiMat_tst)
        if v:
            print "labels predicted shape:" + str(lbls_predicted.shape)
            print "labels predicted shape:" + str(lbls_predictedWa.shape)
    #evaluate
        acc_score, prec_score, rec_score, f1_score = ct.evaluateMultilabelPrediction(
            lbls_predicted, true_labels_tst)
        acc_scoreWa, prec_scoreWa, rec_scoreWa, f1_scoreWa = ct.evaluateMultilabelPrediction(
            lbls_predictedWa, true_labels_tst)
        #cnfMTX=ct.confusionMatrix(lbls_predicted,true_labels_tst,list(handLbl.classes_))
        #cnfMTXWa=ct.confusionMatrix(lbls_predictedWa,true_labels_tst,list(handLbl.classes_))
        #save prediccions and confusion matrix
        #(confusion matrix part removed the multilabel indicator is not supported I will have to make this on my own)
        output = open(
            resDirSV + blog + '_' + str(curr_year) + '_' + str(year_tst) +
            '_predictions.pkl', 'wb')
        print list(handLbl.classes_)
        cPickle.dump([true_labels_tst, lbls_predicted, lbls_predictedWa],
                     output)
        output.close()

        #        outputCnfMt = open(resDirSV+blog+'_'+str(curr_year)+'_'+str(year_tst)+'_confusion.pkl', 'wb')