try:
        aux_file = sys.argv[1]
    except:
        print "Argument expected: weka-csv (inst#,actual,predicted,error,prediction) (with header) file with labels"
        sys.exit(-1)
    try:
        label_no = int(sys.argv[2])
    except:
        print "Argument expected: label to be overwritten no."
        sys.exit(-1)
 

    print "Loading labels' (weka-csv) file:",  aux_file
    #inst#,actual,predicted,error,prediction
    lines = open(aux_file).readlines()
    labels2bool = list( line.split(",")[2].find("present")>=0 for line in lines[1:] if len(line.strip())>0 )
    print "",len(labels2bool),"rows loaded"
    #print labels2bool
    
    labels = []
    for ocur in labels2bool:
        if ocur:
            labels.append([label_no])
        else:
            labels.append([])

    aux_file = aux_file + "_jrs.txt"
    print "Writing to file:", aux_file
    jrs_io.store_labels(open(aux_file,"w"), labels)
    
        print "Argument expected: path to a labels' file."
        sys.exit(-1)
    try:
        labels2_path = sys.argv[2]
    except:
        print "Argument expected: path to a second labels' file."
        sys.exit(-1)
    try:
        out_path = sys.argv[3]
    except:
        print "Argument expected: output labels' file."
        sys.exit(-1)

    print "Loading labels' 1 file:", labels1_path
    labels1 = jrs_io.load_labels(open(labels1_path))

    try:
        print "Loading labels' 2 file:", labels2_path
        labels2 = jrs_io.load_labels(open(labels2_path))
    except:
        print "Failed loading file", labels2_path
        print "Using empty file"
        labels2 = [[] for i in xrange(len(labels1))]

    labels12 = []
    for i in xrange(len(labels1)):
        labels12.append(sorted(set(labels1[i] + labels2[i])))

    print "Writing to 1+2 file:", out_path
    jrs_io.store_labels(open(out_path, "w"), labels12)
     sys.exit(-1)
 
 print "The program shuffles distances and trainingLabels."
 
 print "Loading labels' file:",  labels_path
 labels = jrs_io.load_labels(open(labels_path))
 n = len(labels)
 print "",n," labels' sets loaded."
 
 order = range(n)
 random.shuffle(order)
 print "Random order:", order[:30],"..."
 
 print "Shuffling labels..."
 labels_shuffled = [labels[ix] for ix in order]
 jrs_io.store_labels(open(labels_path+"_shuffled","w"), labels_shuffled)
 
 print "Loading distances' file:",  distance_matrix_path
 distances = jrs_io.load_data(open(distance_matrix_path), lambda x: x)
 try: print "",len(distances), "x",len(distances[0])
 except: pass
 
 print "Extending order..."    
 order = order + range(n, len(distances))
 print "Extended order:", order    
 
 print "Shuffling columns"
 distances_tmp = []
 for row in distances:
     new_row = [ row[ix] for ix in order ]
     distances_tmp.append(new_row)
        sys.exit(-1)

    print "The program shuffles distances and trainingLabels."

    print "Loading labels' file:", labels_path
    labels = jrs_io.load_labels(open(labels_path))
    n = len(labels)
    print "", n, " labels' sets loaded."

    order = range(n)
    random.shuffle(order)
    print "Random order:", order[:30], "..."

    print "Shuffling labels..."
    labels_shuffled = [labels[ix] for ix in order]
    jrs_io.store_labels(open(labels_path + "_shuffled", "w"), labels_shuffled)

    print "Loading distances' file:", distance_matrix_path
    distances = jrs_io.load_data(open(distance_matrix_path), lambda x: x)
    try:
        print "", len(distances), "x", len(distances[0])
    except:
        pass

    print "Extending order..."
    order = order + range(n, len(distances))
    print "Extended order:", order

    print "Shuffling columns"
    distances_tmp = []
    for row in distances:
        print "Argument expected: path to a second labels' file."
        sys.exit(-1)        
    try:
        out_path = sys.argv[3]
    except:
        print "Argument expected: output labels' file."
        sys.exit(-1)      
                        
    print "Loading labels' 1 file:",  labels1_path
    labels1 = jrs_io.load_labels(open(labels1_path))
    
    try:
        print "Loading labels' 2 file:",  labels2_path
        labels2 = jrs_io.load_labels(open(labels2_path))
    except:
        print "Failed loading file", labels2_path
        print "Using empty file"
        labels2 = [[] for i in xrange(len(labels1))]
    
    labels12 = []
    for i in xrange(len(labels1)):
         labels12.append( sorted(set(labels1[i]+labels2[i])) )
         
         
    print "Writing to 1+2 file:", out_path
    jrs_io.store_labels(open(out_path,"w"), labels12)

     
         
        
        
예제 #6
0
            # Disabled experiment: shift every vote by a common offset before
            # comparing the totals. Kept for reference only.
            #subs = max(votes[0] - diff,0.0)
            #print "","subs=",subs
            #votes_for_yes = [v-subs for v in votes_for_yes]
            #votes_for_no = [v-subs for v in votes_for_no]
            #print "","sample_no:",sample_no," label:",label,"votes_for_yes:",votes_for_yes,"votes_for_no:",votes_for_no

            # Total the votes for and against the current label; each total
            # falls back to 0.0 if summing raises.
            # NOTE(review): the try/except looks like a leftover from the
            # commented-out averaging variant (which could divide by zero) --
            # confirm before narrowing or removing it.
            try:
                #yes = float(sum(votes_for_yes))/len(votes_for_yes)
                yes = sum(votes_for_yes)
            except:
                yes = 0.0
            try:
                #no = float(sum(votes_for_no))/len(votes_for_no)
                no = sum(votes_for_no)
            except:
                no = 0.0

            # Keep the label only on a strict "yes" majority; ties reject it.
            if yes > no:
                selected_sample_labels.append(label)

        # Record this sample's selected labels in sorted order and log them
        # alongside all_sample_labels.
        predicted_labels.append(sorted(selected_sample_labels))
        print "", "all labels:", all_sample_labels, " -> sel:", sorted(
            selected_sample_labels), "\n"

    print "STORING TO FILE:", out_path
    import sys
    sys.path.append(r'../')
    sys.path.append(r'../../')
    import jrs_io
    jrs_io.store_labels(open(out_path, "w"), predicted_labels)
예제 #7
0
    if FINALTEST:
        start = time.clock()
        print "Final predicting..."
        print " loading from file:", distance_matrix_path, " in range", FINALTEST_START, "-", FINALTEST_END
        predicted_labels = []
        for i, line in enumerate(open(distance_matrix_path).xreadlines()):
            if i % 1000 == 0: print "", i, "rows processed..."
            if i >= FINALTEST_END: break
            if i < FINALTEST_START: continue
            row = [cast_method(x) for x in line.split()]
            final2training_distances = row[training_range[0]:training_range[1]]
            predicted_labels.append(
                multilabel_classifier(final2training_distances,
                                      training_labels))
        jrs_io.store_labels(open(FINAL_RESULT_PATH, "w"), predicted_labels)
        lcount = [len(ll) for ll in predicted_labels]
        print " avg labels in predicted:", float(sum(lcount)) / (len(lcount))
        print " done in", (time.clock() - start), "sec..."
        print "------------------------------------------"
    else:

        def eval():
            start = time.clock()
            print "Calculating predictions of ", len(
                testing_labels), " labels' sets..."
            predicted_labels = jrs_multilabel_classifier.classify_multilabel(
                testing2training_distances, training_labels,
                multilabel_classifier)
            accuracy, precision, recall, hammingloss, subset01loss, fmeasure = jrs_evaluation.jrs_evaluate(
                testing_labels, predicted_labels)
 print "------------------------------------------"
         
 print "Classifying file:",  features_matrix_path
 f = open(features_matrix_path)
 predicted_labels = []
 for i,line in enumerate(f.xreadlines()):
     if i%1000==0: print "",i,"..."
     
     row = [int(x) for x in line.split()]
     ll = []
     for label,feature_ixs in label2feature_ixs.iteritems():
         says_yes = sum(row[ix]>0 for ix in feature_ixs)
         if says_yes >= len(feature_ixs)*MIN_FRACTION_OF_VOTES:
             ll.append(label)        
     ll = sorted(ll)
     
     predicted_labels.append(ll)
             
     print "",i," oracle",labels[i]," pred",ll
     print "","len=",len(labels[:(i+1)]), len(predicted_labels)
     accuracy, precision, recall, hammingloss, subset01loss, fmeasure =  jrs_evaluation.jrs_evaluate(labels[:(i+1)], predicted_labels)
     print "\t\t\t\t\t","%.2f" %precision,"%.2f" %recall,"%.2f" %fmeasure
 print "------------------------------------------"
 
 accuracy, precision, recall, hammingloss, subset01loss, fmeasure =  jrs_evaluation.jrs_evaluate(labels, predicted_labels)
 print "\t\t\t\t\t","%.2f" %precision,"%.2f" %recall,"%.2f" %fmeasure
 
 print "Wrining results to", out_path 
 jrs_io.store_labels(open(out_path,"w"), predicted_labels)
 
 
예제 #9
0
 #############################################################################################################################
 #############################################################################################################################
 
 if FINALTEST:
     start = time.clock()
     print "Final predicting..."
     print " loading from file:",distance_matrix_path," in range",FINALTEST_START,"-",FINALTEST_END 
     predicted_labels = []
     for i,line in enumerate(open(distance_matrix_path).xreadlines()):
         if i%1000 == 0: print "",i,"rows processed..."
         if i>=FINALTEST_END: break
         if i<FINALTEST_START: continue 
         row = [cast_method(x) for x in line.split()]
         final2training_distances = row[training_range[0]:training_range[1]]
         predicted_labels.append(multilabel_classifier(final2training_distances, training_labels))
     jrs_io.store_labels(open(FINAL_RESULT_PATH,"w"), predicted_labels)
     lcount = [len(ll) for ll in predicted_labels]
     print " avg labels in predicted:", float(sum(lcount))/(len(lcount))
     print " done in", (time.clock() - start), "sec..."
     print "------------------------------------------"
 else:
     def eval():
         start = time.clock()    
         print "Calculating predictions of ",len(testing_labels)," labels' sets..."
         predicted_labels = jrs_multilabel_classifier.classify_multilabel(testing2training_distances, training_labels, multilabel_classifier)
         try:
             print testing_labels[:10],"\n", predicted_labels[:10]                                    
             accuracy, precision, recall, hammingloss, subset01loss, fmeasure =  jrs_evaluation.jrs_evaluate(testing_labels, predicted_labels)
         except:
             print "[knn] Error in jrs_evaluation.jrs_evaluate(testing_labels, predicted_labels):",testing_labels, predicted_labels
         print " accuracy:", accuracy,"\n precision:", precision,"\n recall:", recall,"\n fmeasure:", fmeasure                
예제 #10
0
    # Convert a Weka prediction CSV into a labels file written by jrs_io.
    #
    # Command line:
    #   argv[1] - weka-csv file (inst#,actual,predicted,error,prediction)
    #             with a header row
    #   argv[2] - integer label number emitted for every row flagged
    #             "present"
    #
    # Output goes to "<argv[1]>_jrs.txt". The script exits with -1 when an
    # argument is missing or the label number is not an integer.
    try:
        aux_file = sys.argv[1]
    except IndexError:
        print "Argument expected: weka-csv (inst#,actual,predicted,error,prediction) (with header) file with labels"
        sys.exit(-1)
    try:
        label_no = int(sys.argv[2])
    except (IndexError, ValueError):
        # Either the argument is missing or it is not a valid integer.
        print "Argument expected: label to be overwritten no."
        sys.exit(-1)

    print "Loading labels' (weka-csv) file:", aux_file
    # Column layout: inst#,actual,predicted,error,prediction
    with open(aux_file) as csv_file:
        lines = csv_file.readlines()
    # Skip the header row and blank lines; a row counts as positive when its
    # third comma-separated column contains the text "present".
    labels2bool = ["present" in line.split(",")[2]
                   for line in lines[1:] if line.strip()]
    print "", len(labels2bool), "rows loaded"

    # One label list per row: [label_no] for positive rows, [] otherwise.
    labels = [[label_no] if ocur else [] for ocur in labels2bool]

    aux_file = aux_file + "_jrs.txt"
    print "Writing to file:", aux_file
    # The with-block guarantees the output is flushed and closed.
    with open(aux_file, "w") as out_file:
        jrs_io.store_labels(out_file, labels)