    labels = jrs_io.load_labels(open(labels_path))
    single_labels = list(set(reduce(lambda l1,l2: l1+l2, (ll for ll in labels))))
    #training_labels = labels[:training_size]
    n = len(labels)    
    print n," multi-label sets loaded (",len(single_labels),"single labels:",single_labels,")..."
    print "Sample five labels:", labels[:5]
    print "------------------------------------------"
    
    print "Extracting label occurrence vectors"
    label2occurrences = dict( (label,label_ocur(labels, label)) for label in single_labels )
    #print label2occurrences    
    print "------------------------------------------"
    
    print "Loading features from file:",  features_matrix_path
    f = open(features_matrix_path)
    features = jrs_io.load_data(f, cast_method = float, numrows = LOAD_MAX_ROWS)
    print "","loaded", len(features),"x",len(features[0])
    print "------------------------------------------"

    print "Calculating and reporting to:", out_path
    fout = open(out_path, "w")
    for label in single_labels:
        print "","considering label:",label
        label_occurrences_vector = label2occurrences[label]
        
        indval_colix = [] 
        for colix in xrange(len(features[0])):             
            indval,precision,recall = INDICATOR(label_occurrences_vector, extract_col(features, colix))
            indval_colix.append( (indval,colix,precision,recall) )
                    
        fout.write(str(label)+";\t")
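# --- hedged sketch, not part of the original project ---
# label_ocur, extract_col and INDICATOR are only referenced in the excerpt
# above; plausible reconstructions, assuming a row-major feature matrix and an
# F1-style indicator of how well a non-zero feature value predicts the label:

def label_ocur(labels, label):
    # binary occurrence vector: 1 where an object's label set contains `label`
    return [1 if label in label_set else 0 for label_set in labels]

def extract_col(matrix, colix):
    # single feature column out of a list-of-rows matrix
    return [row[colix] for row in matrix]

def INDICATOR(label_vec, feature_col):
    # hypothetical indicator: treat feature != 0 as a positive prediction
    predicted = [1 if v != 0 else 0 for v in feature_col]
    tp = sum(1 for l, p in zip(label_vec, predicted) if l == 1 and p == 1)
    fp = sum(1 for l, p in zip(label_vec, predicted) if l == 0 and p == 1)
    fn = sum(1 for l, p in zip(label_vec, predicted) if l == 1 and p == 0)
    precision = float(tp) / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = float(tp) / (tp + fn) if (tp + fn) > 0 else 0.0
    indval = (2 * precision * recall / (precision + recall)
              if (precision + recall) > 0 else 0.0)
    return indval, precision, recall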
        
    print "Loading labels' file:",  labels_path
    labels = jrs_io.load_labels(open(labels_path))
    single_labels = list(set(reduce(lambda l1,l2: l1+l2, (ll for ll in labels))))
    n = len(labels)    
    print n," multi-label sets loaded (",len(single_labels),"single labels:",single_labels,")..."
    print "------------------------------------------"
    
    print "Extracting label occurrence vectors"
    label2occurrences = dict( (label,label_ocur(labels, label)) for label in single_labels )        
    #print label2occurrences    
    print "------------------------------------------"

    print "Loading features from file:",  features_matrix_path
    f = open(features_matrix_path)
    features = jrs_io.load_data(f, cast_method = int, numrows = LOAD_MAX_ROWS)
    print "","loaded", len(features),"x",len(features[0])
    print "------------------------------------------"
    
    
    print "Loading features-ind file",featuresind_path
    print "Loading neg-features-ind file",negfeaturesind_path
    f1 = open(featuresind_path)
    f2 = open(negfeaturesind_path)
    f1lines = f1.readlines()
    f2lines = f2.readlines()
    if len(f1lines)!=len(f2lines):
        print "ERROR. len IND != len NEG-IND"
        sys.exit(-1)
    for i, (line, line2) in enumerate(izip(f1lines, f2lines)):
        label = str(i+1)       
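# --- hedged sketch, assumptions about the project-local jrs_io module ---
# The excerpts rely on module-level imports that are not shown here (e.g.
# `import sys`, `from itertools import izip`) and on jrs_io loaders whose file
# format is also not shown. Assuming one comma-separated record per line, the
# loaders might look roughly like this:

def load_data(f, cast_method=float, numrows=None):
    # read up to `numrows` rows, casting every field with cast_method
    rows = []
    for i, line in enumerate(f):
        if numrows is not None and i >= numrows:
            break
        rows.append([cast_method(v) for v in line.strip().split(",")])
    return rows

def load_labels(f):
    # each line lists the labels attached to one object
    return [line.strip().split(",") for line in f if line.strip()]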
 
    print "Loading labels' file:", labels_path
    labels = jrs_io.load_labels(open(labels_path))
    n = len(labels)
    print "", n, " labels' sets loaded."

    order = range(n)
    random.shuffle(order)
    print "Random order:", order[:30], "..."

    print "Shuffling labels..."
    labels_shuffled = [labels[ix] for ix in order]
    jrs_io.store_labels(open(labels_path + "_shuffled", "w"), labels_shuffled)

    print "Loading distances' file:", distance_matrix_path
    distances = jrs_io.load_data(open(distance_matrix_path), lambda x: x)
    try:
        print "", len(distances), "x", len(distances[0])
    except:
        pass

    print "Extending order..."
    order = order + range(n, len(distances))
    print "Extended order:", order

    print "Shuffling columns"
    distances_tmp = []
    for row in distances:
        new_row = [row[ix] for ix in order]
        distances_tmp.append(new_row)

    print "Shuffling rows"
    distances_shuffled = []
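    # The excerpt ends here; by symmetry with the column shuffle above, the row
    # shuffle presumably reorders distances_tmp with the same `order`
    # (a sketch, not original code):
    #     for ix in order:
    #         distances_shuffled.append(distances_tmp[ix])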
Example 5
    print " sample counts:", sorted(list(label2count.iteritems()))[:10]
    print " sample `friend`-labels:", sorted(list(label2size.iteritems()))[:10]
    print " sample pairlabel2count:", sorted(list(
        pairlabel2count.iteritems()))[:50]
    print "------------------------------------------"

    print "------------------------------------------"
    print "------------------------------------------"

    avg_label_count = float(sum(len(l) for l in labels)) / len(labels)
    print "Avg labels per object:", avg_label_count
    print "------------------------------------------"

    print "Loading distances' file:", distance_matrix_path
    distances = jrs_io.load_data(open(distance_matrix_path),
                                 cast_method,
                                 numrows=LOAD_ROWS_FROM_FILE)
    try:
        print "", len(distances), "x", len(distances[0])
    except:
        pass
    #print "Sample distances:", distances[:5][:5]
    print "------------------------------------------"

    # CLASSIFIER: Ensembled Strongest Fractional Knn
    print "Building Ensembled Strongest FractionKNN..."
    training_single_labels = list(
        set(reduce(lambda l1, l2: l1 + l2, (ll for ll in training_labels))))
    print " training labels:", training_single_labels
    print " extracting submatrix..."
    training_distances = jrs_io.extract_submatrix(distances, training_range[0],
                                                  training_range[1],
                                                  training_range[0],
                                                  training_range[1])
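# --- hedged sketch, not part of the original project ---
# jrs_io.extract_submatrix is not defined in these excerpts; from the call above
# it appears to take (matrix, row_from, row_to, col_from, col_to). Assuming
# half-open index ranges, it might be as simple as:

def extract_submatrix(matrix, row_from, row_to, col_from, col_to):
    # slice out rows [row_from, row_to) and columns [col_from, col_to)
    return [row[col_from:col_to] for row in matrix[row_from:row_to]]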
Example 6
    print "------------------------------------------"
    
    #print "Building tree using pairlabel2count & label2count"
    #outpath = "/tmp/pairlabel2dist_avg_tree"
    #pairwisecount_tree(label2count, pairlabel2count, outpath)
    #print "------------------------------------------"

    print "------------------------------------------"
    print "------------------------------------------"
    
    avg_label_count = float(sum(len(l) for l in labels)) / len(labels)
    print "Avg labels per object:",avg_label_count
    print "------------------------------------------"                 
        
    print "Loading distances' file:",  distance_matrix_path
    distances = jrs_io.load_data(open(distance_matrix_path), cast_method, numrows = LOAD_ROWS_FROM_FILE)
    try: print "",len(distances), "x",len(distances[0])
    except: pass
    #print "Sample distances:", distances[:5][:5]    
    print "------------------------------------------"
    
    #print "Building tree on sample vs. sample distances avg"
    #outpath = "training_distances_avg_tree"
    #print " extracting submatrix..."
    #training_distances = jrs_io.extract_submatrix(distances, training_range[0], training_range[1], training_range[0], training_range[1])
    #dist_tree(training_distances, training_labels, outpath)    
    #dist_tree(distances, labels, outpath)
    #print "------------------------------------------"
    #print "------------------------------------------"
    #print "------------------------------------------"
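# --- hedged sketch, not part of the original project ---
# label2count and pairlabel2count used above look like plain (co-)occurrence
# statistics over the multi-label sets; one plausible way to build them:

from collections import defaultdict
from itertools import combinations

def count_label_occurrences(labels):
    # hypothetical helper: label -> #objects carrying it, and
    # (label_a, label_b) -> #objects carrying both
    label2count = defaultdict(int)
    pairlabel2count = defaultdict(int)
    for label_set in labels:
        uniq = sorted(set(label_set))
        for l in uniq:
            label2count[l] += 1
        for l1, l2 in combinations(uniq, 2):
            pairlabel2count[(l1, l2)] += 1
    return dict(label2count), dict(pairlabel2count)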