コード例 #1
0
ファイル: layer.py プロジェクト: johnnoone/Pincaster
    def nearby(self, coords, radius=None, limit=None, with_properties=True):
        """
        Returns records around coords.

        Parameters:
            coords (tuple): Coordinates like (x, y).
            radius (int): Distance in meters.
            limit (int): Limits fetched records.
            with_properties (bool): Fetch properties.
        Returns:
            RecordSet
        """

        coords = extract_coords(coords)
        point = '%.3f,%.3f' % coords

        params = {
            'properties': with_properties and 1 or 0
        }

        if radius:
            params['radius'] = radius
        if limit:
            params['limit'] = limit

        path = '/api/1.0/search/%s/nearby/%s.json' % (self.name, point)

        command = self.pincaster.server
        response = command(path, 'GET', data=params)
        matches = response.get('matches', [])

        return RecordSet([Record(layer=self, **m) for m in matches])
コード例 #2
0
def main():
    args = parse_args()
    print('Loading ...')
    PATH = './data/'
    test = pd.read_csv(PATH + 'sample_submission.csv')
    #test = test.iloc[:50]
    test_images_dir = PATH + 'test_images/{}.jpg'
    df_test = test
    test_dataset = CarDataset(df_test, test_images_dir, sigma=1, training=False)
    load_model = args.load_model
    predictions, predictions_dropmask = [], []
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        model = torch.load(load_model)
    else:
        model = torch.load(load_model, map_location='cpu')

    if isinstance(model, torch.nn.DataParallel):
        model = model.module
    test_loader = DataLoader(dataset=test_dataset, batch_size=4, shuffle=False, num_workers=4)
    model.eval()
    print('Start Evaluation ...')
    for img, _, _, _, dropmasks in tqdm(test_loader):
        img = img.float().to(device)
        dropmasks = dropmasks.data.cpu().numpy()
        with torch.no_grad():
            output = model(img)
        if type(output) is list:
            output = output[-1]
            output = output['hm'] if type(output) is dict else output
        output = output.data.cpu().numpy()
        for out, test_mask in zip(output, dropmasks):
            # get unprocessed value
            coords = extract_coords(out, args.threshold)
            s = coords2str(coords)
            predictions.append(s)
            #test_mask = cv2.resize(test_mask[0], (IMG_WIDTH // MODEL_SCALE,  IMG_HEIGHT // MODEL_SCALE))
            # test_mask = np.where(test_mask > 255 // 2, 100, 0)  # subtract from logits
            #print(test_mask.shape, out[0].shape)
            #print(out[0].mean())
            #print(sth.sum())
            #out[0, test_mask > (255 // 2)] = -100
            #print(out[0].mean())
            #coords = extract_coords(out, args.threshold)
            #s = coords2str(coords)
            #predictions_dropmask.append(s)

    save_dir = load_model.split('/')[:-1]
    save_dir = '/'.join(save_dir)
    save_submission_file(test.copy(), save_dir, predictions, args.threshold, 'origin')
コード例 #3
0
ファイル: layer.py プロジェクト: egradman/Pincaster
 def nearby(self, coords, radius=None, limit=None, with_properties=True):
     """
     Returns records around coords.
     
     :keyword coords: see :attr:`coords`
     :keyword radius: see :attr:`radius`
     :keyword limit: see :attr:`limit`
     :keyword with_properties: see :attr:`with_properties`
     
     .. attribute:: coords
         
         Coordinates like (x, y).
     
     .. attribute:: radius
         
         Distance in meters.
     
     .. attribute:: limit
         
         Limits fetched records.
     
     .. attribute:: with_properties
         
         Fetch properties.
     
     """
     
     coords = extract_coords(coords)
     point = '%.3f,%.3f' % coords
     
     params = {
         'properties': with_properties and 1 or 0
     }
     
     if radius:
         params['radius'] = radius
     if limit:
         params['limit'] = limit
     
     path = '/api/1.0/search/%s/nearby/%s.json' % (self.name, point)
     
     command = self.pincaster.server
     response = command(path, 'GET', data=params)
     matches = response.get('matches', [])
     
     return RecordSet([Record(layer=self, **m) for m in matches])
コード例 #4
0
ファイル: submit.py プロジェクト: TheClearwind/kaggle
from model import CentResnet
from utils import extract_coords, coords2str

PATH = Config.PATH
device = Config.device
predictions = []
test_images_dir = PATH + 'test_images/{}.jpg'
test = pd.read_csv(PATH + 'sample_submission.csv')
df_test = test
test_dataset = CarDataset(df_test, test_images_dir, False)
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=4,
                         shuffle=False,
                         num_workers=4)
model = CentResnet(8).to(device)
model.load_state_dict(torch.load(Config.save_path))
model.eval()
if __name__ == '__main__':

    for img, _, _ in tqdm(test_loader):
        with torch.no_grad():
            output = model(img.to(device))
        output = output.data.cpu().numpy()
        for out in output:
            coords = extract_coords(out)
            s = coords2str(coords)
            predictions.append(s)
    test = pd.read_csv(PATH + 'sample_submission.csv')
    test['PredictionString'] = predictions
    test.to_csv('submission.csv', index=False)
コード例 #5
0
def minibatch_(functions, clf,scaler,w, loss__,mse,hinge1,hinge2,full_image,img_nr,alphas,learning_rate,subsamples, mode):
    X_p, y_p, inv = get_data_from_img_nr(class_,img_nr, subsamples)
    if X_p != []:
        boxes = []
        ground_truth = inv[0][2]
        img_nr = inv[0][0]
        print img_nr
        if less_features:
            X_p = [fts[0:features_used] for fts in X_p]
        if os.path.isfile('/var/node436/local/tstahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt'):
            f = open('/var/node436/local/tstahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt', 'r')
        else:
            print 'warning'
        for line, y in zip(f, inv):
            tmp = line.split(',')
            coord = []
            for s in tmp:
                coord.append(float(s))
            boxes.append([coord, y[2]])
        #assert(len(boxes)<500)
        boxes, y_p, X_p = sort_boxes(boxes, y_p, X_p, 0,5000)
        
        if os.path.isfile('/var/node436/local/tstahl/GroundTruth/%s/%s.txt'%(class_,format(img_nr, "06d"))):
            gr = open('/var/node436/local/tstahl/GroundTruth/%s/%s.txt'%(class_,format(img_nr, "06d")), 'r')
        else:
            gr = []
        ground_truths = []
        for line in gr:
           tmp = line.split(',')
           ground_truth = []
           for s in tmp:
              ground_truth.append(int(s))
           ground_truths.append(ground_truth)
        
        #prune boxes
        pruned_x = []
        pruned_y = []
        pruned_boxes = []
        if prune:
            for i, y_ in enumerate(y_p):
                if y_ > 0:
                    pruned_x.append(X_p[i])
                    pruned_y.append(y_p[i])
                    pruned_boxes.append(boxes[i])
        else:
            pruned_x = X_p
            pruned_y = y_p
            pruned_boxes = boxes
        
        if subsampling and pruned_boxes > subsamples:
            pruned_x = pruned_x[0:subsamples]
            pruned_y = pruned_y[0:subsamples]
            pruned_boxes = pruned_boxes[0:subsamples]
            
            
        # create_tree
        G, levels = create_tree(pruned_boxes)
        
        #prune tree to only have levels which fully cover the image, tested
        if prune_fully_covered:
            nr_levels_covered = 100
            total_size = surface_area(pruned_boxes, levels[0])
            for level in levels:
                sa = surface_area(pruned_boxes, levels[level])
                sa_co = sa/total_size
                if sa_co != 1.0:
                    G.remove_nodes_from(levels[level])
                else:
                    nr_levels_covered = level
            levels = {k: levels[k] for k in range(0,nr_levels_covered + 1)}
            
        # prune levels, speedup + performance 
        levels_tmp = {k:v for k,v in levels.iteritems() if k<prune_tree_levels}
        levels_gone = {k:v for k,v in levels.iteritems() if k>=prune_tree_levels}
        levels = levels_tmp
        #prune tree as well, for patches training
        for trash_level in levels_gone.values():
            G.remove_nodes_from(trash_level)
        
        coords = []
        features = []
        f_c = []
        f = []
        
        #either subsampling or prune_fully_covered
        #assert(subsampling != prune_fully_covered)
        
        if subsampling:
            if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r+')
            else:
                if mode == 'extract_train' or mode == 'extract_test':                
                    print 'coords for %s with %s samples have to be extracted'%(img_nr,subsamples)
                    f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'w')
                    for level in levels:
                        levl_boxes = extract_coords(levels[level], pruned_boxes)
                        if levl_boxes != []:
                            for lvl_box in levl_boxes:
                                if lvl_box not in coords:
                                    coords.append(lvl_box)
                                    f_c.write('%s,%s,%s,%s'%(lvl_box[0],lvl_box[1],lvl_box[2],lvl_box[3]))
                                    f_c.write('\n')
                    f_c.close()
                    print 'features for %s with %s samples have to be extracted'%(img_nr,subsamples)
                    os.system('export PATH=$PATH:/home/koelma/impala/lib/x86_64-linux-gcc')
                    os.system('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/koelma/impala/third.13.03/x86_64-linux/lib')
                    #print "EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_%s.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),subsamples,format(img_nr, "06d"),subsamples)
                    os.system("EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/%s_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt"%(class_,(format(img_nr, "06d")),format(img_nr, "06d"),subsamples,class_,format(img_nr, "06d"),subsamples))
                    if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                        f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r')
                    else:
                        f_c = []
            coords = []
                
            if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/Features_upper/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                f = open('/var/node436/local/tstahl/Features_prop_windows/Features_upper/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r') 
                
                
        elif prune_fully_covered:
            if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))):
                f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r+')
                
                
            else:
                if mode == 'extract_train' or mode == 'extract_test':                
                    print 'coords for %s with fully_cover_tree samples have to be extracted'%(img_nr)
                    f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'w')
                    for level in levels:
                        levl_boxes = extract_coords(levels[level], pruned_boxes)
                        if levl_boxes != []:
                            for lvl_box in levl_boxes:
                                if lvl_box not in coords:
                                    coords.append(lvl_box)
                                    f_c.write('%s,%s,%s,%s'%(lvl_box[0],lvl_box[1],lvl_box[2],lvl_box[3]))
                                    f_c.write('\n')
                    f_c.close()
                    print 'features for %s with fully_cover_tree samples have to be extracted'%(img_nr)
                    os.system('export PATH=$PATH:/home/koelma/impala/lib/x86_64-linux-gcc')
                    os.system('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/koelma/impala/third.13.03/x86_64-linux/lib')
                    #print "EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_%s.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),subsamples,format(img_nr, "06d"),subsamples)
                    print "EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),format(img_nr, "06d"))
                    os.system("EuVisual /var/node436/local/tstahl/Images/%s.jpg /var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt --eudata /home/koelma/EuDataBig --imageroifile /var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),format(img_nr, "06d")))
                    if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))):
                        f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r')
                    else:
                        f_c = []
            coords = []
                
            if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))):
                f = open('/var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r') 
                        
                
        else:
            if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep%s.txt'%(format(img_nr, "06d"))):
                f = open('/var/node436/local/tstahl/Features_prop_windows/Features_upper/sheep%s.txt'%(format(img_nr, "06d")), 'r') 
            if os.path.isfile('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep%s.txt'%(format(img_nr, "06d"))):
                f_c = open('/var/node436/local/tstahl/Features_prop_windows/upper_levels/sheep%s.txt'%(format(img_nr, "06d")), 'r+')
                
        if f_c != []:
            for i,line in enumerate(f_c):
                str_ = line.rstrip('\n').split(',')
                cc = []
                for s in str_:
                   cc.append(float(s))
                coords.append(cc)
        if f != []:
            for i,line in enumerate(f):
                str_ = line.rstrip('\n').split(',')  
                ff = []
                for s in str_:
                   ff.append(float(s))
                features.append(ff)
        #assert len(coords) == len(features)
        
        # append x,y of intersections
        if learn_intersections:
            for inters,coord in zip(features,coords):
#                if inters not in pruned_x:
                pruned_x.append(inters)
                ol = 0.0
                ol = get_intersection_count(coord, ground_truths)
                pruned_y.append(ol)
                
        if mode == 'mean_variance':
            print 'normalizing'
            sum_x += np.array(pruned_x).sum(axis=0)
            n_samples += len(pruned_x)
            sum_sq_x +=  (np.array(pruned_x)**2).sum(axis=0)
            scaler.partial_fit(pruned_x)  # Don't cheat - fit only on training data
            return sum_x,n_samples,sum_sq_x, scaler
            
        if less_features:
            features = [fts[0:features_used] for fts in features]
        #normalize
        norm_x = []
        if normalize and (mode != 'extract_train' and mode != 'extract_test'):
#            for p_x in pruned_x:
#                norm_x.append((p_x-mean)/variance)
            norm_x = scaler.transform(pruned_x)
            if features != []:
                features = scaler.transform(features)
        else:
            norm_x = pruned_x
        data = (G, levels, pruned_y, norm_x, pruned_boxes, ground_truths, alphas)
        sucs = nx.dfs_successors(G)
        
        predecs = nx.dfs_predecessors(G)
        
        #preprocess: node - children
        children = {}
        last = -1
        for node,children_ in zip(sucs.keys(),sucs.values()):
            if node != last+1:
                for i in range(last+1,node):
                    children[i] = []
                children[node] = children_
            elif node == last +1:
                children[node] = children_
            last = node
        if mode == 'training':
            if alphas[0] == 0: #if we don't learn the proposals, we learn just the levels: better, because every level has same importance and faster
                print 'training levels', img_nr
                for level in levels:
                    print 'level' , level
                    if img_nr in functions:
                        if level in functions[img_nr]:
                            function = functions[img_nr][level]
                        else:
                            function = []
                    else:
                        functions[img_nr] = {}
                        function = []
                    w, function = tree_level_regression(class_,function,levels,level,features,coords,scaler,w,norm_x,pruned_y,None,predecs,children,pruned_boxes,learning_rate,alphas,img_nr,jans_idea)
                    if level not in functions[img_nr]:
                        functions[img_nr][level] = function
                return w, len(pruned_y), len(levels)
            else: #if we learn proposals, levels with more proposals have more significance...., slow - need to change
                print 'training patches', img_nr
                print predecs
                nodes = list(G.nodes())
                for node in nodes:
                    print node
                    if node == 0:
                        w = learn_root(w,norm_x[0],pruned_y[0],learning_rate,alphas)
                    else:
                        for num,n in enumerate(levels.values()):
                            if node in n:
                                level = num
                                break
                        if img_nr in functions:
                            if level in functions[img_nr]:
                                function = functions[img_nr][level]
                            else:
                                function = []
                        else:
                            functions[img_nr] = {}
                            function = []
                        #w, function = tree_level_regression(class_,function,levels,level,features,coords,scaler,w,norm_x,pruned_y,node,predecs,children,pruned_boxes,learning_rate,alphas,img_nr)
                        w, function = constrained_regression(class_,function,features,coords,scaler,w,norm_x,pruned_y,node,predecs,children,pruned_boxes,learning_rate,alphas,img_nr,squared_hinge_loss)
                        #TODO: train regressor/classifier that predicts/chooses level. Features: level, number of proposals, number of intersections, avg size of proposal, predictions(for regressor), etc.
                        if level not in functions[img_nr]:
                            functions[img_nr][level] = function
                return w, len(pruned_y), len(G.nodes())
        elif mode == 'scikit_train':
            clf.partial_fit(norm_x,pruned_y)
            return clf
        elif mode == 'loss_train':
            if alphas[0] == 0: #levels
                loss__.append(tree_level_loss(class_,features,coords,scaler, w, data, predecs, children,img_nr,-1,functions))
                return loss__
            else:
                loss__.append(loss(class_,squared_hinge_loss,features,coords,scaler,w, data, predecs, children,img_nr, -1))
        elif mode == 'loss_test' or mode == 'loss_eval':
            print mode, loss__
            if alphas[0] == 0: #levels
                loss__.append(tree_level_loss(class_,features,coords,scaler, w, data, predecs, children,img_nr,-1,functions))
                cpl = max(0, np.dot(w,np.array(norm_x[0]).T))
                full_image.append([pruned_y[0],cpl])
                return loss__,full_image
            else:
                loss__.append(loss(class_,squared_hinge_loss,features,coords,scaler,w, data, predecs, children,img_nr, -1))
                cpl = max(0, np.dot(w,np.array(norm_x[0]).T))
                full_image.append([pruned_y[0],cpl])
                return loss__,full_image
        elif mode == 'loss_scikit_test' or mode == 'loss_scikit_train':
            loss__.append(((clf.predict(norm_x) - pruned_y)**2).sum())
            return loss__ 
        elif mode == 'levels_train' or mode == 'levels_test':
            preds = []
            for i,x_ in enumerate(norm_x):
                preds.append(np.dot(w, x_))
            cpls = []
            truelvls = []
            used_boxes_ = []
            total_size = surface_area(pruned_boxes, levels[0])
            fully_covered_score = 0.0
            fully_covered_score_lvls = 0.0
            covered_levels = []
            print mode, len(levels)
            for level in levels:
                function = functions[img_nr][level]
                cpl,used_boxes,_ = count_per_level([],class_,features,coords,scaler,w, preds, img_nr, pruned_boxes,levels[level], '',function)
                # clipp negative predictions
                cpl = max(0,cpl)
                if used_boxes != []:
                    used_boxes_.append(used_boxes[0][1])
                tru = y_p[0]
                cpls.append(cpl)
                sa = surface_area(pruned_boxes, levels[level])
                sa_co = sa/total_size
                if sa_co == 1.0:
                   fully_covered_score += cpl
                   fully_covered_score_lvls += 1
                   covered_levels.append(cpl)
                truelvls.append(tru)
            return cpls,truelvls
コード例 #6
0
ファイル: hope.py プロジェクト: TheClearwind/kaggle
PATH = Config.PATH
device = Config.device
predictions = []
train_images_dir = PATH + 'train_images/{}.jpg'

train = pd.read_csv(PATH + 'train.csv')
df_train, df_dev = train_test_split(train, test_size=0.2, random_state=118)
model = CentResnet().to(device)
# model.load_state_dict(torch.load(Config.save_path))
model.eval()
if __name__ == '__main__':
    import numpy as np

    reg_cout = 0
    pre_cout = 0
    for idx in range(len(df_dev)):
        data = df_dev.iloc[idx]
        imageid, prestr = data["ImageId"], data["PredictionString"]
        path = PATH + 'train_images/{}.jpg'.format(imageid)
        img0 = imread(path, True, True)
        img = preprocess_image(img0)
        img = np.rollaxis(img, 2, 0)
        reg_cout += len(prestr.split()) // 7
        img = img[np.newaxis, :]
        img = torch.tensor(img).to(device)
        with torch.no_grad():
            out = model(img)
            coords = extract_coords(out[0].cpu().numpy(), threshold=0)
            pre_cout += len(coords)
        print("reg cout:{} pre count:{}".format(reg_cout, pre_cout))
コード例 #7
0
def minibatch_(all_train_imgs,all_test_imgs,clf,scaler,w, loss__,mse,hinge1,hinge2,full_image,alphas,learning_rate,test_imgs, train_imgs,minibatch,subsamples,sum_x,n_samples,sum_sq_x,mean,variance, mode,mous):
    if mode == 'loss_test' or mode == 'loss_scikit_test' or mode == 'levels_test':
        X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'test', c, subsamples)
    else:
        X_p, y_p, inv = get_data(class_, test_imgs, train_imgs, minibatch, minibatch + 1, 'training', c, subsamples)        
    if X_p != []:
        boxes = []
        ground_truth = inv[0][2]
        img_nr = inv[0][0]
        if less_features:
            X_p = [fts[0:features_used] for fts in X_p]
        if os.path.isfile('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt'):
            f = open('/home/stahl/Coords_prop_windows/'+ (format(img_nr, "06d")) +'.txt', 'r')
        else:
            print 'warning'
        for line, y in zip(f, inv):
            tmp = line.split(',')
            coord = []
            for s in tmp:
                coord.append(float(s))
            boxes.append([coord,y[2]])
        assert(len(boxes)<1500)
        boxes, y_p, X_p = sort_boxes(boxes, y_p, X_p, 0,1500)
        
        gr = []
        if os.path.isfile('/home/stahl/GroundTruth/%s/%s.txt'%(class_,format(img_nr, "06d"))):
            gr = open('/home/stahl/GroundTruth/%s/%s.txt'%(class_,format(img_nr, "06d")), 'r')
        ground_truths = []
        if gr != []: #if no class image -> no ground truth. (I think this is only needed for learn _ntersection)
            for line in gr:
               tmp = line.split(',')
               ground_truth = []
               for s in tmp:
                  ground_truth.append(int(s))
               ground_truths.append(ground_truth)
        
        if mode == 'mean_variance':
            scaler.partial_fit(X_p)  # Don't cheat - fit only on training data
            return scaler
            
        # create_tree
        G, levels = create_tree(boxes)
        
        #prune tree to only have levels which fully cover the image
        # tested
        if prune_fully_covered:
            nr_levels_covered = 100
            total_size = surface_area(boxes, levels[0])
            for level in levels:
                sa = surface_area(boxes, levels[level])
                sa_co = sa/total_size
                if sa_co != 1.0:
                    G.remove_nodes_from(levels[level])
                else:
                    nr_levels_covered = level
            levels = {k: levels[k] for k in range(0,nr_levels_covered + 1)}
        
        #either subsampling or prune_fully_covered
        #assert(subsampling != prune_fully_covered)
        
        # prune levels, speedup + performance 
        levels = {k:v for k,v in levels.iteritems() if k<prune_tree_levels}
        
        #prune G in order to remove nodes of the lower levels
        remaining_nodes = []
        for lev in levels.values():
            remaining_nodes.extend(lev)
        for g_node in G.nodes():
            if g_node not in remaining_nodes:
                G.remove_node(g_node)
        
        coords = []
        features = []
        f_c = []
        f = []
        
        if learn_intersections and not prune_fully_covered:
            if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r+')
            else:
                if mode == 'extract_train' or mode == 'extract_test':                
                    print 'coords for %s with %s samples have to be extracted'%(img_nr,subsamples)
                    f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'w')
                    for level in levels:
                        levl_boxes = extract_coords(levels[level], boxes)
                        if levl_boxes != []:
                            for lvl_box in levl_boxes:
                                if lvl_box not in coords:
                                    coords.append(lvl_box)
                                    f_c.write('%s,%s,%s,%s'%(lvl_box[0],lvl_box[1],lvl_box[2],lvl_box[3]))
                                    f_c.write('\n')
                    f_c.close()
                    print 'features for %s with %s samples have to be extracted'%(img_nr,subsamples)
                    os.system('export PATH=$PATH:/home/koelma/impala/lib/x86_64-linux-gcc')
                    os.system('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/koelma/impala/third.13.03/x86_64-linux/lib')
                    #print "EuVisual /home/stahl/Images/%s.jpg /home/stahl/Features_prop_windows/Features_upper/sheep_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /home/stahl/Features_prop_windows/upper_levels/sheep_%s_%s.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),subsamples,format(img_nr, "06d"),subsamples)
                    os.system("EuVisual /home/stahl/Images/%s.jpg /home/stahl/Features_prop_windows/Features_upper/%s_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt"%(class_,(format(img_nr, "06d")),format(img_nr, "06d"),subsamples,class_,format(img_nr, "06d"),subsamples))
                    if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                        f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r')
                    else:
                        f_c = []
            coords = []
                
            if os.path.isfile('/home/stahl/Features_prop_windows/Features_upper/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples)):
                f = open('/home/stahl/Features_prop_windows/Features_upper/%s_%s_%s.txt'%(class_,format(img_nr, "06d"),subsamples), 'r') 
                
                
        elif prune_fully_covered:
            if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))):
                f_c = open('/home/stahl/Features_prop_windows/upper_levels/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r+')
                
                
            else:
                if mode == 'extract_train' or mode == 'extract_test':                
                    print 'coords for %s with fully_cover_tree samples have to be extracted'%(img_nr)
                    f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_fully_cover_tree.txt'%(class_,format(img_nr, "06d")), 'w')
                    for level in levels:
                        levl_boxes = extract_coords(levels[level], boxes)
                        if levl_boxes != []:
                            for lvl_box in levl_boxes:
                                if lvl_box not in coords:
                                    coords.append(lvl_box)
                                    f_c.write('%s,%s,%s,%s'%(lvl_box[0],lvl_box[1],lvl_box[2],lvl_box[3]))
                                    f_c.write('\n')
                    f_c.close()
                    print 'features for %s with fully_cover_tree samples have to be extracted'%(img_nr)
                    os.system('export PATH=$PATH:/home/koelma/impala/lib/x86_64-linux-gcc')
                    os.system('export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/koelma/impala/third.13.03/x86_64-linux/lib')
                    #print "EuVisual /home/stahl/Images/%s.jpg /home/stahl/Features_prop_windows/Features_upper/sheep_%s_%s.txt --eudata /home/koelma/EuDataBig --imageroifile /home/stahl/Features_prop_windows/upper_levels/sheep_%s_%s.txt"%((format(img_nr, "06d")),format(img_nr, "06d"),subsamples,format(img_nr, "06d"),subsamples)
                    os.system("EuVisual /home/stahl/Images/%s.jpg /home/stahl/Features_prop_windows/Features_upper/%s_%s_fully_cover_tree.txt --eudata /home/koelma/EuDataBig --imageroifile /home/stahl/Features_prop_windows/upper_levels/%s_%s_fully_cover_tree.txt"%(class_,(format(img_nr, "06d")),format(img_nr, "06d"),class_,format(img_nr, "06d")))
                    if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/%s_%s_fully_cover_tree.txt'%(class_,format(img_nr, "06d"))):
                        f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s_%s_fully_cover_tree.txt'%(class_,format(img_nr, "06d")), 'r')
                    else:
                        f_c = []
            coords = []
                
            if os.path.isfile('/home/stahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d"))):
                f = open('/home/stahl/Features_prop_windows/Features_upper/sheep_%s_fully_cover_tree.txt'%(format(img_nr, "06d")), 'r') 
                        
                
#        else: #we don't need to load intersections
#            if os.path.isfile('/home/stahl/Features_prop_windows/Features_upper/%s%s.txt'%(class_,format(img_nr, "06d"))):
#                f = open('/home/stahl/Features_prop_windows/Features_upper/%s%s.txt'%(class_,format(img_nr, "06d")), 'r') 
#            if os.path.isfile('/home/stahl/Features_prop_windows/upper_levels/%s%s.txt'%(class_,format(img_nr, "06d"))):
#                f_c = open('/home/stahl/Features_prop_windows/upper_levels/%s%s.txt'%(class_,format(img_nr, "06d")), 'r+')
#            else:
#                print '/home/stahl/Features_prop_windows/upper_levels/%s%s.txt does not exist'%(class_,format(img_nr, "06d"))
        for i,line in enumerate(f_c):
            str_ = line.rstrip('\n').split(',')
            cc = []
            for s in str_:
               cc.append(float(s))
            coords.append(cc)
        for i,line in enumerate(f):
            str_ = line.rstrip('\n').split(',')  
            ff = []
            for s in str_:
               ff.append(float(s))
            features.append(ff)
        if less_features:
            features = [fts[0:features_used] for fts in features]
        if normalize and features != []:
            features = scaler.transform(features)
        
        print len(y_p), len(X_p)
        print len(features), len(coords)
        assert len(coords) == len(features)
        
        # append x,y of intersections
        #if learn_intersections:
        #    for inters,coord in zip(features,coords):
#                if inters not in pruned_x:
        #        X_p.append(inters)
        #        ol = 0.0
        #        ol = get_intersection_count(coord, ground_truths)
        #        y_p.append(ol)
        print len(y_p), len(X_p)
        #normalize
        norm_x = []
        if normalize:
#            for p_x in pruned_x:
#                norm_x.append((p_x-mean)/variance)
            norm_x = scaler.transform(X_p)
        else:
            norm_x = X_p
        data = (G, levels, y_p, norm_x, boxes, ground_truths, alphas)
        sucs = nx.dfs_successors(G)
        
        predecs = nx.dfs_predecessors(G)
        
        #preprocess: node - children
        children = {}
        last = -1
        for node,children_ in zip(sucs.keys(),sucs.values()):
            if node != last+1:
                for i in range(last+1,node):
                    children[i] = []
                children[node] = children_
            elif node == last +1:
                children[node] = children_
            last = node
        if mode == 'train':
            if alphas[2] == 0 and alphas[3] == 0: #just learn proposals and intersections
                # only use proposals and intersections used in pruned tree
                used_ind = get_used_proposals(G, boxes, coords, levels)
                used_x = []
                used_y = []
                for ind in used_ind['prop']:
                    used_x.append(norm_x[ind])
                    used_y.append(y_p[ind])
                for ind in used_ind['inters']:
                    used_x.append(features[ind])
                    used_y.append(get_intersection_count(coords[ind], ground_truths))
                print len(used_x),len(used_y)
                for x_i,y_i in zip(used_x,used_y):
                    w = learn_root(w,x_i,y_i,learning_rate,alphas)
            else:
                nodes = list(G.nodes())
                for node in nodes:
                    if node == 0:
                        if alphas[0] != 0:
                            w = learn_root(w,norm_x[0],y_p[0],learning_rate,alphas)
                        else:
                            print 'learn nothing'
                    else:
                        w = constrained_regression(class_,features,coords,scaler,w,norm_x,y_p,node,predecs,children,boxes,learning_rate,alphas,img_nr, squared_hinge_loss)
            return w, len(y_p), len(G.nodes())
        elif mode == 'scikit_train':
            print norm_x,y_p
            clf.partial_fit(norm_x,y_p)
            return clf
        elif mode == 'loss_train' or mode == 'loss_test':
            loss__.append(loss(class_,squared_hinge_loss,features,coords,scaler, w, data, predecs, children,img_nr,-1))
#            mse.append(((data[2] - np.dot(w,np.array(data[3]).T)) ** 2).sum())
#            a2 = alphas[2]
#            data = (G, levels, y_p, norm_x, boxes, ground_truths, [0,0,a2,0])
#            hinge1.append(loss(class_,squared_hinge_loss,features,coords,scaler, w, data, predecs, children,img_nr,-1))
#            a3 = alphas[3]
#            data = (G, levels, y_p, norm_x, boxes, ground_truths, [0,0,0,a3])
#            hinge2.append(loss(class_,squared_hinge_loss,features,coords,scaler, w, data, predecs, children,img_nr,-1))
            full_image.append([y_p[0],max(0,np.dot(w,np.array(norm_x[0]).T))])
            return loss__, full_image
        elif mode == 'loss_scikit_test' or mode == 'loss_scikit_train':
            loss__.append(((clf.predict(norm_x) - y_p)**2).sum())
            return loss__ 
        elif mode == 'levels_train' or mode == 'levels_test':
            #im = mpimg.imread('/home/stahl/Images/'+ (format(img_nr, "06d")) +'.jpg')
            preds = []
            for i,x_ in enumerate(norm_x):
                preds.append(np.dot(w, x_))
            cpls = []
            truelvls = []
            used_boxes_ = []
            total_size = surface_area(boxes, levels[0])
            fully_covered_score = 0.0
            fully_covered_score_lvls = 0.0
            covered_levels = []
            for level in levels:
                #tru and truelvls was in order to check if count_per_level method is correct
                cpl,used_boxes,_ = count_per_level(class_,features,coords,scaler,w, preds, img_nr, boxes,levels[level], '',[])
                cpl = max(0,cpl)
                if used_boxes != []:
                    used_boxes_.append(used_boxes[0][1])
                tru,_,_ = count_per_level(class_,features,coords,scaler,w, preds, img_nr, boxes,levels[level], 'gt',[])
                cpls.append(cpl)
                sa = surface_area(boxes, levels[level])
                sa_co = sa/total_size
                if sa_co == 1.0:
                   fully_covered_score += cpl
                   fully_covered_score_lvls += 1
                   covered_levels.append(cpl)
                truelvls.append(tru)
            return cpls, truelvls, used_boxes_,boxes,preds,fully_covered_score/fully_covered_score_lvls,covered_levels