def gettingSolu(puppet=False):
    """Build the training solutions from every plate/well found on disk.

    Walks the raw-data directory (one sub-directory per plate, one file per
    well), loads each well through ``gettingRaw``, merges the resulting frame
    lots and stacks the per-well feature tables. The features reported by
    ``treatments.whichAreNan`` are then removed, the list of removed features
    is pickled to ``../results/featuresToDelete.pkl`` and the training uplets
    are handed to ``joining.j``.

    Args:
        puppet: when true, the training annotations are read from the
            ``puppet_trainingset`` directory (toy example) instead of
            ``trainingset``. Defaults to False, the value that used to be
            hard-coded.

    Returns:
        Whatever ``joining.j(singlets, doublets, FEATURE_NUMBER)`` returns.

    Side effects:
        Appends one line per well to the global ``monOutput``, decrements the
        global ``FEATURE_NUMBER`` by the number of removed features, writes
        the pickle file and prints progress information to stdout.
    """
    global monOutput
    global FEATURE_NUMBER

    data_root = '/media/lalil0u/New/workspace2/Tracking/data'
    raw_root = data_root + '/raw'
    training_root = data_root + ('/puppet_trainingset' if puppet else '/trainingset')

    # Feature table of all wells stacked together; used afterwards to find
    # out which features contain NaN entries.
    tabF = None
    newFrameLot = None

    for plate in os.listdir(raw_root):
        print(plate)
        for well in os.listdir(raw_root + '/' + plate):
            # Drop the last five characters of the file name; ".hdf5" is
            # appended again below to rebuild the path.
            well = well[:-5]
            print(well)
            filename = raw_root + '/' + plate + "/" + well + ".hdf5"
            filenameT = training_root + '/PL' + plate + "___P" + well + "___T00000.xml"

            monOutput += "plate = " + plate + ",well = " + well + "\n"

            # Accumulate this well's frame lot and feature table.
            frameLotC, tabFC = gettingRaw(filename, filenameT, plate, well)
            if newFrameLot is None:
                newFrameLot = frameLotC
            else:
                newFrameLot.addFrameLot(frameLotC)
            # Identity test on purpose: `tabF == None` on a NumPy array is an
            # elementwise comparison whose truth value is ambiguous.
            tabF = tabFC if tabF is None else np.vstack((tabF, tabFC))

    print("final training set content :")
    count, total = newFrameLot.statisticsTraining2()
    print("%s %s" % (count, total))

    # NaN clean-up: the keys of `f` are the features to remove.
    _, f = treatments.whichAreNan(tabF)
    print(tabF.shape)
    featuresToDelete = list(f.keys())

    # NOTE(review): the cleaned table is not used afterwards; the call is
    # kept in case treatments.clean has side effects - confirm.
    tabF = treatments.clean(tabF, list(f.keys()))

    newFrameLot.clean(featuresToDelete)
    FEATURE_NUMBER -= len(featuresToDelete)

    # Binary mode is what pickle expects; the context manager closes the file
    # even if the dump fails.
    with open("../results/featuresToDelete.pkl", 'wb') as fichier:
        pickle.dump(featuresToDelete, fichier)

    print(FEATURE_NUMBER)
    print("uplets now")
    # Here we retrieve the singlets and doublets, with the training value in
    # cell.to when they are part of the training set and None otherwise.
    # For the centre and the features, the mean over the objects of the
    # singlet is used.
    # Events involving more than two objects are not taken into account.
    singlets, doublets = newFrameLot.getTrainingUplets()

    # Open question from the original author: should the features be
    # normalised?
    solutions = joining.j(singlets, doublets, FEATURE_NUMBER)

    return solutions
# Example #2
# 0
def gettingSolu(plate, pheno_only=False):
    """Load every well of one plate and join its uplets for prediction.

    The list of features removed at training time is read from
    ``loadingFolder + "featuresToDelete.pkl"``. Each well file of the plate
    is loaded through ``test.gettingRaw`` and merged into a single frame lot.
    Features that contain NaN entries here but are not in the training list
    are set to zero (with a warning), the training-time features are removed,
    and all uplets are passed to ``joining.j(..., training=False)``.

    Args:
        plate: name of the plate sub-directory inside the raw-data directory.
        pheno_only: when true, wells are filtered with the global counters:
            the first ``pheno`` wells not listed in ``d_ctrl[plate]`` and the
            first ``ctrl`` wells listed in it are skipped, and the
            corresponding counter is decremented for each skipped well.

    Returns:
        ``(solutions, newFrameLot)``, or ``(None, None)`` when no well was
        loaded.

    Side effects:
        Appends one line per loaded well to the global ``monOutput``, may
        decrement the globals ``pheno``, ``ctrl`` and (when the global
        ``first`` is true) ``FEATURE_NUMBER``, and prints progress and timing
        information to stdout.
    """
    global monOutput
    global FEATURE_NUMBER
    global loadingFolder
    global first
    global ctrl
    global pheno

    raw_root = '/media/lalil0u/New/workspace2/Tracking/data/raw'
    skipped_msg = "------------------------PAS PRIS------------------------"

    # Feature table of all wells stacked together; used afterwards to find
    # out which features contain NaN entries.
    tabF = None

    # The two spaces reproduce the original output exactly.
    print("current directory  %s" % os.getcwd())
    # NOTE(review): pickle.load can execute arbitrary code; loadingFolder
    # must only ever point at trusted files.
    # Binary mode is what pickle expects; the context manager closes the file
    # even if loading fails.
    with open(loadingFolder + "featuresToDelete.pkl", 'rb') as fichier:
        f = pickle.load(fichier)

    newFrameLot = None

    print(plate)
    for well in os.listdir(raw_root + '/' + plate):
        # Drop the last five characters of the file name; ".hdf5" is appended
        # again below to rebuild the path.
        well = well[:-5]
        print(well)
        if pheno_only:
            if well not in d_ctrl[plate]:
                if pheno > 0:
                    print(skipped_msg)
                    pheno -= 1
                    continue
                print("PHENO")
            else:
                if ctrl > 0:
                    print(skipped_msg)
                    ctrl -= 1
                    continue
                print("CONTROL")

        filename = raw_root + '/' + plate + "/" + well + ".hdf5"
        filenameT = '/media/lalil0u/New/workspace2/Tracking/data/TSinclMD/PL' + plate + "___P" + well + "___T00000.xml"

        monOutput += "plate = " + plate + ",well = " + well + "\n"

        # Accumulate this well's frame lot and feature table.
        frameLotC, tabFC = test.gettingRaw(filename, filenameT, plate, well)
        if newFrameLot is None:
            newFrameLot = frameLotC
        else:
            newFrameLot.addFrameLot(frameLotC)
        # Identity test on purpose: `tabF == None` on a NumPy array is an
        # elementwise comparison whose truth value is ambiguous.
        tabF = tabFC if tabF is None else np.vstack((tabF, tabFC))

    if newFrameLot is None:
        return None, None

    _, f2 = treatments.whichAreNan(tabF)

    # Features with NaN entries in the data to predict but not in the
    # training data.
    toZeros = [feature for feature in f2.keys() if feature not in f]
    if toZeros:
        msg="WARNING WARNING WARNING, some features here have NaN entries, and this was not the case in the training set. They are put to 0"
        warnings.warn(msg)
        newFrameLot.zeros(toZeros)
    newFrameLot.clean(f)
    # NOTE(review): `first` is only read here; presumably the caller clears
    # it after the first plate so FEATURE_NUMBER is reduced once - confirm.
    if first:
        FEATURE_NUMBER -= len(f)

    print("Feature number %s" % (FEATURE_NUMBER,))
    print("Getting all uplets now")
    # NOTE(review): time.clock() only exists on Python 2 / Python < 3.8.
    print("TIME TIME TIME %s" % (time.clock(),))
    # Here we retrieve the singlets and doublets, with the training value in
    # cell.to when they are part of the training set and None otherwise.
    # For the centre and the features, the mean over the objects of the
    # singlet is used.
    singlets, doublets = newFrameLot.getAllUplets()
    # Original author's note: for now only the transition from image 0 to
    # image 1 is kept.
    print("TIME TIME TIME after getting all uplets %s" % (time.clock(),))
    print("Joining uplets now")

    solutions = joining.j(singlets, doublets, FEATURE_NUMBER, training = False)
    print("TIME TIME TIME after joining %s" % (time.clock(),))

    return solutions, newFrameLot