def gettingSolu(loadingFolder, allDataFolder):
    global FEATURE_NUMBER
    # flag indicating whether we use a toy example or not
    puppet = False
    # array that will hold the features of all objects, to check which ones contain NaN
    tabF = None
    ctrl = 7; pheno = 6
    newFrameLot = None
    dataFolder = os.path.join(allDataFolder, 'raw')
    listD = os.listdir(dataFolder)
    for plate in listD:
        print plate
        listW = os.listdir(os.path.join(dataFolder, plate))
        for well in listW:
            well = well[:-5]
            print well
#            from trajPack import d_ctrl
#            if well not in d_ctrl[plate]:
#                if pheno <= 0:
#                    print "------------------------NOT TAKEN------------------------"
#                    continue
#                else:
#                    print "PHENO"
#                    pheno -= 1
#            else:
#                if ctrl <= 0:
#                    print "------------------------NOT TAKEN------------------------"
#                    continue
#                else:
#                    print "CONTROL"
#                    ctrl -= 1
            filename = os.path.join(dataFolder, plate, well + ".hdf5")
            if puppet:
                filenameT = os.path.join(allDataFolder, 'puppet_trainingset', 'PL' + plate + "___P" + well + "___T00000.xml")
            else:
                filenameT = os.path.join(allDataFolder, 'trainingset', 'PL' + plate + "___P" + well + "___T00000.xml")

            # add this well's frameLot and feature table
            frameLotC, tabFC = gettingRaw(filename, filenameT, plate, well)
            if frameLotC == None:
                sys.stdout.write("File {} containing data for plate {}, well {} does not contain all necessary data".format(filename, plate, well))
                continue
            if newFrameLot == None:
                newFrameLot = frameLotC
            else:
                newFrameLot.addFrameLot(frameLotC)
            tabF = tabFC if tabF == None else np.vstack((tabF, tabFC))

    print "final training set content :"
    count, total = newFrameLot.statisticsTraining2()
    print count, total

    # NaN clean-up
    c, f = treatments.whichAreNan(tabF)
    print tabF.shape
    featuresToDelete = f.keys()
    newFrameLot.clean(featuresToDelete)  ##np.delete(X, f, 1)
    FEATURE_NUMBER -= len(featuresToDelete)

    fichier = open(os.path.join(loadingFolder, "featuresToDelete.pkl"), 'w')
    pickle.dump(featuresToDelete, fichier)
    fichier.close()

    print FEATURE_NUMBER
    print "uplets now"
    # HERE WE RETRIEVE THE SINGLETS AND DOUBLETS, WITH THE TRAINING VALUE IN CELL.TO IF THEY ARE IN THE TRAINING SET, NONE OTHERWISE
    # FOR THE CENTER AND THE FEATURES, THE MEAN OF THE SINGLET'S OBJECTS IS USED
    singlets, doublets = newFrameLot.getTrainingUplets()
    return j(singlets, doublets, FEATURE_NUMBER)
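
# Hypothetical usage sketch (not part of the original code): a minimal way to drive the
# training-mode gettingSolu above, assuming FEATURE_NUMBER is initialized at module level
# and that allDataFolder contains the raw/ and trainingset/ subfolders read above. The
# folder paths and the output pickle name are placeholders.
def _demoTrainingRun(loadingFolder='../results/', allDataFolder='/path/to/data'):
    # builds the training solutions; also writes featuresToDelete.pkl into loadingFolder
    solutions = gettingSolu(loadingFolder, allDataFolder)
    # keep a copy of the joined training uplets for later inspection (arbitrary file name)
    fichier = open(os.path.join(loadingFolder, 'trainingSolutions.pkl'), 'w')
    pickle.dump(solutions, fichier)
    fichier.close()
    return solutions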
def gettingSolu(plate, pheno_only=False):
    global monOutput
    global FEATURE_NUMBER
    global loadingFolder
    global first
    global ctrl; global pheno
    # array that will hold the features of all objects, to check which ones contain NaN
    tabF = None
    print "current directory ", os.getcwd()
    fichier = open(loadingFolder + "featuresToDelete.pkl", 'r')
    f = pickle.load(fichier)
    fichier.close()
    #print os.getcwd()
    newFrameLot = None
    print plate
    listW = os.listdir('/media/lalil0u/New/workspace2/Tracking/data/raw/' + plate)
    for well in listW:
        well = well[:-5]
        print well
        if pheno_only:
            if well not in d_ctrl[plate]:
                if pheno > 0:
                    print "------------------------NOT TAKEN------------------------"
                    pheno -= 1
                    continue
                else:
                    print "PHENO"
            else:
                if ctrl > 0:
                    print "------------------------NOT TAKEN------------------------"
                    ctrl -= 1
                    continue
                else:
                    print "CONTROL"
        filename = '/media/lalil0u/New/workspace2/Tracking/data/raw/' + plate + "/" + well + ".hdf5"
        filenameT = '/media/lalil0u/New/workspace2/Tracking/data/TSinclMD/PL' + plate + "___P" + well + "___T00000.xml"
        monOutput += "plate = " + plate + ",well = " + well + "\n"

        # add this well's frameLot and feature table
        frameLotC, tabFC = test.gettingRaw(filename, filenameT, plate, well)
        if newFrameLot == None:
            newFrameLot = frameLotC
        else:
            newFrameLot.addFrameLot(frameLotC)
        tabF = tabFC if tabF == None else np.vstack((tabF, tabFC))

#    print "final training set content :"
#    count, total = newFrameLot.statisticsTraining2()
#    print count, total
    if newFrameLot is None:
        return None, None

    c, f2 = treatments.whichAreNan(tabF)
    #print len(f2.keys()), len(f)
#    if len(f) < len(f2.keys()):
#        pdb.set_trace()
    # if there are features with NaN entries in the predict data but not in the training data
    toZeros = filter(lambda x: x not in f, f2.keys())
    #pdb.set_trace()
    if toZeros != []:
        msg = "WARNING WARNING WARNING, some features here have NaN entries, and this was not the case in the training set. They are put to 0"
        warnings.warn(msg)
        newFrameLot.zeros(toZeros)

    newFrameLot.clean(f)
    if first:
        FEATURE_NUMBER -= len(f)

    print "Feature number", FEATURE_NUMBER
    print "Getting all uplets now"
    print "TIME TIME TIME", time.clock()
    # HERE WE RETRIEVE THE SINGLETS AND DOUBLETS, WITH THE TRAINING VALUE IN CELL.TO IF THEY ARE IN THE TRAINING SET, NONE OTHERWISE
    # FOR THE CENTER AND THE FEATURES, THE MEAN OF THE SINGLET'S OBJECTS IS USED
    singlets, doublets = newFrameLot.getAllUplets()  # for now I only keep the transition from frame 0 to frame 1
    print "TIME TIME TIME after getting all uplets", time.clock()
    print "Joining uplets now"
    solutions = joining.j(singlets, doublets, FEATURE_NUMBER, training=False)
    print "TIME TIME TIME after joining", time.clock()
    return solutions, newFrameLot
def gettingSolu():
    global monOutput
    global FEATURE_NUMBER
    # flag indicating whether we use a toy example or not
    puppet = False
    # array that will hold the features of all objects, to check which ones contain NaN
    tabF = None
    #print os.getcwd()
    newFrameLot = None
    listD = os.listdir('/media/lalil0u/New/workspace2/Tracking/data/raw')
    for plate in listD:
        print plate
        listW = os.listdir('/media/lalil0u/New/workspace2/Tracking/data/raw/' + plate)
        for well in listW:
            well = well[:-5]
            print well
            filename = '/media/lalil0u/New/workspace2/Tracking/data/raw/' + plate + "/" + well + ".hdf5"
            if puppet:
                filenameT = '/media/lalil0u/New/workspace2/Tracking/data/puppet_trainingset/PL' + plate + "___P" + well + "___T00000.xml"
            else:
                filenameT = '/media/lalil0u/New/workspace2/Tracking/data/trainingset/PL' + plate + "___P" + well + "___T00000.xml"
            monOutput += "plate = " + plate + ",well = " + well + "\n"

            # add this well's frameLot and feature table
            frameLotC, tabFC = gettingRaw(filename, filenameT, plate, well)
            if newFrameLot == None:
                newFrameLot = frameLotC
            else:
                newFrameLot.addFrameLot(frameLotC)
            tabF = tabFC if tabF == None else np.vstack((tabF, tabFC))

    print "final training set content :"
    count, total = newFrameLot.statisticsTraining2()
    print count, total

    # NaN clean-up
    c, f = treatments.whichAreNan(tabF)
    print tabF.shape
    featuresToDelete = f.keys()
#    for morpho in filter(lambda x: x not in featuresToDelete, l_indexes):
#        featuresToDelete.append(morpho)
    tabF = treatments.clean(tabF, f.keys())
    newFrameLot.clean(featuresToDelete)  ##np.delete(X, f, 1)
    FEATURE_NUMBER -= len(featuresToDelete)

    fichier = open("../results/featuresToDelete.pkl", 'w')
    pickle.dump(featuresToDelete, fichier)
    fichier.close()
    print FEATURE_NUMBER

    #lstCellsT, lstCellsF, X, Y, Xz, Z = newFrameLot.getTraining2()
    print "uplets now"
    # HERE WE RETRIEVE THE SINGLETS AND DOUBLETS, WITH THE TRAINING VALUE IN CELL.TO IF THEY ARE IN THE TRAINING SET, NONE OTHERWISE
    # FOR THE CENTER AND THE FEATURES, THE MEAN OF THE SINGLET'S OBJECTS IS USED
    singlets, doublets = newFrameLot.getTrainingUplets()
    #print "here I should normally recover the training set:"
    ##KNOWING THAT EVENTS INVOLVING MORE THAN TWO OBJECTS ARE NOT TAKEN INTO ACCOUNT
    #print "SINGLETS"
    #merge = 0
    #move = 0
    #split = 0
    #dis = 0
    #app = 0
    #for p in singlets:
    #    for w in singlets[p]:
    #        for i in singlets[p][w]:
    #            sin = singlets[p][w][i]
    #            print p, w, i
    #            out = ""
    #            for s in sin:
    #                out += str(s.label) + " "
    #                if s.to is not None and len(s.to) > 1 and s.label != -1:
    #                    #print "-----------------split", s.label, s.to
    #                    split += 1
    #                elif s.to == (-1,): dis += 1
    #                elif s.to is not None and s.label == -1:
    #                    app += len(s.to)
    #                    print "**********appear", s.label, s.to
    #                elif s.to is not None and len(s.to) == 1: move += 1
    #                elif s.to is not None: print "surprise", s.to
    #                try:
    #                    if len(s.fr) > 1:
    #                        print "there is a merge here that I should find again in the doublet list", s.fr
    #                except TypeError:
    #                    if s.label != -1:
    #                        print "type error"
    #                finally:
    #                    pass
    #            print out
    #print move, split, dis, app
    #print "DOUBLETS"
    #for p in doublets:
    #    for w in doublets[p]:
    #        for i in doublets[p][w]:
    #            sin = doublets[p][w][i]
    #            #print p, w, i
    #            for s in sin:
    #                #print s.label, s.to
    #                if s.to is not None: merge += 1
    #print merge
    #f, featuresNames = treatments.returnBadFeatures()
    # ALSO TO CONSIDER: DO WE NORMALIZE OR NOT?
#    featuresToKeep = [221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238]
    solutions = joining.j(singlets, doublets, FEATURE_NUMBER)
    # here I write everything to a txt file just in case
#    file = open("results/outputSoluUnNorm.txt", "w")
#    file.write(solutions.output())
#    file.close()
    return solutions
def gettingSolu(loadingFolder, hdf5Folder='/media/lalil0u/New/workspace2/Tracking/data/predict/', plate=None, wellL=None, training=False, first=True, xb_screen=False):
    global monOutput
    global FEATURE_NUMBER
    # flag indicating whether we use a toy example or not
    # array that will hold the features of all objects, to check which ones contain NaN
    tabF = None
#    print "current directory ", os.getcwd()
    fichier = open(loadingFolder + "featuresToDelete.pkl", 'r')
    f = pickle.load(fichier)
    fichier.close()

    newFrameLot = None
    listP = os.listdir(hdf5Folder)
    if plate is not None:
        listP = [plate]
    for plate in listP:
        print plate
        listW = sorted(os.listdir(os.path.join(hdf5Folder, plate, 'hdf5')))
        if wellL is not None:
            listW = wellL
        for well in listW[:18]:
            well = well.split('_')[0] + '_' + well.split('_')[1][:2]
            if not xb_screen:
                filename = os.path.join(hdf5Folder, plate, 'hdf5', well + ".hdf5")
            else:
                filename = os.path.join(hdf5Folder, plate, 'hdf5', well + ".ch5")
            print well
            if training:
                filenameT = '/media/lalil0u/New/workspace2/Tracking/data/trainingset/PL' + plate + "___P" + well + "___T00000.xml"
            else:
                filenameT = None
            monOutput += "plate = " + plate + ",well = " + well + "\n"

            # add this well's frameLot and feature table
            frameLotC, tabFC = gettingRaw(filename, filenameT, plate, well, name_primary_channel='primary__primary3')
            if newFrameLot == None:
                newFrameLot = frameLotC
            else:
                newFrameLot.addFrameLot(frameLotC)
            tabF = tabFC if tabF == None else np.vstack((tabF, tabFC))

    # NaN clean-up
    c, f2 = treatments.whichAreNan(tabF)
    print len(f2.keys()), len(f)
    # if there are features with NaN entries in the predict data but not in the training data
    toZeros = filter(lambda x: x not in f, f2.keys())
    if toZeros != []:
        print "Attention attention, some features here have NaN entries, and this was not the case in the training set. They are put to 0"
        newFrameLot.zeros(toZeros)

#    if f != f2.keys():
#        featuresNames = imp.importFeaturesNames(filename)
#        print filter(lambda x: x not in f, f2.keys())
#        pdb.set_trace()
    newFrameLot.clean(f)  ##np.delete(X, f, 1)
    if first:
        FEATURE_NUMBER -= len(f)
        #FEATURE_NUMBER -= len(bou)
    print FEATURE_NUMBER

    print "Getting all uplets now"
    print "TIME TIME TIME", time.clock()
    # HERE WE RETRIEVE THE SINGLETS AND DOUBLETS, WITH THE TRAINING VALUE IN CELL.TO IF THEY ARE IN THE TRAINING SET, NONE OTHERWISE
    # FOR THE CENTER AND THE FEATURES, THE MEAN OF THE SINGLET'S OBJECTS IS USED
    if training == False:
        singlets, doublets = newFrameLot.getAllUplets(loadingFolder)
    else:
        singlets, doublets = newFrameLot.getTrainingUplets()
    print "TIME TIME TIME after getting all uplets", time.clock()

    print "joining uplets now"
    solutions = j(singlets, doublets, FEATURE_NUMBER, training)
    print "TIME TIME TIME after joining", time.clock()

    print "normalization"
    fichier = open(loadingFolder + "minMax_data_all.pkl", "r")
    minMax = pickle.load(fichier)
    fichier.close()
    solutions.normalisation(minMax)
    print "TIME TIME TIME after normalization", time.clock()

    return solutions
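
# Hypothetical usage sketch (not part of the original code): calling the prediction-mode
# gettingSolu above for a single plate, assuming monOutput and FEATURE_NUMBER exist as
# module-level globals and that loadingFolder already holds the featuresToDelete.pkl and
# minMax_data_all.pkl files produced during training. The plate name is a placeholder.
def _demoPredictionRun(loadingFolder='../results/', plate='PLATE_NAME'):
    # returns normalized solutions for (at most) the first 18 wells of the plate
    return gettingSolu(loadingFolder, plate=plate, training=False, first=True)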