コード例 #1
0
def forMatthieu2(file_features='../resultData/features_on_films/all_distances_whole_dataonly.pkl', file_labels='../resultData/features_on_films/labelsKM_whole_k8.pkl',
                 file_coordinates='../resultData/features_on_films/coordinates_1000first.pkl',
                 out_feature_file='../resultData/features_on_films/traj_cluster{}_trajectory_features.csv',
                 out_coordinate_file='../resultData/features_on_films/traj_cluster{}_trajectory_coordinates.csv'):
    '''
    Export, for each trajectory cluster, the features and (x, y) coordinates
    of up to 1000 randomly chosen trajectories as CSV files.

    Clusters are visited in the fixed order [7,0,3,5,4,2,6,1] used for the
    ISBI paper, and renumbered 0..7 in the output file names.
    '''
    # Feature matrix; only the first 356905 trajectories have labels/coordinates
    f=open(file_features)
    data=pickle.load(f); f.close()
    data=data[0][:356905]
    # Keep the numeric features plus 'mean persistence' and 'mean straight'
    r=np.hstack((data[:,:len(featuresNumeriques)], data[:,featuresSaved.index('mean persistence'), np.newaxis], data[:, featuresSaved.index('mean straight'), np.newaxis]))
    
    f=open(file_labels)
    labels=pickle.load(f); f.close()
    labels=labels[0][:356905]
    
    f=open(file_coordinates)
    coordinates=np.array(pickle.load(f));f.close()
    i=0
    for k in [7,0,3,5,4,2,6,1]:  # cluster order used for the ISBI paper
        where_=np.where(np.array(labels)==k)[0]
        np.random.shuffle(where_)
        
        # Features of up to 1000 random trajectories of this cluster
        f=open(out_feature_file.format(i), 'w')
        writer=csv.writer(f)
        writer.writerows(r[where_[:1000]])
        f.close()
        
        # One CSV row per trajectory: its sequence of (x, y) pairs
        f=open(out_coordinate_file.format(i), 'w')
        writer=csv.writer(f)
        for el in coordinates[where_[:1000]]:
            writer.writerow(zip(el[:,0], el[:,1]))
        f.close()
        
        i+=1
コード例 #2
0
ファイル: sandbox.py プロジェクト: lalil0u/Xb_screen
def findingMeans(exp_list, feature1, feature2, folder= '/share/data20T/mitocheck/tracking_results/', filename="hist_tabFeatures_{}.pkl", plot=True, result=None):
    '''
    For interesting plots: getting means and medians for a couple of features, then getting wells for extreme values
    '''
    if result is None:
        result=np.zeros(shape=len(exp_list, 4))
        for k, exp in enumerate(exp_list):
            pl,w=exp; print k
        try:
            f=open(os.path.join(folder, pl, filename.format(w)))
            arr,_,_=pickle.load(f)
            f.close()
        except OSError:
            print "no ", pl, w
        else:                 
            result[k]=[nanmean(arr[:,featuresSaved.index(feature1)]), nanmedian(arr[:,featuresSaved.index(feature1)]),\
                        nanmean(arr[:,featuresSaved.index(feature2)]), nanmedian(arr[:,featuresSaved.index(feature2)])]
            
        f=open('result__{}{}.pkl'.format(feature1, feature2), 'w')
        pickle.dump(result, f); f.close()
    
    if plot:
        f=p.figure()
        ax=f.add_subplot(121)
        ax.scatter(result[:,0], result[:,2]); ax.set_title('Means'); ax.set_xlabel(feature1); ax.set_ylabel(feature2)
        ax.axhline(scoreatpercentile(result[:,2],90)); ax.axhline(scoreatpercentile(result[:,2],10))
        ax.axvline(scoreatpercentile(result[:,0],90)); ax.axvline(scoreatpercentile(result[:,0],10))
        
        ax=f.add_subplot(122)
        ax.scatter(result[:,1], result[:,3]); ax.set_title('Medians'); ax.set_xlabel(feature1); ax.set_ylabel(feature2)
        ax.axhline(scoreatpercentile(result[:,3],90)); ax.axhline(scoreatpercentile(result[:,3],10))
        ax.axvline(scoreatpercentile(result[:,1],90)); ax.axvline(scoreatpercentile(result[:,1],10))
        p.show()
    mean_result=[0,0,0,0]
    mean_result[1] = exp_list[np.where((result[:,0]>scoreatpercentile(result[:,0],90)) & ((result[:,2])>scoreatpercentile(result[:,2],90)))]
    mean_result[2] = exp_list[np.where((result[:,0]>scoreatpercentile(result[:,0],90)) & ((result[:,2])<scoreatpercentile(result[:,2],10)))]
    mean_result[0] = exp_list[np.where((result[:,0]<scoreatpercentile(result[:,0],10)) & ((result[:,2])>scoreatpercentile(result[:,2],90)))]
    mean_result[3] = exp_list[np.where((result[:,0]<scoreatpercentile(result[:,0],10)) & ((result[:,2])<scoreatpercentile(result[:,2],10)))]
    
    return mean_result
コード例 #3
0
 def getData(self, histDataAsWell):
     '''
     Build, for every experiment in self.expList, the per-feature arrays of
     trajectory values for all numeric features of interest, then bin them.

     Returns (histogrammes, bins) as produced by computingBins.
     '''
     per_feature_values = defaultdict(list)
     # r stacks all experiments' feature rows; length holds each experiment's
     # row count so r can be split back per experiment
     _, r, _, _, _, length, _, _, _ = histConcatenation(self.settings.data_folder, self.expList, self.settings.mitocheck_file,
                                     self.settings.quality_control_file, verbose=self.verbose)
     for feature in self.currInterestFeatures:
         column = featuresSaved.index(feature)
         offset = 0
         for block_len in length:
             per_feature_values[feature].append(r[offset:offset + block_len, column])
             offset += block_len
                 
     bin_sizes = [self.bin_size] * len(self.currInterestFeatures)
     histogrammes, bins = computingBins(per_feature_values, bin_sizes, self.bin_type, iter_=self.iter_)
                 
     return histogrammes, bins
コード例 #4
0
 def _findNumerical(self, data, percentile, how_many):
     index=featuresSaved.index(self.feature_name)
     scoreD = scoreatpercentile(data[:,index], percentile)
     down=np.where(data[:,index]<=scoreD)
     scoreU = scoreatpercentile(data[:,index], 100-percentile)
     up=np.where(data[:,index]>=scoreU)
         
     med0, med1=scoreatpercentile(data[:,index], 48), scoreatpercentile(data[:,index], 52)
     ctrl0 = np.where(data[:,index]>=med0)
     ctrl1=np.where(data[:, index]<=med1)
     ctrl=filter(lambda x: x in ctrl1[0], ctrl0[0])
     
     if self.verbose:
         print "Feature {}".format(self.feature_name)
         print "{} retrieved, percentile {}, value {}".format(up[0], 100-percentile, scoreU)
         print "{} retrieved, percentile {}, value {}".format(down[0],percentile, scoreD)
         print "{} retrieved, percentile {}, value {}".format(np.array(ctrl), percentile, med0)
     return down[0], up[0], ctrl
コード例 #5
0
    def wellToPlot(self, length,who, all_, outputFolder, median=False):
        '''
        Load the trajectories of the (plate, well) experiments selected in
        all_ (a mapping group -> {(plate, well): trajectory indices}), align
        each trajectory to its starting point, and save one plot per
        trajectory (up to 50 per group) in outputFolder.

        NOTE(review): this assert is stripped under `python -O`; raise
        explicitly if the host check matters.
        '''
        assert getpass.getuser()!='lalil0u', 'You are not running this on the right computer'
        # Column of the feature of interest in the per-well feature table
        index=featuresSaved.index(self.feature_name)
        folder='/share/data20T/mitocheck/tracking_results'
        basename = 'traj'
        resultCour=[]; sizes=[0]  # kept trajectory coordinates; cumulative group sizes
        valCour=[]                # feature value of each kept trajectory
        #if median: basename+='_median'
        for el in all_:
            trajectories=all_[el]
            print el
            for plate, well in trajectories:
                print plate, well
                if self.verbose:
                    print "Taking care of plate {}, well {}".format(plate, well)
        
                # hist_tabFeatures_<well>.pkl holds (feature table, coordinates, _)
                f=open(os.path.join(folder,plate, 'hist_tabFeatures_{}.pkl'.format(well)))
                tab, coord, _=pickle.load(f); f.close()
                resultCour.extend(np.array(coord)[trajectories[(plate, well)]])
                valCour.extend(tab[trajectories[(plate, well)], index])
            sizes.append(len(resultCour))

        # Align every trajectory on its first point and compute global bounds
        # so that all plots share the same axes
        XX=[]; YY=[]
        for k in range(len(resultCour)):
            X=np.array(resultCour[k][1]); X-=X[0]; XX.append(X)
            Y=np.array( resultCour[k][2]); Y-=Y[0]; YY.append(Y)
        minx,maxx = min([min(X) for X in XX]), max([max(X) for X in XX])
        miny,maxy = min([min(Y) for Y in YY]), max([max(Y) for Y in YY])
        
        #saving trajectories 
        f=open('trajectories_{}.pkl'.format(self.feature_name), 'w')
        pickle.dump([all_, resultCour, valCour, sizes],f)
        f.close()
        
        # Plot at most 50 trajectories per group, annotated with their value
        i=0
        for el in all_:
            for k in range(sizes[i], min(sizes[i]+50,sizes[i+1])):
                plotAlignedTraj(XX[k], YY[k], minx, maxx, miny, maxy, show=False, 
                                name=os.path.join(outputFolder, '{}_{}_{}_{}.png'.format(basename,el, self.feature_name, k)), val=valCour[k])
            i+=1
        return 1#_writeXml(plate, well, resultCour)
コード例 #6
0
def collectingData(iter_, expList, debut, fin):
    '''
    Concatenate the per-trajectory feature values of experiments
    expList[debut:fin], bin them with a previously saved quantile binning,
    and pickle (histogrammes, who, ctrlStatus, genes, siRNAs) to
    data_<iter_>.pkl in the experiment_clustering result folder.
    '''
    folder = "/cbio/donnees/aschoenauer/workspace2/Xb_screen/resultData/experiment_clustering/"

    histDict = defaultdict(list)

    # r stacks the feature rows of all experiments; length gives each
    # experiment's row count so r can be split back per experiment
    _, r, _, who, ctrlStatus, length, genes, siRNAs, _ = histConcatenation(
        "/share/data20T/mitocheck/tracking_results",
        expList[debut:fin],
        "/cbio/donnees/aschoenauer/workspace2/Xb_screen/data/mitocheck_siRNAs_target_genes_Ens72.txt",
        "/cbio/donnees/aschoenauer/workspace2/Xb_screen/data/qc_export.txt",
    )

    for i in range(len(length)):
        for k, feature in enumerate(interestFeatures):
            histDict[feature].append(r[np.sum(length[:i]) : np.sum(length[: i + 1]), featuresSaved.index(feature)])

    # Reuse the binning computed on control experiments so every iteration
    # shares the same bins
    f = open("../resultData/experiment_clustering/distExp_ctrl_quantile_10.pkl")
    bins = pickle.load(f)
    f.close()

    histogrammes, bins = computingBins(histDict, [10 for k in range(16)], "quantile", previous_binning=bins)
    f = open(os.path.join(folder, "data_{}.pkl".format(iter_)), "w")
    pickle.dump((histogrammes, who, ctrlStatus, genes, siRNAs), f)
    f.close()
コード例 #7
0
    def _dataPrep(self, pcaParameter):
        '''
        Prepare the binned histogram data for clustering.

        Appends ~20% (of the experiment count) of not-yet-done control
        experiments to self.expList (in place), concatenates all experiments'
        per-trajectory feature values, and bins each feature using the
        binning previously computed on controls.

        Returns (histogrammes, bins). pcaParameter is unused in this method.
        '''
        histDict = defaultdict(list)

        # Pick a random subset of pending controls, 20% of the experiment count
        ctrlExp = appendingControl(self.expList)
        ctrlExp = countingDone(ctrlExp)
        np.random.shuffle(ctrlExp)
        ctrlExp = ctrlExp[: int(0.2 * len(self.expList))]
        if self.verbose:
            print ctrlExp
        self.expList.extend(ctrlExp)

        # r stacks all trajectories' feature rows; length holds each
        # experiment's row count so r can be split back per experiment
        _, r, _, _, _, length, _, _, _ = histConcatenation(
            self.settings.data_folder,
            self.expList,
            self.settings.mitocheck_file,
            self.settings.quality_control_file,
            verbose=self.verbose,
        )
        for i in range(len(length)):
            for k, feature in enumerate(self.currInterestFeatures):
                histDict[feature].append(r[np.sum(length[:i]) : np.sum(length[: i + 1]), featuresSaved.index(feature)])

        # Reuse the binning computed on control experiments
        f = open(
            os.path.join(self.settings.result_folder, "distExp_ctrl_{}_{}.pkl".format(self.bins_type, self.bin_size))
        )
        bins = pickle.load(f)
        f.close()

        histogrammes, bins = computingBins(
            histDict,
            [self.bin_size for k in range(len(self.currInterestFeatures))],
            self.bins_type,
            previous_binning=bins,
        )
        print histogrammes.shape
        return histogrammes, bins
コード例 #8
0
def replotHeatmap(folder, data_filename, indices, outputfile,action='hierarchical', level=0.4,trad=False,
                  num_clusters=8, labels_filename='labelsKM_whole_k{}.pkl', pcaed_filename='all_distances_whole2_pcaed.pkl',
                  with_timelength=False):
    
    f=open(os.path.join(folder,data_filename))
    data=pickle.load(f); f.close(); r=data[0]
    
    featuresToKeep=['effective space length','mean squared displacement', 'entropy1','diffusion adequation','movement type', 'signed turning angle','corrected straightness index', 
                    'mean straight']
    if featuresToKeep is not None:
        r=np.hstack((r[:,featuresSaved.index(feat), np.newaxis] for feat in featuresToKeep))
        features=featuresToKeep
    else:
        features=list(featuresNumeriques); features.append('mean persistence'); features.append('mean straight')
        r=np.hstack((r[:,:len(featuresNumeriques)], r[:,featuresSaved.index('mean persistence'), np.newaxis], r[:, featuresSaved.index('mean straight'), np.newaxis]))
    
    fL=list(features)
    if with_timelength:
        time_length=r[-1]
        r=np.hstack((r, np.array(time_length)[:,np.newaxis])); fL.append('time length')
    
    mean_=np.mean(r,0)
    for k,feature in enumerate(features):
        print feature, mean_[k]
    nr=(r-mean_)/np.std(r,0)

    #nr=np.hstack((nr, indices[:,np.newaxis])); fL.append('hierarchical cluster')
    small_nr=None
    
    if action=='hierarchical':
        num_clusters=len(np.bincount(indices)); begin_=1
        print 'Going for hierarchical clustering (ward, euclidean) with {} clusters'.format(num_clusters-1)

    elif action=='kmeans':
        print 'Going for mini batch k-means with {} clusters'.format(num_clusters); begin_=0
        try:
            f=open(os.path.join(folder, labels_filename.format(num_clusters)), 'r')
            labels, percentages, who, length=pickle.load(f); f.close()
        except OSError:
            print 'File Error ', os.path.join(folder, labels_filename.format(num_clusters))
        else:
            indices=labels

#        f=open(os.path.join(folder, pcaed_filename), 'r')
#        _,pcaed_data=pickle.load(f); f.close()
#        
#        model = MiniBatchKMeans(n_clusters=num_clusters, batch_size = 2000, init='k-means++',n_init=1000,max_iter=1000, max_no_improvement=100, compute_labels = True)
#        indices = model.fit(pcaed_data[:,:7])
#        indices=indices.labels_

    for k in [7,0,3,5,4,2,6,1]:#range(begin_, num_clusters):#ORDRE UTILISE POUR LE PAPIER ISBI mais pas la petite heatmap [7,0,3,5,4,2,6,1]:#
        where_=np.where(np.array(indices)==k)[0]
        np.random.shuffle(where_)
        small_nr = np.vstack((small_nr, nr[where_[:1000]])) if small_nr is not None else nr[where_[:1000]]
    print small_nr.shape

    print 'Showing trajectory clusters'
    heatmap(small_nr.T,fL, range(small_nr.shape[0]), None, None, None, None, 
            color_gradient='OrRd', filename=outputfile+'TRAJ', trad=False, save=False)
    if action=='kmeans':
#        num_experiments=np.where(np.array(genes)=='ctrl')[0][0]
        heatmap(percentages, who,range(begin_, num_clusters), 'ward', 'ward', 'euclidean', 'euclidean', 
            color_gradient='red_white_blue', filename=outputfile+'MOV', trad=False, save=False, level=level)
#        heatmap(percentages[num_experiments:], genes[num_experiments:],range(begin_, num_clusters), None, 'ward', None, 'euclidean', 
#            color_gradient='red_white_blue', filename=outputfile+'CTRL', trad=False, save=False, level=level)

    
    return
コード例 #9
0
def exploitingKMeans_wModel(model, data, mean, std, pca_std, pca, num_PC=7):
    '''
    Assign trajectories to clusters with a fitted k-means model and count them.

    The numeric feature columns plus 'mean straight' are standardized with the
    given mean/std, projected through the fitted pca (scaled by pca_std), and
    the first num_PC components are fed to model.predict.
    Returns the per-cluster counts as np.bincount(..., minlength=8).
    '''
    numeric_part = data[:, :len(featuresNumeriques)]
    straight_col = data[:, featuresSaved.index('mean straight'), np.newaxis]
    selected = np.hstack((numeric_part, straight_col))
    projected = pca.transform((selected - mean) / std) / pca_std
    assignments = model.predict(projected[:, :num_PC])
    return np.bincount(assignments, minlength=8)
コード例 #10
0
    # Run the trajectory simulator for the requested movement type and
    # iteration; it writes hist_tabFeatures<iter>_W<type>.pkl
    simulateur = TrajectorySimulator(settings_filename='tracking/settings/settings_simulator_14_10_20.py')
    simulateur('simulated_trajectories', 0, options.movement_type_index, "hist_tabFeatures{}_W{}.pkl".format(options.iter, options.movement_type_index))

    # NOTE(review): this file handle is never closed
    f=open('../resultData/simulated_traj/trajectories_for_clustering/hist_tabFeatures{}_W{}.pkl'.format(options.iter, options.movement_type_index))
    tabFeatures, _, _ = pickle.load(f)
    # Log-transform the raw feature table
    r2 = histLogTrsforming(tabFeatures)  
    print r2.shape, 'not normalized'
    r2=r2[:,:-1]  # drop the last column
    
    # Remove any row containing NaN values
    if np.any(np.isnan(r2)):
        print 'Deleting nan values'
        r2=np.delete(r2, np.where(np.isnan(r2))[0], 0)
    
    #r=(r2-np.mean(r2,0))/np.std(r2,0)
    
    # Keep the numeric features plus the 'mean straight' column
    r=np.hstack((r2[:,:len(featuresNumeriques)], r2[:, featuresSaved.index('mean straight'), np.newaxis]))
    print r.shape
    #print 'computing bins'
    #histogrammeMatrix = computingBins(histNC, mat_hist_sizes[0])
    print 'saving'
    f=open('../resultData/simulated_traj/trajectories_for_clustering/data_sim_traj{}{}.pkl'.format(options.iter, options.movement_type_index), 'w')
    pickle.dump(r, f); f.close()

#
#if __name__ == '__main__':
#    p=PlateSimulator(settings_filename="tracking/settings/settings_simulator_14_10_20.py")
#    
#    p()
#