def getData(self, histDataAsWell):
    """Build the histograms of the numerical features of interest.

    The data of all experiments in ``self.expList`` is loaded with
    ``histConcatenation``. For each feature in ``self.currInterestFeatures``
    the matching column of the concatenated matrix is split back into one
    array per entry of ``length``, and the result is binned by
    ``computingBins``.

    Args:
        histDataAsWell: Not used by this method; kept so that existing
            callers keep working.

    Returns:
        The ``(histogrammes, bins)`` pair returned by ``computingBins``.

    Raises:
        ValueError: If a feature of interest is not in ``featuresSaved``.
    """
    histDict = defaultdict(list)
    _, r, _, _, _, length, _, _, _ = histConcatenation(
        self.settings.data_folder,
        self.expList,
        self.settings.mitocheck_file,
        self.settings.quality_control_file,
        verbose=self.verbose,
    )

    # Row boundaries of the consecutive blocks of ``r``: block i spans
    # bounds[i]:bounds[i + 1]. Computed once instead of re-summing the
    # prefix of ``length`` for every block, and cast to int so the slice
    # bounds are valid even when ``length`` is a plain (possibly empty) list.
    bounds = np.concatenate(([0], np.cumsum(length))).astype(int)

    for feature in self.currInterestFeatures:
        column = featuresSaved.index(feature)
        for start, stop in zip(bounds[:-1], bounds[1:]):
            histDict[feature].append(r[start:stop, column])

    histogrammes, bins = computingBins(
        histDict,
        [self.bin_size] * len(self.currInterestFeatures),
        self.bin_type,
        iter_=self.iter_,
    )
    return histogrammes, bins
def collectingData(iter_, expList, debut, fin):
    """Compute and pickle the feature histograms of ``expList[debut:fin]``.

    The data of the selected experiments is loaded with
    ``histConcatenation`` and split, for every feature in
    ``interestFeatures``, into one array per entry of ``length``. The arrays
    are binned by ``computingBins`` using the binning previously stored in
    ``distExp_ctrl_quantile_10.pkl``. The tuple
    ``(histogrammes, who, ctrlStatus, genes, siRNAs)`` is then pickled to
    ``data_<iter_>.pkl`` in the hard-coded result folder.

    Args:
        iter_: Value inserted in the output file name.
        expList: Sequence of experiments; only ``expList[debut:fin]`` is
            processed.
        debut: Start index of the slice (inclusive).
        fin: End index of the slice (exclusive).

    Returns:
        None. The result is written to disk.

    Raises:
        ValueError: If a feature of interest is not in ``featuresSaved``.
    """
    folder = "/cbio/donnees/aschoenauer/workspace2/Xb_screen/resultData/experiment_clustering/"
    histDict = defaultdict(list)
    _, r, _, who, ctrlStatus, length, genes, siRNAs, _ = histConcatenation(
        "/share/data20T/mitocheck/tracking_results",
        expList[debut:fin],
        "/cbio/donnees/aschoenauer/workspace2/Xb_screen/data/mitocheck_siRNAs_target_genes_Ens72.txt",
        "/cbio/donnees/aschoenauer/workspace2/Xb_screen/data/qc_export.txt",
    )

    # Resolve each feature's column once rather than once per block.
    columns = [(feature, featuresSaved.index(feature)) for feature in interestFeatures]

    # Row boundaries of the consecutive blocks of ``r``: block i spans
    # bounds[i]:bounds[i + 1]. Cast to int so the slice bounds are valid
    # even when ``length`` is a plain (possibly empty) list.
    bounds = np.concatenate(([0], np.cumsum(length))).astype(int)
    for start, stop in zip(bounds[:-1], bounds[1:]):
        for feature, column in columns:
            histDict[feature].append(r[start:stop, column])

    # NOTE: pickle must only be loaded from trusted files. Binary mode and
    # ``with`` guarantee a correct read and a closed handle on error.
    with open("../resultData/experiment_clustering/distExp_ctrl_quantile_10.pkl", "rb") as f:
        bins = pickle.load(f)

    # 16 bin sizes of 10; presumably one per interest feature -- TODO confirm.
    histogrammes, bins = computingBins(histDict, [10] * 16, "quantile", previous_binning=bins)

    with open(os.path.join(folder, "data_{}.pkl".format(iter_)), "wb") as f:
        pickle.dump((histogrammes, who, ctrlStatus, genes, siRNAs), f)
def _dataPrep(self, pcaParameter):
    """Load the experiments plus a random subset of controls and bin them.

    A shuffled subset of the experiments returned by
    ``countingDone(appendingControl(self.expList))`` is appended to
    ``self.expList``; its size is 20% of the current length of
    ``self.expList``. The data of the extended list is loaded with
    ``histConcatenation`` and split, for every feature in
    ``self.currInterestFeatures``, into one array per entry of ``length``.
    The arrays are binned by ``computingBins`` using the binning stored in
    ``distExp_ctrl_<bins_type>_<bin_size>.pkl`` in the result folder.

    Note:
        ``self.expList`` is extended in place, so calling this method
        repeatedly keeps growing the list.

    Args:
        pcaParameter: Not used by this method; kept so that existing
            callers keep working.

    Returns:
        The ``(histogrammes, bins)`` pair returned by ``computingBins``.

    Raises:
        ValueError: If a feature of interest is not in ``featuresSaved``.
    """
    histDict = defaultdict(list)

    # Presumably the control experiments associated with expList, restricted
    # to those already processed -- helper semantics not visible here.
    ctrlExp = appendingControl(self.expList)
    ctrlExp = countingDone(ctrlExp)
    np.random.shuffle(ctrlExp)
    ctrlExp = ctrlExp[: int(0.2 * len(self.expList))]
    if self.verbose:
        # Single-argument parenthesised print behaves the same on Python 2.
        print(ctrlExp)
    self.expList.extend(ctrlExp)

    _, r, _, _, _, length, _, _, _ = histConcatenation(
        self.settings.data_folder,
        self.expList,
        self.settings.mitocheck_file,
        self.settings.quality_control_file,
        verbose=self.verbose,
    )

    # Resolve each feature's column once rather than once per block.
    columns = [(feature, featuresSaved.index(feature)) for feature in self.currInterestFeatures]

    # Row boundaries of the consecutive blocks of ``r``: block i spans
    # bounds[i]:bounds[i + 1]. Cast to int so the slice bounds are valid
    # even when ``length`` is a plain (possibly empty) list.
    bounds = np.concatenate(([0], np.cumsum(length))).astype(int)
    for start, stop in zip(bounds[:-1], bounds[1:]):
        for feature, column in columns:
            histDict[feature].append(r[start:stop, column])

    bins_path = os.path.join(
        self.settings.result_folder,
        "distExp_ctrl_{}_{}.pkl".format(self.bins_type, self.bin_size),
    )
    # NOTE: pickle must only be loaded from trusted files. Binary mode and
    # ``with`` guarantee a correct read and a closed handle on error.
    with open(bins_path, "rb") as f:
        bins = pickle.load(f)

    histogrammes, bins = computingBins(
        histDict,
        [self.bin_size] * len(self.currInterestFeatures),
        self.bins_type,
        previous_binning=bins,
    )
    print(histogrammes.shape)
    return histogrammes, bins