def _dataPrep(self, pcaParameter):
    """Build per-feature histograms for self.expList plus a random control sample.

    Steps:
      1. ``appendingControl(self.expList)`` is passed through ``countingDone``,
         shuffled, and truncated to 20% of the current length of
         ``self.expList``. The sample is appended to ``self.expList`` IN PLACE.
      2. ``histConcatenation`` is run on the extended list; its second return
         value (``r``) and sixth return value (``length``) are used.
      3. For every entry of ``length`` and every feature in
         ``self.currInterestFeatures``, the matching row range / column of
         ``r`` is appended to a per-feature list.
      4. The binning previously pickled in ``self.settings.result_folder`` is
         loaded and handed to ``computingBins`` as ``previous_binning``.

    :param pcaParameter: unused by this method; kept for interface compatibility.
    :returns: the ``(histogrammes, bins)`` pair produced by ``computingBins``.
    """
    histDict = defaultdict(list)

    # Control sample: size is computed from self.expList BEFORE it is extended.
    ctrlExp = appendingControl(self.expList)
    ctrlExp = countingDone(ctrlExp)
    np.random.shuffle(ctrlExp)
    ctrlExp = ctrlExp[: int(0.2 * len(self.expList))]
    if self.verbose:
        print(ctrlExp)
    self.expList.extend(ctrlExp)

    _, r, _, _, _, length, _, _, _ = histConcatenation(
        self.settings.data_folder,
        self.expList,
        self.settings.mitocheck_file,
        self.settings.quality_control_file,
        verbose=self.verbose,
    )

    # Column of each feature in r; looked up once instead of once per experiment.
    featureColumns = [
        (feature, featuresSaved.index(feature))
        for feature in self.currInterestFeatures
    ]

    # length[i] is presumably the number of rows of r belonging to entry i
    # (TODO confirm against histConcatenation). Rows [start, stop) are sliced
    # per entry. start begins at integer 0: np.sum over an empty list slice
    # would give float 0.0, which is not a valid slice bound on recent numpy.
    start = 0
    for i in range(len(length)):
        stop = np.sum(length[: i + 1])
        for feature, column in featureColumns:
            histDict[feature].append(r[start:stop, column])
        start = stop

    binsPath = os.path.join(
        self.settings.result_folder,
        "distExp_ctrl_{}_{}.pkl".format(self.bins_type, self.bin_size),
    )
    # Binary mode is what pickle expects; the context manager closes the file
    # even if unpickling fails.
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open(binsPath, "rb") as f:
        bins = pickle.load(f)

    histogrammes, bins = computingBins(
        histDict,
        [self.bin_size] * len(self.currInterestFeatures),
        self.bins_type,
        previous_binning=bins,
    )
    print(histogrammes.shape)
    return histogrammes, bins
def _writeExecutableScript(script_name, content):
    """Write *content* to *script_name* and make the file executable for everyone."""
    with open(script_name, "w") as script_file:
        script_file.write(content)
    # Same effect as `chmod a+x` (without this, the cluster node cannot call
    # the script), but without building a shell command from the path.
    os.chmod(script_name, os.stat(script_name).st_mode | 0o111)


def _writeArrayMasterScript(baseName):
    """Write the array-job master script for *baseName* and return its path.

    The master script runs ``<scriptFolder>/<baseName>$<pbsArrayEnvVar>.sh``,
    i.e. the per-task script whose numeric suffix equals the array task id.
    """
    script_prefix = os.path.join(scriptFolder, baseName)
    array_script_name = '%s.sh' % script_prefix
    main_content = (
        "#!/bin/sh\n"
        "%s\n"
        "#$ -o %s\n"
        "#$ -e %s\n"
        "%s$%s.sh\n"
    ) % (path_command, pbsOutDir, pbsErrDir, script_prefix, pbsArrayEnvVar)
    _writeExecutableScript(array_script_name, main_content)
    return array_script_name


def globalSummaryScript(baseName, siRNAFile, n_clusters_min, n_clusters_max, div_name, lambda_, weights,
                        bins_type, bin_size, cost_type, batch_size, n_init, init, ddim, iter_=0):
    """Generate the cluster job scripts for the summary and clustering steps.

    A. For every siRNA in the pickled list *siRNAFile* that is present in the
       siRNA-experiment dictionary, one script per (plate, well) is written to
       ``scriptFolder``.
    B. A random sample of controls (20% of the number of experiments) gets the
       same kind of script, then the array master script is written and the
       matching ``qsub`` command is printed.
    C. The full experiment list (experiments + sampled controls) is pickled to
       ``exp_Simpson_<timestamp>.pkl`` in the current directory, and one
       clustering script per k in ``range(n_clusters_min, n_clusters_max)`` is
       written together with its own array master script.

    The ``qsub`` commands are only printed, never executed.

    :param baseName: prefix of the generated script names.
    :param siRNAFile: path of a pickle file containing the list of siRNAs.
    :param n_clusters_min: first number of clusters (inclusive).
    :param n_clusters_max: last number of clusters (exclusive).
    :param div_name, weights, bins_type, bin_size, cost_type, batch_size, init,
        ddim, iter_: forwarded to ``plateWellSummaryScript`` and to the
        clustering command line; several also appear in the script names.
    :param lambda_, n_init: forwarded to ``plateWellSummaryScript`` only.
    :returns: 1
    """
    # NOTE: pickle.load can execute arbitrary code -- siRNAFile must be trusted.
    with open(siRNAFile, 'rb') as f:
        siRNAList = pickle.load(f)

    siExpDict = expSi(qc=quality_control_file, sens=0)
    jobCount = 0
    total_expList = []
    head = "#!/bin/sh\ncd %s" % progFolder
    summaryBase = baseName + '{}{}_w{}_{}_{}_{}'.format(
        iter_, div_name[:5], weights, bins_type, bin_size, cost_type)

    def writeWellScript(plate, well, index):
        # Script number <index> is the one executed by array task <index>.
        cmd = plateWellSummaryScript(plate, well, div_name, lambda_, weights, bins_type, bin_size,
                                     cost_type, batch_size, n_init, init, ddim, iter_)
        script_name = os.path.join(scriptFolder, summaryBase + '{}.sh'.format(index))
        _writeExecutableScript(script_name, head + cmd)

    # A. DEALING WITH EXPERIMENTS
    for siRNA in siRNAList:
        try:
            expList = siExpDict[siRNA]
        except KeyError:
            print("siRNA not in siRNA-experiment dictionary")
            continue
        expList = strToTuple(expList, os.listdir(data_folder))
        total_expList.extend(expList)
        for plate, well in expList:
            jobCount += 1
            writeWellScript(plate, well, jobCount)

    # B. DEALING WITH CONTROLS
    ctrlExp = appendingControl(total_expList)
    ctrlExp = countingDone(ctrlExp)
    np.random.shuffle(ctrlExp)
    ctrlExp = ctrlExp[:int(0.2 * len(total_expList))]
    for plate, well in ctrlExp:
        jobCount += 1
        writeWellScript(plate, well, jobCount)

    # write the main script
    array_script_name = _writeArrayMasterScript(summaryBase)
    sub_cmd = 'qsub -t 1-%i %s' % (jobCount, array_script_name)
    print(sub_cmd)

    # C. DOING EXPERIMENT CLUSTERING STEP
    expFilename = 'exp_Simpson_{}.pkl'.format(int(time.time()))
    total_expList.extend(ctrlExp)
    with open(expFilename, 'wb') as f:
        pickle.dump(total_expList, f)

    clusteringBase = summaryBase + '_clustering'
    cmd_template = (
        "\npython tracking/histograms/summarization_clustering.py -a clustering"
        " --experimentFile %s -k %i --ddimensional %i --bins_type %s"
        " --cost_type %s --bin_size %i --div_name %s -w %i --init %s"
        " --batch_size %i --iter %i\n"
    )
    # Scripts are numbered from 1 because the array job below is submitted
    # with `-t 1-N`: task ids, and therefore the suffix the master script looks
    # for, run from 1 to N. Numbering from 0 left task N without a script and
    # script 0 never executed.
    for taskId, n_clusters in enumerate(range(n_clusters_min, n_clusters_max), 1):
        cmd = cmd_template % (
            expFilename,
            n_clusters,
            ddim,
            bins_type,
            cost_type,
            bin_size,
            div_name,
            weights,
            init,
            batch_size,
            iter_,
        )
        script_name = os.path.join(scriptFolder, clusteringBase + '{}.sh'.format(taskId))
        _writeExecutableScript(script_name, head + cmd)

    # write the main script
    array_script_name = _writeArrayMasterScript(clusteringBase)
    # NOTE(review): `-hold_jid` normally takes a job id/name list; none is given
    # here, so the id of the summary array job has to be inserted by hand
    # before submitting -- confirm the intended usage.
    sub_cmd = 'qsub -hold_jid -t 1-%i %s' % (n_clusters_max - n_clusters_min, array_script_name)
    print(sub_cmd)

    # D. GOING BACK TO EXPERIMENTS AND TESTING IF DIFFERENT FROM CONTROLS
    return 1