def __init__(self):
    """Set up run parameters, the timestamped output/log directory, and the
    activation-related state fields used later in the run."""
    # Setup Parameters: clear any cached params so getParameter() re-reads the setup file
    util.params = None
    self.dnnModelPath = util.getFullFileName(util.getParameter('DnnModelPath'))
    self.numTrainingInstances = util.getParameter('NumActivationTrainingInstances')

    # Timestamped output directory: output/<setup description>--yymmdd_HHMMSS
    self.timestamp = datetime.datetime.now().strftime("%y%m%d_%H%M%S")
    self.outputName = util.getSetupFileDescription() + '--' + self.timestamp
    self.outputDir = 'output/%s' % (self.outputName)
    util.makeDirectory(self.outputDir)

    # Logging configuration is stored on the util module so all helpers share it
    util.isLoggingEnabled = util.getParameter('LoggingEnabled')
    util.logPath = self.outputDir + '/%s.log' % (self.outputName)
    util.logLevel = util.getParameter('LogLevel')
    util.thisLogger = util.Logger()
    util.storeSetupParamsInLog()

    # Setup memory environment
    # (fix: dropped the redundant chained local alias `processorType` —
    # only the attribute is ever read)
    self.processorType = util.getParameter('ProcessorType')
    self.startTime = datetime.datetime.now()
    self.streamList = None
    self.clustererList = None
    self.classMaxValues1 = None  # max value of raw activation data
    self.classMaxValues2 = None  # max value of reduced activation data
    self.flatActivations = None
    self.activationBatches = None
    self.batchFlatActivations = None
    self.reducedFlatActivations = None
def sampling():
    """
    The main function of the sampling process.

    Samples street images from preprocessed intersection points, uploads them
    to Google Drive, writes a CSV index of the samples, and plots a map of the
    sampled points.
    :return: None
    """
    # make directory for street images
    streetImageOutputFolder = CONFIG["sampling"]["streetImageOutputFolder"]
    makeDirectory(streetImageOutputFolder)

    # Get preprocessed point data
    # NOTE: "intersectoinPointFile" typo is the actual key in the config — do not "fix" it here
    intersectionPointFile = CONFIG["shapefile"]["intersectoinPointFile"]
    pointInfoFile = CONFIG["shapefile"]["pointInfoFilename"]
    pointInfo = readPointFile(pointInfoFile)
    intersectionPointInfo = readIntersectionPointInfo(intersectionPointFile)

    # Filter point data that has street images taken within the specified period.
    maxYear = CONFIG["gmap"]["streetImageMaxYear"]
    minYear = CONFIG["gmap"]["streetImageMinYear"]
    filteredPoints = filterPointByYear(pointInfo, maxYear, minYear)

    # Column indices into a sample-info row
    IMG_NAME_COL_NUM = 5
    LAT_LNG_COL_NUM = 2

    # Sample street images; the return is a list of sample-info rows
    sampleNum = CONFIG["sampling"]["sampleNum"]
    initImageNumber = CONFIG["sampling"]["initImageNumber"]
    sampleData = sampleAndDownloadStreetImage(filteredPoints, sampleNum,
                                              initImageNumber, initImageNumber,
                                              streetImageOutputFolder,
                                              intersectionPointInfo)

    imageNames = [
        streetImageOutputFolder + "/" + data[IMG_NAME_COL_NUM]
        for data in sampleData
    ]
    links = GDriveUpload(imageNames, "Sampled_Image")

    # Append each image's share link to its row.
    # (fix: replaced the Py2-only `xrange(len(...))` index loop with direct
    # iteration over the rows — same mutation, Py2/Py3 compatible)
    for row in sampleData:
        imageName = streetImageOutputFolder + "/" + row[IMG_NAME_COL_NUM]
        row.append(links[imageName])

    columnTitle = [
        "Sample Number", "Sampled Point Number", "Latitude + Longitude",
        "Heading", "Date", "Image Name", "Road Types", "Web Link"
    ]
    sampleData.insert(0, columnTitle)

    # output to csv file
    outputCSV(sampleData, CONFIG["sampling"]["csvFilename"])

    # plot images map (skip the header row just inserted)
    sampledPoints = set(
        [divideGPS(d[LAT_LNG_COL_NUM]) for d in sampleData[1:]])
    plotSampledPointMap(list(sampledPoints),
                        CONFIG["sampling"]["sampledPointsMapFilename"])
def root_to_hdf_different_bunches():
    """For 10 bunch counts spread over [1, 50], convert each run's ROOT files
    to an HDF5 file (one per run per bunch count), skipping files that already
    exist, and report how many HDF5 paths were collected."""
    runinfolist = getRunInfoList()
    # 10 evenly spaced values in [1, 50]; int() below truncates the floats
    bunchnums = np.linspace(1, 50, 10)
    hdflist = []
    for bunchnum in bunchnums:
        bunchnum = int(bunchnum)
        for runinfo in runinfolist:
            dirname = "../" + runinfo.getDirName_grid(
            ) + "/" + "root" + "/" + conf.Run_name
            # (fix: dropped the pointless list-comprehension copy of glob.glob,
            # and the dead `filename`/`name` computation — `name` was never
            # used, and `rootlist[0]` could IndexError on an empty directory)
            rootlist = glob.glob(dirname + "/*.root")
            hdfpath = "../" + runinfo.getDirName_grid() + "/" + "hdf_files"
            makeDirectory(hdfpath)
            hdfname = hdfpath + "/" + runinfo.getDirName_grid() + "_" + str(
                bunchnum) + "_bunches_" + conf.Run_name + ".h5"
            if not os.path.exists(hdfname):
                df = iter_files_bunches(rootlist, bunchnum)
                df.to_hdf(hdfname, key='df')
                print(hdfname + " created")
            else:
                print(hdfname + " exists")
            # (fix: append hoisted out of both branches — it was duplicated)
            hdflist.append(hdfname)
    print("hdf length " + str(len(hdflist)))
def plot_all_one_param_constant_bunch(x_or_y, var_name, bunchnum): runinfolist = getRunInfoList() #fig = plt.figure(figsize=(10,8)) #colors = plt.rcParams["axes.prop_cycle"]() #ax = fig.add_subplot(111) #plotroots = [] textstr = "" runlist = [] sigxlist = [] sigylist = [] for j, runinfo in enumerate(runinfolist): runlist.append(runinfo.getDirName_grid()) runname = runinfo.getDirName_grid() sigx = runname.split('_')[1] sigy = runname.split('_')[2] sigxlist.append(sigx) sigylist.append(sigy) sigxlist = set(sigxlist) sigylist = set(sigylist) hdfx_const = [] hdfy_const = [] for sigxval, sigyval in zip(sigxlist, sigylist): for j, runinfo in enumerate(runinfolist): dirname = "../" + runinfo.getDirName_grid( ) + "/" + "root" + "/" + conf.Run_name #print(dirname) hdfpath = "../" + runinfo.getDirName_grid() + "/" + "hdf_files" makeDirectory(hdfpath) hdfname = hdfpath + "/" + runinfo.getDirName_grid() + "_" + str( bunchnum) + "_bunches_" + conf.Run_name + ".h5" name, _ = os.path.splitext(hdfname) #name,_=os.path.splitext(name) _, name = os.path.split(name) name = name.split('_') #every 4 elements of the list constant parameter if name[2] == sigyval: hdfy_const.append(hdfname) #print "y const hdf ",hdfname if name[1] == sigxval: hdfx_const.append(hdfname) if x_or_y == "y": for hdfy in hdfy_const: print hdfy phiplot = PhiPlotAllInOneWithArg(hdfy_const[4:8], y1, var_name, bunchnum) phiplot.plot() elif x_or_y == "x": for hdfx in hdfx_const: print hdfx phiplot = PhiPlotAllInOneWithArg(hdfx_const[:4], x0, var_name, bunchnum) phiplot.plot()
def phi_plot_all_in_one(var_name, bunchnum=50):
    """Overlay log-scale histograms of one variable ("phi", "ro" or "energy")
    for every run, restricted to positrons (BeamCal_pdgcont == -11), and save
    the figure as a PNG under <Run_name>_plots/.

    :param var_name: which quantity to histogram: "phi", "ro" or "energy"
    :param bunchnum: bunch count encoded in the HDF5 file names (default 50)
    :raises ValueError: if var_name is not one of the supported names
    """
    # (fix: the three copy-paste branches are collapsed into one lookup table;
    # the unused `dirname`/`plotroots` locals are removed, and the histogram
    # data is no longer misleadingly named `phi` for ro/energy)
    # var_name -> (dataframe column, x-axis label, figure title)
    var_specs = {
        "phi": ("BeamCal_contphi", r'$\phi$',
                r"$\phi$ Plots BeamCal_z>0 Positron"),
        "ro": ("BeamCal_contro", r'$\rho$',
               r"$\rho$ Plots BeamCal_z>0 Positron"),
        "energy": ("BeamCal_energycont", 'energy',
                   "Energy Plots BeamCal_z>0 Positron"),
    }
    if var_name not in var_specs:
        raise ValueError("unsupported var_name: %s" % var_name)
    column, xlabel, title = var_specs[var_name]

    runinfolist = getRunInfoList()
    fig = plt.figure(figsize=(10, 8))
    colors = plt.rcParams["axes.prop_cycle"]()
    ax = fig.add_subplot(111)
    textstr = ""
    for runinfo in runinfolist:
        hdfpath = "../" + runinfo.getDirName_grid() + "/" + "hdf_files"
        makeDirectory(hdfpath)
        hdfname = hdfpath + "/" + runinfo.getDirName_grid() + "_" + str(
            bunchnum) + "_bunches_" + conf.Run_name + ".h5"
        # Label = "<sigx>_<sigy>" extracted from the file name
        name, _ = os.path.splitext(hdfname)
        _, name = os.path.split(name)
        parts = name.split('_')
        name = parts[1] + "_" + parts[2]
        print(name)

        df = pd.read_hdf(hdfname, key="df")
        df = df.query('BeamCal_pdgcont==-11')  # positrons only
        values = DfToNp(df[column])
        textstr = textstr + "\n" + name + 'entries:' + str(values.shape[0])
        set_axs(ax, xlabel=xlabel, ylabel='Number', textstr=textstr)

        c = next(colors)["color"]
        ax.hist(values,
                bins=100,
                histtype='step',
                label=name,
                density=False,
                color=c,
                log=True)
    ax.legend(loc='upper right', ncol=3, framealpha=0.1)

    plt.suptitle(title)
    newdir = conf.Run_name + "_plots"
    makeDirectory(newdir)
    imname = newdir + "/" + var_name + "_plots_" + str(
        bunchnum) + "bunch_log_Positron_allinone"
    plt.tight_layout()
    plt.savefig(imname + ".png")
    print(imname + ".png created")
def plot(self): runinfolist = getRunInfoList() fig = plt.figure(figsize=(10, 8)) colors = plt.rcParams["axes.prop_cycle"]() ax = fig.add_subplot(111) plotroots = [] textstr = "" for j, hdfname in enumerate(self.filenamelist): name, _ = os.path.splitext(hdfname) _, name = os.path.split(name) name = name.split('_') if self.x_or_y == "x": name = name[2] #name of non constant parameter elif self.x_or_y == "y": name = name[1] print(name) df = pd.read_hdf(hdfname, key="df") if self.pdg == "pos": df = df.query('BeamCal_pdgcont==-11') if not self.var_name == "energy": df = df['BeamCal_cont' + self.var_name] ar = DfToNp(df) textstr = textstr + "\n" + name + 'entries:' + str(ar.shape[0]) if self.var_name == "phi": set_axs(ax, xlabel=r"$\phi$", ylabel='Number', textstr=textstr) elif self.var_name == "ro": set_axs(ax, xlabel=r'$\rho$', ylabel='Number', textstr=textstr) elif self.var_name == "energy": df = df['BeamCal_energycont'] ar = DfToNp(df) textstr = textstr + "\n" + name + 'entries:' + str(ar.shape[0]) ax.set_xlim((0, 0.001)) set_axs(ax, xlabel='energy', ylabel='Number', textstr=textstr) c = next(colors)["color"] ax.hist(ar, bins=100, histtype='step', label=name, density=False, color=c, log=False) #ax.hist(phi,bins=100,histtype='step',label=name,density=False, color=c,log=True) ax.legend(loc='upper right', ncol=2, framealpha=0.1) #ax.remove() newdir = conf.Run_name + "_plots" makeDirectory(newdir) bunchnum = self.bunchnum if var_name == "phi": if self.x_or_y == "x": if self.pdg == "pos": plt.suptitle(r"$\phi$ Plots BeamCal_z>0 Positron " + str(bunchnum) + " bunches") imname = newdir + "/" + "phi_plots_" + str( bunchnum) + "bunch_log_Positron_allinone_const_x" else: plt.suptitle(r"$\phi$ Plots BeamCal_z>0 " + str(bunchnum) + " bunches") imname = newdir + "/" + "phi_plots_" + str( bunchnum) + "bunch_log_allinone_const_x_nopdg" elif self.x_or_y == "y": if self.pdg == "pos": plt.suptitle(r"$\phi$ Plots BeamCal_z>0 Positron " + str(bunchnum) + " bunches") imname = newdir + 
"/" + "phi_plots_" + str( bunchnum) + "bunch_log_Positron_allinone_const_y" else: plt.suptitle(r"$\phi$ Plots BeamCal_z>0 " + str(bunchnum) + " bunches") imname = newdir + "/" + "phi_plots_" + str( bunchnum) + "bunch_log_allinone_const_y_nopdg" elif self.var_name == "ro": if self.x_or_y == "x": if self.pdg == "pos": plt.suptitle(r"$\rho$ Plots BeamCal_z>0 Positron " + str(bunchnum) + " bunches") imname = newdir + "/" + "ro_plots_" + str( bunchnum) + "bunch_log_Positron_allinone_const_x" else: plt.suptitle(r"$\rho$ Plots BeamCal_z>0 " + str(bunchnum) + " bunches") imname = newdir + "/" + "ro_plots_" + str( bunchnum) + "bunch_log_allinone_const_x_nopdg" elif self.x_or_y == "y": if self.pdg == "pos": plt.suptitle(r"$\rho$ Plots BeamCal_z>0 Positron " + str(bunchnum) + " bunches") imname = newdir + "/" + "ro_plots_" + str( bunchnum) + "bunch_log_Positron_allinone_const_y" else: plt.suptitle(r"$\rho$ Plots BeamCal_z>0 " + str(bunchnum) + " bunches") imname = newdir + "/" + "ro_plots_" + str( bunchnum) + "bunch_log_Positron_allinone_const_y_nppdg" elif self.var_name == "energy": if self.x_or_y == "x": if self.pdg == "pos": plt.suptitle("Energy Plots BeamCal_z>0 Positron " + str(bunchnum) + " bunches") imname = newdir + "/" + "energy_plots_" + str( bunchnum) + "bunch_log_Positron_allinone_const_x" else: plt.suptitle("Energy Plots BeamCal_z>0 " + str(bunchnum) + " bunches") imname = newdir + "/" + "energy_plots_" + str( bunchnum) + "bunch_log_Positron_allinone_const_x_nopdg" elif self.x_or_y == "y": if self.pdg == "pos": plt.suptitle("Energy Plots BeamCal_z>0 Positron " + str(bunchnum) + " bunches") imname = newdir + "/" + "energy_plots_" + str( bunchnum) + "bunch_log_Positron_allinone_const_y" else: plt.suptitle("Energy Plots BeamCal_z>0 " + str(bunchnum) + " bunches") imname = newdir + "/" + "energy_plots_" + str( bunchnum) + "bunch_log_Positron_allinone_const_y_nopdg" plt.savefig(imname + ".png") plt.close() print imname + ".png created"
# Resolve command-line directory arguments to absolute paths.
outputDirectory = os.path.abspath(args.outputDirectory)
scriptsDirectory = os.path.abspath(args.scriptsDirectory)

# Read configuration files
config = util.readConfigurationFiles()
header = config.getboolean("server", "PBS_header")
picard_folder = config.get("picard", "folder")
genome = config.get("project", "genome")
genomeFile = config.get(genome, "genomeFile")

samples = util.getMergedsamples()

# Create script and output directories, if they do not exist yet.
util.makeDirectory(outputDirectory)
util.makeDirectory(scriptsDirectory)

# CD to scripts directory
os.chdir(scriptsDirectory)

# Write one shell script per sample.
# NOTE(review): the open handle is never closed in this visible span —
# presumably closed further down in the original file; confirm.
# NOTE(review): the script is named bsmap_methratio but the command written
# is Picard CalculateHsMetrics — confirm the name/command pairing is intended.
for sample in samples:
    scriptName = "bsmap_methratio_" + sample + ".sh"
    script = open(scriptName, "w")
    if header:
        util.writeHeader(script, config, "bsmap_methratio")
    # Reorder
    script.write("java -Xmx4g -Xms4g -jar " +
                 os.path.join(picard_folder, "CalculateHsMetrics.jar") +
                 " \\\n")
    script.write("BAIT_INTERVALS=" +
                 os.path.join("../../results/bait_intervals",
                              sample + "_design_bait_intervals.txt") +
                 " \\\n")
    # (the script body continues beyond this chunk)
# Check if the inputDirectory exists, and is a directory. util.checkInputDirectory(inputDirectory) # Get samples samples = util.getSamples() # Read configuration files config = util.readConfigurationFiles() header = config.getboolean("server", "PBS_header") genome = config.get("project", "genome") genomeFolder = config.get(genome, "genomeFolder") institute = config.get(genome, "institute") # Create script and output directories, if they do not exist yet. util.makeDirectory(bedgraphDirectoryUCSC, recursive=True) if institute == "Ensembl": util.makeDirectory(bedgraphDirectoryEnsembl, recursive=True) util.makeDirectory(outputDirectory, recursive=True) util.makeDirectory(scriptsDirectory) # cd to scripts directory os.chdir(scriptsDirectory) if stranded: strands = ["", "_positive", "_negative"] else: strands = [""] # Write script for each file for sample in samples:
# Process the command line arguments.
scriptsDirectory = os.path.abspath(args.scriptsDirectory)
inputDirectory = os.path.abspath(args.inputDirectory)

# Read configuration files
config = util.readConfigurationFiles()
header = config.getboolean("server", "PBS_header")

# Get samples and conditions
samples = util.getMergedsamples()
samples = sorted(
    samples,
    reverse=True)  # To always put wt first, put the list in reverse alpabetical order

# Create scripts directory, if it does not exist yet, and cd to it.
util.makeDirectory(scriptsDirectory)
os.chdir(scriptsDirectory)

# Write one bamtools-split script per sample.
for sample in samples:
    # Create script
    # (fix: the original leaked the file handle — open() with no close();
    # `with` guarantees the script file is flushed and closed each iteration)
    scriptName = "splitbam_" + sample + ".sh"
    with open(scriptName, 'w') as script:
        if header:
            util.writeHeader(script, config, "samtoolsIndex")
        script.write("bamtools split " + "\\\n")
        script.write("-tag ZS " + "\\\n")
        # First file in inputDirectory matching <sample>*<extension>
        inputFile = glob.glob(inputDirectory + "/" + sample + "*" +
                              args.extension)[0]
        script.write("-in " + inputFile + " \\\n")
        script.write("&> " + scriptName + ".log")
        script.write("\n\n")
args = parser.parse_args() # If not in the main scripts directory, cd to the main scripts directory, if it exists. util.cdMainScriptsDirectory() # Process command line arguments. scriptsDirectory = os.path.abspath(args.scriptsDirectory) inputDirectory = os.path.abspath(args.inputDirectory) outputDirectory = os.path.abspath(args.outputDirectory) # Read samples file. samplesDataFrame = util.readSamplesFile() samples = samplesDataFrame["sample"].tolist() # Create script and output directories, if they do not exist yet. util.makeDirectory(outputDirectory, recursive=True) util.makeDirectory(scriptsDirectory, recursive=True) # Read configuration files config = util.readConfigurationFiles() header = config.getboolean("server", "PBS_header") # Change to scripts directory os.chdir(scriptsDirectory) files = os.listdir(inputDirectory) # Cycle through all the files and write the fastqc scripts. for file in files: if not os.path.isfile(os.path.join(inputDirectory, file)):