Exemple #1
0
    def __init__(self):
        """Load the setup parameters, start logging and reset the run state.

        The run's output directory is ``output/<description>--<timestamp>``;
        it is created through ``util.makeDirectory`` and the log file of the
        same name is placed inside it. Every stream, clusterer and activation
        attribute starts out as ``None``.
        """
        # Reset util.params before the first getParameter() call
        # (presumably forces the setup parameters to be re-read -- confirm
        # against util).
        util.params = None

        # Model location and training-set size
        modelPathParam = util.getParameter('DnnModelPath')
        self.dnnModelPath = util.getFullFileName(modelPathParam)
        self.numTrainingInstances = util.getParameter(
            'NumActivationTrainingInstances')

        # Output directory, named after the setup description and a timestamp
        creationTime = datetime.datetime.now()
        self.timestamp = creationTime.strftime("%y%m%d_%H%M%S")
        setupDescription = util.getSetupFileDescription()
        self.outputName = setupDescription + '--' + self.timestamp
        self.outputDir = 'output/%s' % (self.outputName)
        util.makeDirectory(self.outputDir)

        # Logging: configure the shared util module, then create the logger
        # and record the setup parameters in the log.
        util.isLoggingEnabled = util.getParameter('LoggingEnabled')
        util.logPath = self.outputDir + '/%s.log' % (self.outputName)
        util.logLevel = util.getParameter('LogLevel')
        util.thisLogger = util.Logger()
        util.storeSetupParamsInLog()

        # Setup memory environment
        self.processorType = util.getParameter('ProcessorType')

        self.startTime = datetime.datetime.now()

        # Run state, filled in later
        self.streamList = self.clustererList = None
        self.classMaxValues1 = None  # max value of raw activation data
        self.classMaxValues2 = None  # max value of reduced activation data

        self.flatActivations = self.activationBatches = None
        self.batchFlatActivations = self.reducedFlatActivations = None
Exemple #2
0
def sampling():
    """
    The main function of the sampling process.

    Reads the preprocessed point data named in ``CONFIG``, keeps the points
    whose street images were taken within the configured year range, samples
    and downloads street images for them, uploads the images through
    ``GDriveUpload``, writes one CSV row per sample (with the returned link
    appended) and finally plots the sampled points on a map.

    :return: None
    """
    # Column positions inside each row returned by sampleAndDownloadStreetImage
    IMG_NAME_COL_NUM = 5
    LAT_LNG_COL_NUM = 2

    # make directory for street images
    streetImageOutputFolder = CONFIG["sampling"]["streetImageOutputFolder"]
    makeDirectory(streetImageOutputFolder)

    # Get preprocessed point data
    # NOTE(review): the key is spelled "intersectoinPointFile"; presumably it
    # matches the spelling in the configuration file, so it is left as is.
    intersectionPointFile = CONFIG["shapefile"]["intersectoinPointFile"]
    pointInfoFile = CONFIG["shapefile"]["pointInfoFilename"]

    pointInfo = readPointFile(pointInfoFile)
    intersectionPointInfo = readIntersectionPointInfo(intersectionPointFile)

    # Filter point data that has street images taken within the specified period.
    maxYear = CONFIG["gmap"]["streetImageMaxYear"]
    minYear = CONFIG["gmap"]["streetImageMinYear"]
    filteredPoints = filterPointByYear(pointInfo, maxYear, minYear)

    # Sample street images, the return is list of sample info
    sampleNum = CONFIG["sampling"]["sampleNum"]
    initImageNumber = CONFIG["sampling"]["initImageNumber"]
    # NOTE(review): initImageNumber is passed twice -- confirm against the
    # signature of sampleAndDownloadStreetImage that this is intended.
    sampleData = sampleAndDownloadStreetImage(filteredPoints, sampleNum,
                                              initImageNumber, initImageNumber,
                                              streetImageOutputFolder,
                                              intersectionPointInfo)

    # Upload the images; links is indexed by the image path below.
    imageNames = [
        streetImageOutputFolder + "/" + data[IMG_NAME_COL_NUM]
        for data in sampleData
    ]
    links = GDriveUpload(imageNames, "Sampled_Image")

    # imageNames is parallel to sampleData, so pair them up instead of
    # rebuilding every path through an index loop.
    for data, imageName in zip(sampleData, imageNames):
        data.append(links[imageName])

    columnTitle = [
        "Sample Number", "Sampled Point Number", "Latitude + Longitude",
        "Heading", "Date", "Image Name", "Road Types", "Web Link"
    ]
    sampleData.insert(0, columnTitle)

    # output to csv file
    outputCSV(sampleData, CONFIG["sampling"]["csvFilename"])

    # plot images map (skip the header row; the set drops duplicate points)
    sampledPoints = {divideGPS(d[LAT_LNG_COL_NUM]) for d in sampleData[1:]}
    plotSampledPointMap(list(sampledPoints),
                        CONFIG["sampling"]["sampledPointsMapFilename"])
Exemple #3
0
def root_to_hdf_different_bunches():
    """Convert every run's ROOT files to HDF5, once per bunch count.

    The bunch counts are the ten values of ``np.linspace(1, 50, 10)``
    truncated to ``int``. For each count and each run returned by
    ``getRunInfoList()``, the ``*.root`` files found under
    ``../<run>/root/<conf.Run_name>`` are passed to ``iter_files_bunches``
    and the result is stored under key ``df`` in
    ``../<run>/hdf_files/<run>_<count>_bunches_<conf.Run_name>.h5``.
    HDF files that already exist are not regenerated. Runs without any ROOT
    file are reported and skipped instead of raising ``IndexError``.
    """
    runinfolist = getRunInfoList()
    bunchnums = [int(value) for value in np.linspace(1, 50, 10)]

    hdflist = []

    for bunchnum in bunchnums:
        for runinfo in runinfolist:
            rundir = runinfo.getDirName_grid()

            hdfpath = "../" + rundir + "/" + "hdf_files"
            makeDirectory(hdfpath)
            hdfname = hdfpath + "/" + rundir + "_" + str(
                bunchnum) + "_bunches_" + conf.Run_name + ".h5"

            if os.path.exists(hdfname):
                # Single-argument print() emits the same text on Python 2
                # and Python 3 (the original output had two spaces here).
                print(hdfname + "  exists")
            else:
                dirname = "../" + rundir + "/" + "root" + "/" + conf.Run_name
                rootlist = glob.glob(dirname + "/*.root")
                if not rootlist:
                    print("no root files found in " + dirname + ", skipping")
                    continue

                df = iter_files_bunches(rootlist, bunchnum)
                df.to_hdf(hdfname, key='df')
                print(hdfname + " created")

            hdflist.append(hdfname)

        # Running total, reported after each bunch count
        print("hdf length  " + str(len(hdflist)))
Exemple #4
0
def plot_all_one_param_constant_bunch(x_or_y, var_name, bunchnum):
    """Plot ``var_name`` for a group of runs sharing one constant parameter.

    Fields 1 and 2 of every run directory name (split on ``'_'``) are
    collected as the sets of ``sigx`` and ``sigy`` values. The HDF file names
    of all runs are then grouped by each distinct value, and one slice of
    four files is handed to ``PhiPlotAllInOneWithArg`` for plotting.

    Parameters:
        x_or_y: ``"x"`` or ``"y"``; selects which grouping is plotted. Any
            other value does nothing after the grouping.
        var_name: forwarded unchanged to ``PhiPlotAllInOneWithArg``.
        bunchnum: bunch count; part of the HDF file name and forwarded to
            ``PhiPlotAllInOneWithArg``.
    """
    runinfolist = getRunInfoList()

    # Distinct sigx / sigy values taken from the run directory names
    sigxlist = []
    sigylist = []
    for runinfo in runinfolist:
        fields = runinfo.getDirName_grid().split('_')
        sigxlist.append(fields[1])
        sigylist.append(fields[2])
    sigxlist = set(sigxlist)
    sigylist = set(sigylist)

    # (hdf file name, '_'-separated fields of its base name) for every run.
    # Built once instead of once per sigma value.
    hdf_entries = []
    for runinfo in runinfolist:
        rundir = runinfo.getDirName_grid()
        hdfpath = "../" + rundir + "/" + "hdf_files"
        makeDirectory(hdfpath)
        hdfname = hdfpath + "/" + rundir + "_" + str(
            bunchnum) + "_bunches_" + conf.Run_name + ".h5"

        stem, _ = os.path.splitext(hdfname)
        hdf_entries.append((hdfname, os.path.basename(stem).split('_')))

    # Group the files by constant value. The two sets are walked
    # independently: zipping them would stop at the shorter set and silently
    # drop values whenever the numbers of distinct sigx and sigy differ.
    hdfx_const = [
        hdfname for sigxval in sigxlist for hdfname, fields in hdf_entries
        if fields[1] == sigxval
    ]
    hdfy_const = [
        hdfname for sigyval in sigylist for hdfname, fields in hdf_entries
        if fields[2] == sigyval
    ]

    # NOTE(review): the slices below assume groups of four files ("every 4
    # elements of the list constant parameter"), and the group order follows
    # set iteration order, which is arbitrary -- confirm the intended group.
    # NOTE(review): y1 and x0 are not defined inside this function;
    # presumably module-level names -- confirm.
    if x_or_y == "y":
        for hdfy in hdfy_const:
            print(hdfy)
        phiplot = PhiPlotAllInOneWithArg(hdfy_const[4:8], y1, var_name,
                                         bunchnum)
        phiplot.plot()

    elif x_or_y == "x":
        for hdfx in hdfx_const:
            print(hdfx)
        phiplot = PhiPlotAllInOneWithArg(hdfx_const[:4], x0, var_name,
                                         bunchnum)
        phiplot.plot()
Exemple #5
0
def phi_plot_all_in_one(var_name, bunchnum=50):
    """Overlay one log-scale step histogram per run and save it as a PNG.

    For every run returned by ``getRunInfoList()`` the HDF file
    ``../<run>/hdf_files/<run>_<bunchnum>_bunches_<conf.Run_name>.h5`` is
    read, filtered with ``BeamCal_pdgcont==-11`` and the column selected by
    ``var_name`` is histogrammed on a shared axis. The figure is written to
    ``<conf.Run_name>_plots/<var_name>_plots_<bunchnum>bunch_log_Positron_allinone.png``.

    Parameters:
        var_name: ``"phi"``, ``"ro"`` or ``"energy"``.
        bunchnum: bunch count used in the HDF and image file names.

    Raises:
        ValueError: if ``var_name`` is not one of the supported values
            (previously this surfaced later as a ``NameError``).
    """
    # var_name -> (dataframe column, x-axis label, figure title)
    plot_specs = {
        "phi": ('BeamCal_contphi', r'$\phi$',
                r"$\phi$ Plots BeamCal_z>0 Positron"),
        "ro": ('BeamCal_contro', r'$\rho$',
               r"$\rho$ Plots BeamCal_z>0 Positron"),
        "energy": ('BeamCal_energycont', 'energy',
                   "Energy Plots BeamCal_z>0 Positron"),
    }
    if var_name not in plot_specs:
        raise ValueError("unsupported var_name %r; expected one of %s" %
                         (var_name, ", ".join(sorted(plot_specs))))
    column, xlabel, title = plot_specs[var_name]

    runinfolist = getRunInfoList()

    fig = plt.figure(figsize=(10, 8))
    colors = plt.rcParams["axes.prop_cycle"]()
    ax = fig.add_subplot(111)

    textstr = ""
    for runinfo in runinfolist:
        rundir = runinfo.getDirName_grid()

        hdfpath = "../" + rundir + "/" + "hdf_files"
        makeDirectory(hdfpath)
        hdfname = hdfpath + "/" + rundir + "_" + str(
            bunchnum) + "_bunches_" + conf.Run_name + ".h5"

        # Legend label: fields 1 and 2 of the HDF base name
        stem, _ = os.path.splitext(hdfname)
        fields = os.path.basename(stem).split('_')
        name = fields[1] + "_" + fields[2]
        print(name)

        df = pd.read_hdf(hdfname, key="df")
        df = df.query('BeamCal_pdgcont==-11')
        values = DfToNp(df[column])

        # The text box accumulates one "entries" line per run.
        textstr = textstr + "\n" + name + 'entries:' + str(values.shape[0])
        set_axs(ax, xlabel=xlabel, ylabel='Number', textstr=textstr)

        c = next(colors)["color"]
        ax.hist(values,
                bins=100,
                histtype='step',
                label=name,
                density=False,
                color=c,
                log=True)
        ax.legend(loc='upper right', ncol=3, framealpha=0.1)

    plt.suptitle(title)
    newdir = conf.Run_name + "_plots"
    makeDirectory(newdir)
    imname = newdir + "/" + var_name + "_plots_" + str(
        bunchnum) + "bunch_log_Positron_allinone"

    plt.tight_layout()

    # NOTE(review): the figure is not closed after saving -- confirm whether
    # callers rely on it staying open.
    plt.savefig(imname + ".png")
    print(imname + ".png created")
Exemple #6
0
    def plot(self):
        """Overlay one step histogram per HDF file and save the figure as PNG.

        Reads these attributes of ``self``:
            filenamelist: HDF file names to plot, one histogram each.
            x_or_y: ``"x"`` or ``"y"``; selects which field of the file name
                labels each histogram and the ``const_x`` / ``const_y`` part
                of the image name.
            var_name: ``"phi"``, ``"ro"`` or ``"energy"``.
            pdg: when equal to ``"pos"`` the data is filtered with
                ``BeamCal_pdgcont==-11`` before plotting.
            bunchnum: bunch count shown in the title and the image name.

        The image is written to ``<conf.Run_name>_plots/``; the file names
        produced by earlier versions are reproduced exactly, including their
        inconsistent suffixes.

        Raises:
            ValueError: if ``var_name`` or ``x_or_y`` is not supported
                (previously this surfaced as a ``NameError``/``TypeError``).
        """
        var_name = self.var_name
        x_or_y = self.x_or_y
        bunchnum = self.bunchnum
        positron_only = self.pdg == "pos"

        # var_name -> (dataframe column, x-axis label,
        #              title start with the pdg filter, title start without)
        plot_specs = {
            "phi": ('BeamCal_contphi', r"$\phi$",
                    r"$\phi$ Plots BeamCal_z>0 Positron ",
                    r"$\phi$ Plots BeamCal_z>0 "),
            "ro": ('BeamCal_contro', r'$\rho$',
                   r"$\rho$ Plots BeamCal_z>0 Positron ",
                   r"$\rho$ Plots BeamCal_z>0 "),
            "energy": ('BeamCal_energycont', 'energy',
                       "Energy Plots BeamCal_z>0 Positron ",
                       "Energy Plots BeamCal_z>0  "),
        }
        # Image-name endings used when no pdg filter is applied. They are
        # irregular but kept verbatim so existing output names do not change.
        unfiltered_suffixes = {
            ("phi", "x"): "bunch_log_allinone_const_x_nopdg",
            ("phi", "y"): "bunch_log_allinone_const_y_nopdg",
            ("ro", "x"): "bunch_log_allinone_const_x_nopdg",
            ("ro", "y"): "bunch_log_Positron_allinone_const_y_nppdg",
            ("energy", "x"): "bunch_log_Positron_allinone_const_x_nopdg",
            ("energy", "y"): "bunch_log_Positron_allinone_const_y_nopdg",
        }
        if var_name not in plot_specs:
            raise ValueError("unsupported var_name %r; expected one of %s" %
                             (var_name, ", ".join(sorted(plot_specs))))
        if x_or_y not in ("x", "y"):
            raise ValueError("unsupported x_or_y %r; expected 'x' or 'y'" %
                             (x_or_y, ))
        column, xlabel, filtered_title, unfiltered_title = plot_specs[var_name]

        fig = plt.figure(figsize=(10, 8))
        colors = plt.rcParams["axes.prop_cycle"]()
        ax = fig.add_subplot(111)

        textstr = ""
        for hdfname in self.filenamelist:
            stem, _ = os.path.splitext(hdfname)
            fields = os.path.basename(stem).split('_')

            # Label with the non-constant parameter: field 2 when x is held
            # constant, field 1 when y is.
            name = fields[2] if x_or_y == "x" else fields[1]
            print(name)

            df = pd.read_hdf(hdfname, key="df")
            if positron_only:
                df = df.query('BeamCal_pdgcont==-11')
            ar = DfToNp(df[column])

            # The text box accumulates one "entries" line per file.
            textstr = textstr + "\n" + name + 'entries:' + str(ar.shape[0])
            if var_name == "energy":
                ax.set_xlim((0, 0.001))
            set_axs(ax, xlabel=xlabel, ylabel='Number', textstr=textstr)

            c = next(colors)["color"]
            ax.hist(ar,
                    bins=100,
                    histtype='step',
                    label=name,
                    density=False,
                    color=c,
                    log=False)
            ax.legend(loc='upper right', ncol=2, framealpha=0.1)

        newdir = conf.Run_name + "_plots"
        makeDirectory(newdir)

        if positron_only:
            title = filtered_title + str(bunchnum) + " bunches"
            suffix = "bunch_log_Positron_allinone_const_" + x_or_y
        else:
            title = unfiltered_title + str(bunchnum) + " bunches"
            suffix = unfiltered_suffixes[(var_name, x_or_y)]
        plt.suptitle(title)
        imname = newdir + "/" + var_name + "_plots_" + str(bunchnum) + suffix

        plt.savefig(imname + ".png")
        plt.close()
        print(imname + ".png created")
# Resolve the command-line directories to absolute paths; the working
# directory is changed further down, so relative paths would stop resolving.
outputDirectory = os.path.abspath(args.outputDirectory)
scriptsDirectory = os.path.abspath(args.scriptsDirectory)

# Read configuration files
config = util.readConfigurationFiles()

# Whether a header is written at the top of every generated script
header = config.getboolean("server", "PBS_header")

picard_folder = config.get("picard", "folder")
# The project's genome name doubles as the config section holding its files.
genome = config.get("project", "genome")
# NOTE(review): genomeFile is not used in the visible part of this script.
genomeFile = config.get(genome, "genomeFile")

samples = util.getMergedsamples()

# Create script and output directories, if they do not exist yet.
util.makeDirectory(outputDirectory)
util.makeDirectory(scriptsDirectory)

# CD to scripts directory, so the scripts below are created there
os.chdir(scriptsDirectory)

# Write scripts: one shell script per sample
for sample in samples:
    # NOTE(review): the script name and the header job name say
    # "bsmap_methratio", but the command written below runs Picard's
    # CalculateHsMetrics -- presumably a copy-paste leftover; confirm.
    scriptName =  "bsmap_methratio_" + sample + ".sh"
    # NOTE(review): this file is not closed within the visible lines --
    # confirm it is closed once the command has been written.
    script = open(scriptName, "w")
    if header:
        util.writeHeader(script, config, "bsmap_methratio")
    # Command line; every part ends in a backslash-newline continuation
    
    script.write("java -Xmx4g -Xms4g -jar " + os.path.join(picard_folder, "CalculateHsMetrics.jar") + " \\\n")
    script.write("BAIT_INTERVALS=" + os.path.join("../../results/bait_intervals", sample + "_design_bait_intervals.txt") + " \\\n")
# Check if the inputDirectory exists, and is a directory.
util.checkInputDirectory(inputDirectory)

# Get samples
samples = util.getSamples()

# Read configuration files
config = util.readConfigurationFiles()

# Whether a header is written at the top of every generated script
# (presumably used further down -- not visible here)
header = config.getboolean("server", "PBS_header")
# The project's genome name doubles as the config section that holds the
# genome-specific settings read below.
genome = config.get("project", "genome")
genomeFolder = config.get(genome, "genomeFolder")
institute = config.get(genome, "institute")

# Create script and output directories, if they do not exist yet.
util.makeDirectory(bedgraphDirectoryUCSC, recursive=True)
# The Ensembl bedgraph directory is only created for Ensembl genomes.
if institute == "Ensembl":
    util.makeDirectory(bedgraphDirectoryEnsembl, recursive=True)
util.makeDirectory(outputDirectory, recursive=True)
# NOTE(review): unlike the directories above, this one is created without
# recursive=True -- confirm that is intended.
util.makeDirectory(scriptsDirectory)

# cd to scripts directory
os.chdir(scriptsDirectory)

# File-name suffixes to process: stranded data gets the combined ("") file
# name plus one suffix per strand, unstranded data only the combined one.
if stranded:
    strands = ["", "_positive", "_negative"]
else:
    strands = [""]

# Write script for each file
for sample in samples:
Exemple #9
0
# Process the command line arguments.
# Paths are made absolute because the working directory is changed below.
scriptsDirectory = os.path.abspath(args.scriptsDirectory) 
inputDirectory = os.path.abspath(args.inputDirectory)

# Read configuration files
config = util.readConfigurationFiles()

# Whether a header is written at the top of every generated script
header = config.getboolean("server", "PBS_header")

# Get samples and conditions
samples = util.getMergedsamples()
samples = sorted(samples, reverse=True) # To always put wt first, put the list in reverse alphabetical order

# Create scripts directory, if it does not exist yet, and cd to it.
util.makeDirectory(scriptsDirectory)
os.chdir(scriptsDirectory)

# Write one "bamtools split" shell script per sample
for sample in samples:
    # Create script
    scriptName = "splitbam_" + sample + ".sh"
    # NOTE(review): this file is not closed within the visible lines --
    # confirm it is closed after the last write.
    script = open(scriptName, 'w')
    if header:
        # NOTE(review): the header job name is "samtoolsIndex" although the
        # script runs bamtools split -- presumably a copy-paste leftover.
        util.writeHeader(script, config, "samtoolsIndex")
    script.write("bamtools split " + "\\\n")    
    script.write("-tag ZS " + "\\\n")
    # First file in inputDirectory that starts with the sample name and ends
    # with the requested extension; raises IndexError when nothing matches.
    inputFile = glob.glob(inputDirectory + "/" + sample + "*" + args.extension)[0]
    script.write("-in " + inputFile + " \\\n")
    # Send stdout and stderr of the command to "<scriptName>.log"
    script.write("&> " + scriptName + ".log")

    script.write("\n\n")
args = parser.parse_args()

# If not in the main scripts directory, cd to the main scripts directory, if it exists.
util.cdMainScriptsDirectory()

# Process command line arguments.
# Paths are made absolute here, after the cd above and before the cd to the
# scripts directory below, so they keep pointing at the same locations.
scriptsDirectory = os.path.abspath(args.scriptsDirectory)
inputDirectory = os.path.abspath(args.inputDirectory)
outputDirectory = os.path.abspath(args.outputDirectory)

# Read samples file.
samplesDataFrame = util.readSamplesFile()
# Sample names come from the "sample" column of the samples file.
samples = samplesDataFrame["sample"].tolist()

# Create script and output directories, if they do not exist yet.
util.makeDirectory(outputDirectory, recursive=True)
util.makeDirectory(scriptsDirectory, recursive=True)

# Read configuration files
config = util.readConfigurationFiles()

# Whether a header is written at the top of every generated script
# (presumably used further down -- not visible here)
header = config.getboolean("server", "PBS_header")

# Change to scripts directory
os.chdir(scriptsDirectory)

# Every entry of the input directory (files and sub-directories alike)
files = os.listdir(inputDirectory)

# Cycle through all the files and write the fastqc scripts.
for file in files:
    if not os.path.isfile(os.path.join(inputDirectory, file)):