def aggregateDataFromPath(path, aggregationDictionary):
    """Load an experiment folder and return its gene-aggregated population data.

    Args:
        path (str): Directory containing the experiment CSV files.
        aggregationDictionary (dict): Genotype-aggregation mapping passed
            straight through to monet.aggregateGenotypesInNode.

    Returns:
        dict: Landscape-summed node data with genotypes grouped per the
            aggregation dictionary.
    """
    # List the per-sex files, sum the whole landscape into one node, then
    # bin the genotypes according to the provided dictionary.
    experimentFiles = monet.readExperimentFilenames(path)
    summedPopulation = monet.sumLandscapePopulationsFromFiles(
        experimentFiles, male=True, female=True, dataType=float
    )
    return monet.aggregateGenotypesInNode(summedPopulation, aggregationDictionary)
Beispiel #2
0
def preProcessSubLandscape(pop,
                           landReps,
                           fName,
                           drive,
                           nodesAggLst,
                           nodeAggIx,
                           MF=(True, True),
                           cmpr='bz2',
                           SUM=True,
                           AGG=True,
                           SPA=True,
                           REP=True,
                           SRP=True):
    """
    Preprocesses a subset of the landscape
    Args:
        pop (list): Files list element aggregated by landscape subset
        landReps (dict): Landscape repetitions
                (spatial from monet.loadAndAggregateLandscapeDataRepetitions)
        fName (str): Filename (including path)
        drive (dict): Gene-drive dictionary
        nodesAggLst (lst): List of lists containing the indices of the nodes
                to be aggregated together
        nodeAggIx (int): Current list to process (from the nodeAggLst)
        MF (bool tuple): Male and Female boolean selectors
        cmpr (str): Compression algorithm to be used by compress-python
        SUM (bool): Population summed and gene-aggregated into one node
        AGG (bool): Population gene-aggregated in their own nodes
        SPA (bool): Genetic landscape (gene-aggregated)
        REP (bool): Garbage gene-aggregated data
        SRP (bool): Summed into one garbage gene-aggregated data
    Returns:
        None
    """
    if SUM:
        sumData = monet.sumLandscapePopulationsFromFiles(pop, MF[0], MF[1])
        sumAgg = monet.aggregateGenotypesInNode(sumData, drive)
        pkl.dump(sumAgg, fName + '_sum', compression=cmpr)
    if AGG or SPA:
        # SPA needs the gene-aggregated landscape as input too, so compute it
        # whenever either flag is set (previously SPA=True with AGG=False
        # raised a NameError on the undefined "aggData").
        aggData = monet.loadAndAggregateLandscapeData(pop, drive, MF[0], MF[1])
        if AGG:
            pkl.dump(aggData, fName + '_agg', compression=cmpr)
    if SPA:
        geneSpaTemp = monet.getGenotypeArraysFromLandscape(aggData)
        pkl.dump(geneSpaTemp, fName + '_spa', compression=cmpr)
    if REP or SRP:
        fLandReps = monet.filterAggregateGarbageByIndex(
            landReps, nodesAggLst[nodeAggIx])
        if REP:
            # Only write the repetitions file when explicitly requested
            # (previously it was also written when only SRP was set).
            pkl.dump(fLandReps, fName + '_rep', compression=cmpr)
    if SRP:
        # Sum the per-repetition landscapes into a single array per genotype.
        fRepsSum = [sum(i) for i in fLandReps['landscapes']]
        fRepsDict = {
            'genotypes': fLandReps['genotypes'],
            'landscapes': fRepsSum
        }
        pkl.dump(fRepsDict, fName + '_srp', compression=cmpr)
    return None
Beispiel #3
0
def getAggDataSSDay(pathsRoot, i):
    """Aggregate one experiment's populations and locate its steady-state day.

    Reads the module-level ``aggregationDictionary`` global and the ``aux``
    helper module (steady state is the day the aggregated dynamics settle
    within a 0.01 tolerance).

    Args:
        pathsRoot (list): Experiment directory paths.
        i (int): Index of the experiment to process.

    Returns:
        tuple: (aggregated data dict, steady-state day, experiment name).
    """
    samplePath = pathsRoot[i] + "/"
    # The experiment name is the last directory component of the path.
    expName = samplePath.split("/")[-2]
    files = monet.readExperimentFilenames(samplePath)
    summed = monet.sumLandscapePopulationsFromFiles(
        files, male=True, female=True, dataType=float
    )
    agg = monet.aggregateGenotypesInNode(summed, aggregationDictionary)
    steadyStateDay = aux.reachedSteadtStateAtDay(agg, .01)
    return agg, steadyStateDay, expName
Beispiel #4
0
def calculateGeneTemporal(filenames):
    """Sum male populations over the landscape and bin them by allele class.

    Args:
        filenames (dict): Per-sex file listing (as returned by
            monet.readExperimentFilenames).

    Returns:
        dict: Population data aggregated into the W/H/R/B genotype bins.
    """
    # Only male files contribute to the sum (female=False).
    summed = monet.sumLandscapePopulationsFromFiles(
        filenames, male=True, female=False, dataType=float
    )
    # Build the W/H/R/B aggregation mapping from the genotypes actually
    # present in the summed data, then collapse the counts into those bins.
    aggDict = monet.autoGenerateGenotypesDictionary(
        ["W", "H", "R", "B"], summed["genotypes"]
    )
    return monet.aggregateGenotypesInNode(summed, aggDict)
Beispiel #5
0
def loadFolderAndWriteFactorialCSVInclude(experimentString,
                                          path,
                                          aggregationDictionary,
                                          ratiosDictionary,
                                          male=True,
                                          female=True,
                                          dataType=float,
                                          includePattern='*'):
    """
    Description:
        * Wrapper function to perform the whole factorial parsing analysis on a
            folder and write the resulting CSV to drive.
    In:
        * experimentString: Experiment folder name (appended to "path").
        * path: Directory where the experiment is stored.
        * aggregationDictionary: Dictionary containing the keys to aggregate
            the genotypes (created with "generateAggregationDictionary")
        * ratiosDictionary: "numerator", and "denominator" lists dictionary
            containing the columns to use in each section of the ratio.
        * male/female: Sex selectors forwarded to the landscape summation.
        * dataType: Numeric type used when loading the CSV data.
        * includePattern: Substring a filename must contain to be kept.
            '*' (the default) keeps every file. (Bug fix: the substring test
            previously applied to the literal '*' too, so the default
            silently discarded every file without a '*' in its name.)
    Out:
        * NA
    Notes:
        * NA
    """
    # Read filenames
    filenames = monet.readExperimentFilenames(path + experimentString)
    # Filter out non-needed files ('*' keeps everything)
    filenames['male'] = filterFilesByPattern(filenames['male'], includePattern)
    filenames['female'] = filterFilesByPattern(filenames['female'],
                                               includePattern)
    # Aggregate data
    landscapeSumData = monet.sumLandscapePopulationsFromFiles(
        filenames, male=male, female=female, dataType=dataType)
    aggregateData = monet.aggregateGenotypesInNode(landscapeSumData,
                                                   aggregationDictionary)
    split = monet.splitExperimentString(experimentString)
    monet.writeFactorialAnalysisCSV(split["releasesNumber"],
                                    int(split["coverage"]) / 1000.0, path,
                                    experimentString, aggregateData,
                                    ratiosDictionary)
    return None


def filterFilesByPattern(files, pattern):
    """Return the files containing "pattern" as a substring ('*' = keep all)."""
    if pattern == '*':
        return list(files)
    return [f for f in files if pattern in f]
Beispiel #6
0
def preProcessSubLandscapeV2(pop,
                             landReps,
                             fName,
                             drive,
                             nodesAggLst,
                             nodeAggIx,
                             MF=(True, True),
                             cmpr='bz2',
                             SUM=True,
                             AGG=True,
                             SPA=True,
                             REP=True,
                             SRP=True):
    """
    Preprocesses a subset of the landscape (V2).
    Args:
        pop (list): Files list element aggregated by landscape subset
        landReps (dict): Landscape repetitions
        fName (str): Output filename prefix (including path)
        drive (dict): Gene-drive aggregation dictionary
        nodesAggLst (lst): Lists of node indices to aggregate together
        nodeAggIx (int): Current list to process (from nodesAggLst)
        MF (bool tuple): Male and Female boolean selectors
        cmpr (str): Compression algorithm used by compress-pickle
        SUM (bool): Write population summed into one node ('_sum')
        AGG (bool): Write gene-aggregated per-node population ('_agg')
        SPA (bool): Write genetic landscape arrays ('_spa')
        REP (bool): Write filtered repetitions data ('_rep')
        SRP (bool): Write summed repetitions data ('_srp')
    Returns:
        None
    """
    if SUM:
        sumData = monet.sumLandscapePopulationsFromFiles(pop, MF[0], MF[1])
        sumAgg = monet.aggregateGenotypesInNode(sumData, drive)
        pkl.dump(sumAgg, fName + '_sum', compression=cmpr)
    if AGG or SPA:
        # SPA consumes the gene-aggregated landscape, so compute it when
        # either flag is set (previously SPA=True with AGG=False raised a
        # NameError on the undefined "aggData").
        aggData = monet.loadAndAggregateLandscapeData(pop, drive, MF[0], MF[1])
        if AGG:
            pkl.dump(aggData, fName + '_agg', compression=cmpr)
    if SPA:
        geneSpaTemp = monet.getGenotypeArraysFromLandscape(aggData)
        pkl.dump(geneSpaTemp, fName + '_spa', compression=cmpr)
    if REP or SRP:
        fLandReps = monet.filterAggregateGarbageByIndex(
            landReps, nodesAggLst[nodeAggIx])
        if REP:
            pkl.dump(fLandReps, fName + '_rep', compression=cmpr)
    if SRP:
        # Sum the per-repetition landscapes into a single array per genotype.
        fRepsSum = [sum(i) for i in fLandReps['landscapes']]
        fRepsDict = {
            'genotypes': fLandReps['genotypes'],
            'landscapes': fRepsSum
        }
        pkl.dump(fRepsDict, fName + '_srp', compression=cmpr)
    return None
Beispiel #7
0
 print(expsPath)
 ###########################################################################
 # Stacked population plots (sums the whole landscape into a single
 #   population count over time)
 ###########################################################################
 if STACK:
     # Parses the paths of all CSV files starting with 'F_' and/or 'M_'
     filenames = monet.readExperimentFilenames(expsPath)
     # Loads all the files provided and sums them into one array of dims:
     #   [originalGenotypes, time, [counts]]
     landscapeSumData = monet.sumLandscapePopulationsFromFiles(
         filenames, male=maleToggle, female=femaleToggle, dataType=float)
     # Groups the genotypes into "bins" provided by the
     #   "aggregationDictionary" by summing the counts in each one of the
     #   columns.
     aggData = monet.aggregateGenotypesInNode(landscapeSumData,
                                              aggregationDictionary)
     # Calculates the dates at which the system arrives to the required
     #   thresholds
     ssDays = [aux.introgrationDay(aggData, 0, 1 - k) for k in probeRatios]
     # Plotting-related instructions
     daysTup = [
         fmtStr.format(day[1], day[0] / 7)
         for day in zip(ssDays, probeRatios)
     ]
     title = ' '.join(daysTup)
     figB = monet.plotMeanGenotypeStack(aggData, style, vLinesCoords=ssDays)
     figB.get_axes()[0].set_xlim(style["xRange"][0], style["xRange"][1])
     figB.get_axes()[0].set_ylim(style["yRange"][0], style["yRange"][1])
     plt.title('[Fraction: Week] :: ' + title, fontsize=5)
     monet.quickSaveFigure(figB,
                           pathRoot + "S_" + experimentString + ".png",
Beispiel #8
0
def sum_nodes_in_cluster(key, cluster_dict):
    """Sum node-level outputs for each cluster and diff them against full resolution.

    For every cluster in ``cluster_dict`` this builds the list of per-node CSV
    files, sums them into one population vector, aggregates genotypes, and
    compares the result against the pre-aggregated experiment output by calling
    ``get_diff_cluster_vs_full`` (which writes the per-cluster difference
    vector to a CSV).

    Relies on module-level globals (not visible in this function):
    ``ref_dir``, ``end_ref``, ``maleToggle``, ``femaleToggle``,
    ``aggregationDictionary``, ``path_to_all_experiments``,
    ``process_node_id`` and ``get_diff_cluster_vs_full``.

    Args:
        key (str): "<aggregation level>/<run id>" identifier; split on '/'.
        cluster_dict (dict): Maps cluster ID -> list of node IDs that the
            cluster summarizes.

    Returns:
        None. Side effect: writes one CSV per cluster under a path shaped
        like ``agg_<level>/cluster_<id>/run_<runID><M><F>``, e.g.
        ``agg_C000002/cluster_0000/run_Yorkeys01_0078_ATrueTrue``.
    """
    agg_level, run_id = key.split('/')
    print(agg_level, run_id)

    # Create the aggregation-level output directory if it does not exist yet.
    if not os.path.exists("agg_" + agg_level):
        os.mkdir("agg_" + agg_level)

    clusters = list(cluster_dict.keys())
    # iterate over the clusters
    for c in clusters:
        print("Processing cluster ID ", c)
        nodes_to_summarize = cluster_dict[c]

        # Per-cluster output directory.
        if not os.path.exists("agg_" + agg_level + "/cluster_" +
                              process_node_id(c)):
            os.mkdir("agg_" + agg_level + "/cluster_" + process_node_id(c))

        filenames = dict()
        filenames['male'] = []
        filenames['female'] = []

        # grab all the filepaths for one cluster
        for node_id in nodes_to_summarize:
            # print("Here is ", node_id)
            processed_node_id = process_node_id(node_id)

            # pick a run: Yorkeys01_0027_A
            # print(os.listdir(start_ref))
            # NOTE(review): "list_dir" and the random index "i" are computed
            # and printed but never used below — the random-run selection
            # appears abandoned in favor of the fixed "end_ref" run.
            list_dir = [f for f in os.listdir(ref_dir)]
            i = int(np.random.uniform(0, len(os.listdir(ref_dir))))
            print(i)
            print(len(list_dir))

            # build reference_pop
            # reference_pop = ref_dir + '/'
            reference_pop = ref_dir + '/' + end_ref  # reference_pop += list_dir[i]
            # reference_pop += end_ref

            # NOTE(review): os.path.join receives a single pre-concatenated
            # string here, so it is effectively a no-op; "reference_pop" is
            # presumably expected to end with a path separator — confirm.
            file_path = os.path.join(reference_pop + "F_Mean_Patch" +
                                     processed_node_id + ".csv")
            filenames['female'].append(file_path)

            file_path = os.path.join(reference_pop + "M_Mean_Patch" +
                                     processed_node_id + ".csv")
            filenames['male'].append(file_path)

        # sum all the filepaths inside one cluster, then group by genotypes
        cluster_sum = monet.sumLandscapePopulationsFromFiles(
            filenames, male=maleToggle, female=femaleToggle, dataType=float)
        # returns a dictionary with 'population' as the time series population vector
        clusterGeno_sum = monet.aggregateGenotypesInNode(
            cluster_sum, aggregationDictionary)

        # take the difference between the sum and the clustered result
        # male
        # NOTE(review): the experiment subfolder "E_0730_30_20_02_00020" is
        # hard-coded for both sexes — verify it matches the run being diffed.
        male = str(agg_level) + "/" + str(
            run_id
        ) + "/ANALYZED/E_0730_30_20_02_00020/M_Mean_Patch" + process_node_id(
            c) + ".csv"
        exp_path_M = os.path.join(path_to_all_experiments, male)
        # female
        female = str(agg_level) + "/" + str(
            run_id
        ) + "/ANALYZED/E_0730_30_20_02_00020/F_Mean_Patch" + process_node_id(
            c) + ".csv"
        exp_path_F = os.path.join(path_to_all_experiments, female)

        filenames2 = dict()
        filenames2['male'] = [exp_path_M]
        filenames2['female'] = [exp_path_F]

        # Pre-aggregated experiment output, summed and genotype-grouped the
        # same way as the cluster sum (dataType defaults here, unlike above).
        agg_sum = monet.sumLandscapePopulationsFromFiles(filenames2,
                                                         male=maleToggle,
                                                         female=femaleToggle)
        aggGeno_sum = monet.aggregateGenotypesInNode(agg_sum,
                                                     aggregationDictionary)

        # take the difference between the aggregated_sum vector and the full unaggregated sum vector, writes it to CSV
        get_diff_cluster_vs_full(
            aggGeno_sum, clusterGeno_sum,
            "agg_" + agg_level + "/cluster_" + process_node_id(c) + "/run_" +
            run_id + str(maleToggle) + str(femaleToggle))
Beispiel #9
0
    expBaseName = "Yorkeys_AGG_1_"
    pathRoot = "/Volumes/marshallShare/ERACR/Fowler4/Experiment/"
    truthExperiment = expBaseName + "00250"  #"02195"
    expsList = [1, 10, 50, 100, 250, 500, 750, 1000, 1250, 1500, 1750, 1971]
pathSet = pathRoot + expBaseName + "*/"
# #############################################################################
# Setting up the experiments paths
# #############################################################################
foldersList = sorted(glob.glob(pathSet + "*ANALYZED"))
truthExpPath = glob.glob(pathRoot + truthExperiment + "/ANALYZED/*")[0] + "/"
# #############################################################################
# Calculating the baseline level (unaggregated)
# #############################################################################
filenames = monet.readExperimentFilenames(truthExpPath)
landscapeSumData = monet.sumLandscapePopulationsFromFiles(filenames)
basePopDyns = monet.aggregateGenotypesInNode(landscapeSumData, aux.genAggDict)
ref = basePopDyns['population']
# #############################################################################
# Experiment iterations
# #############################################################################
for i in expsList:
    # #########################################################################
    # Calculating the error metric
    # #########################################################################
    refExperiment = expBaseName + str(i).rjust(5, "0")
    print(pathRoot + refExperiment)
    refExpPath = glob.glob(pathRoot + refExperiment + "/ANALYZED/*")[0] + "/"
    filenames = monet.readExperimentFilenames(refExpPath)
    landscapeSumData = monet.sumLandscapePopulationsFromFiles(filenames)
    refPopDyns = monet.aggregateGenotypesInNode(landscapeSumData,
                                                aux.genAggDict)
Beispiel #10
0
humanFiles = [glob(i + '/H_*')[0] for i in dirsTraces]
hData = [
    np.loadtxt(i, skiprows=1, delimiter=',', usecols=(1, 2))
    for i in humanFiles
]
(days, states) = hData[0].shape
# Mosquito files --------------------------------------------------------------
mID = ('FS', 'FE', 'FI')
mPops = {}
for id in mID:
    FIfiles = [glob(i + '/' + id + '*.csv')[0] for i in dirsTraces]
    pops = []
    for file in FIfiles:
        dta = np.loadtxt(file, skiprows=1, delimiter=',', usecols=(1, ))
        nodeData = monet.loadNodeData(femaleFilename=file)
        pop = monet.aggregateGenotypesInNode(nodeData, HLT)['population']
        pops.append(pop)
    mPops[id] = pops
# Populations summed into one node (disregards infection status) --------------
sums = []
for r in range(len(dirsTraces)):
    sums.append(mPops['FS'][r] + mPops['FE'][r] + mPops['FI'][r])
# Mosquito files --------------------------------------------------------------
mID = ('FS', 'FE', 'FI')
mPopsECO = {}
for id in mID:
    FIfiles = [glob(i + '/' + id + '*.csv')[0] for i in dirsTraces]
    pops = []
    for file in FIfiles:
        dta = np.loadtxt(file, skiprows=1, delimiter=',', usecols=(1, ))
        nodeData = monet.loadNodeData(femaleFilename=file)