def aggregateDataFromPath(path, aggregationDictionary):
    """Read an experiment folder, sum all landscape populations (males and
    females) into one node, and aggregate genotype counts with the supplied
    aggregation dictionary.

    Args:
        path (str): Experiment folder to read.
        aggregationDictionary (dict): Genotype-aggregation mapping.

    Returns:
        dict: Gene-aggregated node data.
    """
    experimentFiles = monet.readExperimentFilenames(path)
    summedPopulations = monet.sumLandscapePopulationsFromFiles(
        experimentFiles, male=True, female=True, dataType=float
    )
    return monet.aggregateGenotypesInNode(summedPopulations, aggregationDictionary)
def preProcessSubLandscape(
    pop, landReps, fName, drive, nodesAggLst, nodeAggIx,
    MF=(True, True), cmpr='bz2',
    SUM=True, AGG=True, SPA=True, REP=True, SRP=True
):
    """
    Preprocesses a subset of the landscape

    Args:
        pop (list): Files list element aggregated by landscape subset
        landReps (dict): Landscape repetitions
                (spatial from monet.loadAndAggregateLandscapeDataRepetitions)
        fName (str): Filename (including path)
        drive (dict): Gene-drive dictionary
        nodesAggLst (lst): List of lists containing the indices of the nodes
                to be aggregated together
        nodeAggIx (int): Current list to process (from the nodeAggLst)
        MF (bool tuple): Male and Female boolean selectors
        cmpr (str): Compression algorithm to be used by compress-python
        SUM (bool): Population summed and gene-aggregated into one node
        AGG (bool): Population gene-aggregated in their own nodes
        SPA (bool): Genetic landscape (gene-aggregated)
        REP (bool): Garbage gene-aggregated data
        SRP (bool): Summed into one garbage gene-aggregated data
    Returns:
        None
    """
    if SUM:
        sumData = monet.sumLandscapePopulationsFromFiles(pop, MF[0], MF[1])
        sumAgg = monet.aggregateGenotypesInNode(sumData, drive)
        pkl.dump(sumAgg, fName + '_sum', compression=cmpr)
    if AGG or SPA:
        # aggData feeds both the '_agg' dump and the spatial computation;
        # computing it whenever either flag is set fixes a NameError that
        # occurred with SPA=True, AGG=False in the original code.
        aggData = monet.loadAndAggregateLandscapeData(pop, drive, MF[0], MF[1])
        if AGG:
            pkl.dump(aggData, fName + '_agg', compression=cmpr)
        if SPA:
            geneSpaTemp = monet.getGenotypeArraysFromLandscape(aggData)
            pkl.dump(geneSpaTemp, fName + '_spa', compression=cmpr)
    if REP or SRP:
        fLandReps = monet.filterAggregateGarbageByIndex(
            landReps, nodesAggLst[nodeAggIx]
        )
        # BUGFIX: only write the '_rep' artifact when REP was requested
        # (previously it was dumped whenever SRP alone was set);
        # this matches the corrected behavior of preProcessSubLandscapeV2.
        if REP:
            pkl.dump(fLandReps, fName + '_rep', compression=cmpr)
        if SRP:
            fRepsSum = [sum(i) for i in fLandReps['landscapes']]
            fRepsDict = {
                'genotypes': fLandReps['genotypes'],
                'landscapes': fRepsSum
            }
            pkl.dump(fRepsDict, fName + '_srp', compression=cmpr)
    return None
def getAggDataSSDay(pathsRoot, i):
    """Load the i-th experiment under pathsRoot, aggregate its summed
    landscape data, and compute the steady-state day.

    Returns:
        tuple: (aggregated data dict, steady-state day, experiment string)
    """
    samplePath = pathsRoot[i] + "/"
    expString = samplePath.split("/")[-2]
    fNames = monet.readExperimentFilenames(samplePath)
    summed = monet.sumLandscapePopulationsFromFiles(
        fNames, male=True, female=True, dataType=float
    )
    # NOTE: 'aggregationDictionary' is read from the enclosing module scope
    aggData = monet.aggregateGenotypesInNode(summed, aggregationDictionary)
    ssDay = aux.reachedSteadtStateAtDay(aggData, .01)
    return aggData, ssDay, expString
def calculateGeneTemporal(filenames):
    """Sum the landscape populations (males only; females excluded) and
    aggregate the genotypes into W/H/R/B bins with an auto-generated
    aggregation dictionary.

    Args:
        filenames (dict): Experiment file paths (monet filenames structure).

    Returns:
        dict: Gene-aggregated temporal data.
    """
    summed = monet.sumLandscapePopulationsFromFiles(
        filenames, male=True, female=False, dataType=float
    )
    aggDict = monet.autoGenerateGenotypesDictionary(
        ["W", "H", "R", "B"], summed["genotypes"]
    )
    return monet.aggregateGenotypesInNode(summed, aggDict)
def loadFolderAndWriteFactorialCSVInclude(
    experimentString, path, aggregationDictionary, ratiosDictionary,
    male=True, female=True, dataType=float, includePattern='*'
):
    """
    Description:
        * Wrapper function to perform the whole factorial parsing analysis on
            a folder and write the resulting CSV to drive.
    In:
        * experimentString: Experiment folder name (appended to path).
        * path: Directory where the experiment is stored.
        * aggregationDictionary: Dictionary containing the keys to aggregate
            the genotypes (created with "generateAggregationDictionary")
        * ratiosDictionary: "numerator", and "denominator" lists dictionary
            containing the columns to use in each section of the ratio.
        * includePattern: Substring a filename must contain to be kept.
            The default '*' keeps every file (no filtering).
    Out:
        * NA
    Notes:
        * NA
    """
    # Read filenames
    filenames = monet.readExperimentFilenames(path + experimentString)
    # Filter out non-needed files. BUGFIX: the original code always applied
    # the substring test, so the default pattern '*' silently discarded
    # every file (no filename contains a literal '*'). Treat '*' as
    # "keep all"; explicit substring patterns behave exactly as before.
    if includePattern != '*':
        (mFiles, fFiles) = (filenames['male'], filenames['female'])
        filenames['male'] = [i for i in mFiles if includePattern in i]
        filenames['female'] = [i for i in fFiles if includePattern in i]
    # Aggregate data
    landscapeSumData = monet.sumLandscapePopulationsFromFiles(
        filenames, male=male, female=female, dataType=dataType)
    aggregateData = monet.aggregateGenotypesInNode(
        landscapeSumData, aggregationDictionary)
    # Parse the factorial metadata out of the experiment string and write CSV
    split = monet.splitExperimentString(experimentString)
    monet.writeFactorialAnalysisCSV(
        split["releasesNumber"], int(split["coverage"]) / 1000.0,
        path, experimentString, aggregateData, ratiosDictionary)
    return None
def preProcessSubLandscapeV2(
    pop, landReps, fName, drive, nodesAggLst, nodeAggIx,
    MF=(True, True), cmpr='bz2',
    SUM=True, AGG=True, SPA=True, REP=True, SRP=True
):
    """
    Preprocesses a subset of the landscape (V2: '_rep' dump correctly
    gated on REP alone).

    Args:
        pop (list): Files list element aggregated by landscape subset
        landReps (dict): Landscape repetitions
                (spatial from monet.loadAndAggregateLandscapeDataRepetitions)
        fName (str): Filename (including path)
        drive (dict): Gene-drive dictionary
        nodesAggLst (lst): List of lists containing the indices of the nodes
                to be aggregated together
        nodeAggIx (int): Current list to process (from the nodeAggLst)
        MF (bool tuple): Male and Female boolean selectors
        cmpr (str): Compression algorithm to be used by compress-python
        SUM (bool): Population summed and gene-aggregated into one node
        AGG (bool): Population gene-aggregated in their own nodes
        SPA (bool): Genetic landscape (gene-aggregated)
        REP (bool): Garbage gene-aggregated data
        SRP (bool): Summed into one garbage gene-aggregated data
    Returns:
        None
    """
    if SUM:
        sumData = monet.sumLandscapePopulationsFromFiles(pop, MF[0], MF[1])
        sumAgg = monet.aggregateGenotypesInNode(sumData, drive)
        pkl.dump(sumAgg, fName + '_sum', compression=cmpr)
    if AGG or SPA:
        # aggData feeds both the '_agg' dump and the spatial computation;
        # computing it whenever either flag is set fixes a NameError that
        # occurred with SPA=True, AGG=False in the original code.
        aggData = monet.loadAndAggregateLandscapeData(pop, drive, MF[0], MF[1])
        if AGG:
            pkl.dump(aggData, fName + '_agg', compression=cmpr)
        if SPA:
            geneSpaTemp = monet.getGenotypeArraysFromLandscape(aggData)
            pkl.dump(geneSpaTemp, fName + '_spa', compression=cmpr)
    if REP or SRP:
        fLandReps = monet.filterAggregateGarbageByIndex(
            landReps, nodesAggLst[nodeAggIx]
        )
        if REP:
            pkl.dump(fLandReps, fName + '_rep', compression=cmpr)
        if SRP:
            fRepsSum = [sum(i) for i in fLandReps['landscapes']]
            fRepsDict = {
                'genotypes': fLandReps['genotypes'],
                'landscapes': fRepsSum
            }
            pkl.dump(fRepsDict, fName + '_srp', compression=cmpr)
    return None
print(expsPath) ########################################################################### # Stacked population plots (sums the whole landscape into a single # population count over time) ########################################################################### if STACK: # Parses the paths of all CSV files starting with 'F_' and/or 'M_' filenames = monet.readExperimentFilenames(expsPath) # Loads all the files provided and sums them into one array of dims: # [originalGenotypes, time, [counts]] landscapeSumData = monet.sumLandscapePopulationsFromFiles( filenames, male=maleToggle, female=femaleToggle, dataType=float) # Groups the genotypes into "bins" provided by the # "aggregationDictionary" by summing the counts in each one of the # columns. aggData = monet.aggregateGenotypesInNode(landscapeSumData, aggregationDictionary) # Calculates the dates at which the system arrives to the required # thresholds ssDays = [aux.introgrationDay(aggData, 0, 1 - k) for k in probeRatios] # Plotting-related instructions daysTup = [ fmtStr.format(day[1], day[0] / 7) for day in zip(ssDays, probeRatios) ] title = ' '.join(daysTup) figB = monet.plotMeanGenotypeStack(aggData, style, vLinesCoords=ssDays) figB.get_axes()[0].set_xlim(style["xRange"][0], style["xRange"][1]) figB.get_axes()[0].set_ylim(style["yRange"][0], style["yRange"][1]) plt.title('[Fraction: Week] :: ' + title, fontsize=5) monet.quickSaveFigure(figB, pathRoot + "S_" + experimentString + ".png",
def sum_nodes_in_cluster(key, cluster_dict):
    """
    Gets the summed output vector for the nodes aggregated into each
    cluster, and calls get_diff_cluster_vs_full to calculate the difference
    between this aggregation level's summed output vector, and the full
    resolution output vector.

    Input:
        key: string that should be used as the key in the dict.
             Can be parsed for the number of nodes, and the run ID
             (format: "<agg_level>/<run_id>").
        cluster_dict: is a dictionary where cluster ID maps to list of node
             IDs that cluster summarizes.
    Output:
        None

    Calls get_diff_cluster_vs_full() which writes the summed output
    difference vector for each of the clusters to the CSV.

    Note to Gillian: The side effect CSV that writes the summed output
    difference vector is called
    `agg_C000002/cluster_0000/run_Yorkeys01_0078_ATrueTrue` for example.
    Therefore, we have:
    1. aggregation level
    2. cluster ID
    3. runID, M, F

    NOTE(review): this function reads the module-level globals `ref_dir`,
    `end_ref`, `maleToggle`, `femaleToggle`, `aggregationDictionary`,
    `path_to_all_experiments`, and `process_node_id` — confirm they are
    defined before this is called.
    """
    agg_level, run_id = key.split('/')
    print(agg_level, run_id)
    # Create the per-aggregation-level output directory on first use
    if not os.path.exists("agg_" + agg_level):
        os.mkdir("agg_" + agg_level)
    clusters = list(cluster_dict.keys())
    # iterate over the clusters
    for c in clusters:
        print("Processing cluster ID ", c)
        nodes_to_summarize = cluster_dict[c]
        # Per-cluster output directory
        if not os.path.exists("agg_" + agg_level + "/cluster_" + process_node_id(c)):
            os.mkdir("agg_" + agg_level + "/cluster_" + process_node_id(c))
        filenames = dict()
        filenames['male'] = []
        filenames['female'] = []
        # grab all the filepaths for one cluster
        for node_id in nodes_to_summarize:
            # print("Here is ", node_id)
            processed_node_id = process_node_id(node_id)
            # pick a run: Yorkeys01_0027_A
            # print(os.listdir(start_ref))
            list_dir = [f for f in os.listdir(ref_dir)]
            # NOTE(review): `i` and `list_dir` are computed but no longer
            # used — the random-run selection below is commented out and the
            # reference population is fixed to ref_dir + end_ref.
            i = int(np.random.uniform(0, len(os.listdir(ref_dir))))
            print(i)
            print(len(list_dir))
            # build reference_pop
            # reference_pop = ref_dir + '/'
            reference_pop = ref_dir + '/' + end_ref
            # reference_pop += list_dir[i]
            # reference_pop += end_ref
            file_path = os.path.join(reference_pop + "F_Mean_Patch" + processed_node_id + ".csv")
            filenames['female'].append(file_path)
            file_path = os.path.join(reference_pop + "M_Mean_Patch" + processed_node_id + ".csv")
            filenames['male'].append(file_path)
        # sum all the filepaths inside one cluster, then group by genotypes
        cluster_sum = monet.sumLandscapePopulationsFromFiles(
            filenames, male=maleToggle, female=femaleToggle, dataType=float)
        # returns a dictionary with 'population' as the time series population vector
        clusterGeno_sum = monet.aggregateGenotypesInNode(
            cluster_sum, aggregationDictionary)
        # take the difference between the sum and the clustered result
        # male
        male = str(agg_level) + "/" + str(
            run_id
        ) + "/ANALYZED/E_0730_30_20_02_00020/M_Mean_Patch" + process_node_id(
            c) + ".csv"
        exp_path_M = os.path.join(path_to_all_experiments, male)
        # female
        female = str(agg_level) + "/" + str(
            run_id
        ) + "/ANALYZED/E_0730_30_20_02_00020/F_Mean_Patch" + process_node_id(
            c) + ".csv"
        exp_path_F = os.path.join(path_to_all_experiments, female)
        filenames2 = dict()
        filenames2['male'] = [exp_path_M]
        filenames2['female'] = [exp_path_F]
        # Summed output of the aggregated (clustered) experiment itself
        agg_sum = monet.sumLandscapePopulationsFromFiles(
            filenames2, male=maleToggle, female=femaleToggle)
        aggGeno_sum = monet.aggregateGenotypesInNode(
            agg_sum, aggregationDictionary)
        # take the difference between the aggregated_sum vector and the full
        # unaggregated sum vector, writes it to CSV
        get_diff_cluster_vs_full(
            aggGeno_sum, clusterGeno_sum,
            "agg_" + agg_level + "/cluster_" + process_node_id(c) + "/run_" +
            run_id + str(maleToggle) + str(femaleToggle))
# Script section: compares an unaggregated "truth" experiment against a set of
# aggregated experiments to measure the error introduced by aggregation.
expBaseName = "Yorkeys_AGG_1_"
pathRoot = "/Volumes/marshallShare/ERACR/Fowler4/Experiment/"
truthExperiment = expBaseName + "00250"  # "02195" — alternate truth experiment
# Aggregation levels (number of nodes) to evaluate
expsList = [1, 10, 50, 100, 250, 500, 750, 1000, 1250, 1500, 1750, 1971]
pathSet = pathRoot + expBaseName + "*/"
# #############################################################################
# Setting up the experiments paths
# #############################################################################
foldersList = sorted(glob.glob(pathSet + "*ANALYZED"))
# NOTE(review): glob(...)[0] assumes exactly one ANALYZED subfolder exists;
# raises IndexError otherwise.
truthExpPath = glob.glob(pathRoot + truthExperiment + "/ANALYZED/*")[0] + "/"
# #############################################################################
# Calculating the baseline level (unaggregated)
# #############################################################################
filenames = monet.readExperimentFilenames(truthExpPath)
landscapeSumData = monet.sumLandscapePopulationsFromFiles(filenames)
basePopDyns = monet.aggregateGenotypesInNode(landscapeSumData, aux.genAggDict)
# Baseline population dynamics used as the reference for the error metric
ref = basePopDyns['population']
# #############################################################################
# Experiment iterations
# #############################################################################
for i in expsList:
    # #########################################################################
    # Calculating the error metric
    # #########################################################################
    # Experiment folder names are zero-padded to 5 digits (e.g. "00010")
    refExperiment = expBaseName + str(i).rjust(5, "0")
    print(pathRoot + refExperiment)
    refExpPath = glob.glob(pathRoot + refExperiment + "/ANALYZED/*")[0] + "/"
    filenames = monet.readExperimentFilenames(refExpPath)
    landscapeSumData = monet.sumLandscapePopulationsFromFiles(filenames)
    refPopDyns = monet.aggregateGenotypesInNode(landscapeSumData, aux.genAggDict)
# Script section: loads human ('H_') and mosquito ('FS'/'FE'/'FI') trace CSVs
# from each directory in `dirsTraces` (module-level global).
humanFiles = [glob(i + '/H_*')[0] for i in dirsTraces]
hData = [
    np.loadtxt(i, skiprows=1, delimiter=',', usecols=(1, 2))
    for i in humanFiles
]
(days, states) = hData[0].shape
# Mosquito files --------------------------------------------------------------
# FS/FE/FI: female Susceptible / Exposed / Infected compartments
mID = ('FS', 'FE', 'FI')
mPops = {}
# NOTE(review): `id` shadows the builtin; left unchanged here.
for id in mID:
    FIfiles = [glob(i + '/' + id + '*.csv')[0] for i in dirsTraces]
    pops = []
    for file in FIfiles:
        # NOTE(review): `dta` is loaded but not used in this loop body
        dta = np.loadtxt(file, skiprows=1, delimiter=',', usecols=(1, ))
        nodeData = monet.loadNodeData(femaleFilename=file)
        # HLT is a module-level genotype-aggregation dictionary
        pop = monet.aggregateGenotypesInNode(nodeData, HLT)['population']
        pops.append(pop)
    mPops[id] = pops
# Populations summed into one node (disregards infection status) --------------
sums = []
for r in range(len(dirsTraces)):
    sums.append(mPops['FS'][r] + mPops['FE'][r] + mPops['FI'][r])
# Mosquito files --------------------------------------------------------------
mID = ('FS', 'FE', 'FI')
mPopsECO = {}
for id in mID:
    FIfiles = [glob(i + '/' + id + '*.csv')[0] for i in dirsTraces]
    pops = []
    for file in FIfiles:
        dta = np.loadtxt(file, skiprows=1, delimiter=',', usecols=(1, ))
        nodeData = monet.loadNodeData(femaleFilename=file)