def getTheGenes(theStartDate, templateList, dirr=os.getcwd()):
    """Collect gene-content statistics from every matching simulation run.

    Walks the directory tree rooted at `dirr`. A directory is processed when
    it contains an 'InputParameters.json' file whose date (as returned by
    ppma.loadTheDateFromParamFile) is not earlier than `theStartDate` and
    whose settings pass ppma.compareParams() against `templateList`.

    For each such directory a 'HostGenomesFile.*.csv' snapshot other than
    'HostGenomesFile.0.csv' is loaded with loadHostPopulation() and analysed
    with analiseGeneContent(). The run's VARX value is stored in the result
    under 'spp' and its VAR value under 'patho_mut'.

    Parameters:
        theStartDate: has to be a datetime.date() data type.
        templateList: template parameter settings, passed to
            ppma.lookForVARinList() and ppma.compareParams().
        dirr: root directory of the walk. NOTE: the default is evaluated
            once, when the function is defined, not at every call.

    Returns:
        A list with one analiseGeneContent() result per processed directory
        (empty when nothing matched).
    """
    vv = ppma.lookForVARinList(templateList)
    datOut = []
    for dirName, _subdirList, fileList in os.walk(dirr):
        if 'InputParameters.json' not in fileList:
            continue
        filepath = os.path.join(dirName, 'InputParameters.json')
        if not ppma.loadTheDateFromParamFile(filepath) >= theStartDate:
            continue
        paramzList = ppma.loadParamSettings(filepath)
        if not ppma.compareParams(templateList, paramzList):
            continue
        print("Data from:", dirName, end=" ")
        # Reset for every directory: otherwise a run without a usable
        # snapshot would silently reuse the file found in a previous run
        # (or raise UnboundLocalError on the very first one).
        hostPopFile = None
        popFiles = os.path.join(dirName, "HostGenomesFile.*.csv")
        for fil in glob.glob(popFiles):
            # Compare the bare file name literally; a regex with unescaped
            # dots searched in the whole path could also hit directory names.
            if os.path.basename(fil) != 'HostGenomesFile.0.csv':
                # NOTE(review): when several snapshots exist, the last one
                # in glob order wins, as before - confirm this is intended.
                hostPopFile = fil
        if hostPopFile is None:
            print("- no host genomes file other than HostGenomesFile.0.csv;",
                  "skipped.")
            continue
        Gene_list = loadHostPopulation(hostPopFile)
        geneStats = analiseGeneContent(Gene_list)
        geneStats['spp'] = float(paramzList[vv['VARX']])
        geneStats['patho_mut'] = float(paramzList[vv['VAR']])
        datOut.append(geneStats)
        print("- done!")
    return datOut
def main():
    """Main function - the script's main body.

    Command-line arguments:
        1. path to the template file,
        2. name of the output file,
        3. name of the HostGenomesFile.XXXX.csv file to look for.

    Runs serchTheDirs() on the current tree, writes the resulting table to
    the output file with np.savetxt() and echoes every row (without its last
    column) to stdout. Every failure path prints a message and calls
    sys.exit().
    """
    if len(sys.argv) <= 3:
        print("Three arguments are needed:")
        print(" 1. Give the path to template file.")
        print(" 2. Give the name of the output file.")
        print(" 3. Give the name of HostGenomesFile.XXXX.csv file")
        sys.exit()
    headerr = 'VAR VARX MRCA_time maxMutNumb numOfGenes sourceDir'
    outputFile = str(sys.argv[2])
    try:
        template = ppma.loadParamSettings(sys.argv[1])
    except Exception:
        print("Cannot load the template file. Exiting.")
        sys.exit()
    try:
        theData = serchTheDirs(sys.argv[3], template)
    except Exception:
        print("Failed to process the data. Some serious issues arose.")
        sys.exit()
    # serchTheDirs() hands back None (not an empty array) when nothing was
    # found, so len() must not be called before checking for it.
    if theData is None or not len(theData):
        print("No data files matching the criterions were found.",
              "Specify your template file.")
        sys.exit()
    FMT = '%.4e %.4e %.4e %.4e %.4e %s'
    # np.savetxt() overwrites an existing file, no pre-truncation is needed.
    np.savetxt(outputFile, theData, fmt=FMT, header=headerr, comments='#')
    for itm in theData:
        # The last column is left out of the on-screen echo.
        for ii in range(len(itm) - 1):
            print(itm[ii], "\t", end=" ")
        print()
    print("Check the output file:", str(os.getcwd()) + "/" + outputFile +
          " for details.")
def main():
    """Main function - the script's main body.

    Command-line arguments:
        1. starting date in yyyy-mm-dd format,
        2. path to the template file,
        3. name of the output file.

    Runs getTheData() for the given date and template, writes the resulting
    table to the output file with np.savetxt() and echoes every row (without
    its last column) to stdout. Every failure path prints a message and
    calls sys.exit().
    """
    if len(sys.argv) <= 3:
        print("Three arguments are needed:")
        print(" 1. Give a starting date. It has to be in yyyy-mm-dd format.")
        print(" 2. Give the path to template file.")
        print(" 3. Give the name of the output file.")
        sys.exit()
    headerr = 'VAR VARX slope intercept R2 p_value sr_err patho_number ' \
        + 'sourceDir'
    startDate = None
    try:
        startDate = ppma.readDate(sys.argv[1])
        outputFile = str(sys.argv[3])
    except ValueError:
        print("Cannot convert argument #1 to a date format.")
        sys.exit()
    if not startDate:
        print("Wrong date format.")
        sys.exit()
    try:
        template = ppma.loadParamSettings(sys.argv[2])
    except Exception:
        print("Cannot load the template file. Exiting.")
        sys.exit()
    try:
        print("Computing data...")
        theData = getTheData(startDate, template)
    except Exception:
        print("Failed to process the data. Some serious issues arose.")
        sys.exit()
    if not len(theData):
        print("No data files matching the criterions were found.",
              "Specify your template file.")
        sys.exit()
    FMT = '%.4e %.4e %.4e %.4e %.4e %.4e %.4e %.4e %s'
    # np.savetxt() overwrites an existing file, no pre-truncation is needed.
    np.savetxt(outputFile, theData, fmt=FMT, header=headerr, comments='#')
    for itm in theData:
        # The last column is left out of the on-screen echo.
        for ii in range(len(itm) - 1):
            print(itm[ii], "\t", end=" ")
        print()
    print("Check the output file:", str(os.getcwd()) + "/" + outputFile +
          " for details.")
def main():
    """Main function - the script's main body.

    Command-line arguments:
        1. starting date in yyyy-mm-dd format,
        2. path to the template file,
        3. output figure name's prefix.

    Collects the data with getTheData(), aggregates it with
    aggrDataByRunsCI() and plots it with plotAggrOut(). Every failure path
    prints a message and calls sys.exit().

    NOTE(review): the third argument is required by the length check but is
    not read anywhere in this function - confirm whether it should be
    forwarded to the plotting call.
    """
    if len(sys.argv) <= 3:
        print("Three arguments are needed:")
        print(" 1. Give a starting date. It has to be in yyyy-mm-dd format.")
        print(" 2. Give the path to template file.")
        print(" 3. Give the output figure name's prefix (e.g. the number",
              "of individual number of MHC variants.")
        sys.exit()
    # Fixed settings handed to plotAggrOut().
    ymaxx = 75
    frame = 250
    startDate = None
    try:
        startDate = ppma.readDate(sys.argv[1])
    except ValueError:
        print("Cannot convert argument #1 to a date format.")
        sys.exit()
    if not startDate:
        print("Wrong date format.")
        sys.exit()
    try:
        template = ppma.loadParamSettings(sys.argv[2])
        if template is None:
            print(
                "Failed to load the template file. Exiting.",
                "Check if the path is correct - you may wish to provide",
                "an absolute path.")
            # SystemExit is not an Exception subclass, so it is not
            # swallowed by the handler below.
            sys.exit()
        figLabel = ppma.getVarxLabel(sys.argv[2])
    except Exception:
        print("Cannot load the template file. Exiting.")
        sys.exit()
    # Deliberately not wrapped in try/except (as before): errors raised while
    # collecting the data propagate with their full traceback.
    theData = getTheData(startDate, template)
    if not len(theData):
        print("No data files matching the criterions were found.",
              "Specify your template file.")
        sys.exit()
    aggrOutCI = aggrDataByRunsCI(theData)
    plotAggrOut(aggrOutCI, frame, ymaxx, figLabel)
def main():
    """Main function - the script's main body.

    Command-line arguments:
        1. starting date in yyyy-mm-dd format,
        2. path to the template file,
        3. the last N generations to analyse (0 for everything),
        4. plot file suffix.

    Collects the data with getTheData() from the current working directory,
    saves it with np.save() under "sexSelectStrgt" + suffix, averages it
    with avgDatOut() and plots the first three columns of the average with
    justPlotDeviantFromMeanFather(). Every failure path prints a message and
    calls sys.exit().
    """
    # All four arguments are used below (sys.argv[4] is the suffix), so the
    # check has to reject anything shorter than five argv entries.
    if len(sys.argv) <= 4:
        print("Four arguments are needed:")
        print(" 1. Give a starting date. It has to be in yyyy-mm-dd format.")
        print(" 2. Give the path to template file.")
        print(" 3. The last N generations to analyse. Type 0 if you wand to",
              "analyse everything.")
        print(" 4. Give the plot file suffix.")
        sys.exit()
    startDate = None
    try:
        startDate = ppma.readDate(sys.argv[1])
    except ValueError:
        print("Cannot convert argument #1 to a date format.")
        sys.exit()
    try:
        cc = int(sys.argv[3])
    except ValueError:
        print("Cannot convert argument #3 to integer.")
        sys.exit()
    if not startDate:
        print("Wrong date format.")
        sys.exit()
    try:
        template = ppma.loadParamSettings(sys.argv[2])
        print("The template:", template)
    except Exception:
        print("Cannot load the template file. Exiting.")
        sys.exit()
    try:
        wdir = os.getcwd()
        print("Working directory:", wdir)
        theData = getTheData(startDate, template, wdir, cc)
    except Exception:
        print(
            "Failed to process the data. Some serious issues arose.",
            "Check if the cut-off host generation for calculating stats",
            "is smaller than the total number of host generations.")
        sys.exit()
    if not len(theData):
        print("No data files matching the criterions were found.",
              "Specify your template file.")
        sys.exit()
    suffix = sys.argv[4]
    np.save("sexSelectStrgt" + suffix, theData)
    out = avgDatOut(theData)
    justPlotDeviantFromMeanFather(out[:, 0], out[:, 1], out[:, 2], ".",
                                  suffix)
def getTheData(theStartDate, templateList, dirr=os.getcwd()):
    """Walk the tree under `dirr` and fit one regression per matching run.

    A directory is processed when it holds an 'InputParameters.json' file
    whose date is not earlier than `theStartDate` (which has to be a
    datetime.date() data type) and whose settings pass ppma.compareParams()
    against `templateList`. For such a run the host genomes file of the
    generation given by 'number_of_host_generations' is loaded, the numbers
    from calculateTheNumbers() get a zero appended each and are fed to
    linregress(); the fit is plotted with plotMHCvsPathoPresent().

    Returns a Numpy array of dtype `outType` with one record per run:
    (VAR, VARX, slope, intercept, r-value squared, p-value, std. error,
    number of pathogen species, directory), sorted by VAR, VARX, slope and
    intercept.
    """
    sortKeys = ['VAR', 'VARX', 'slope', 'intercept']
    varIdx = ppma.lookForVARinList(templateList)
    records = []
    for dirName, subdirList, fileList in os.walk(dirr):
        if 'InputParameters.json' not in fileList:
            continue
        filepath = os.path.join(dirName, 'InputParameters.json')
        if not ppma.loadTheDateFromParamFile(filepath) >= theStartDate:
            continue
        paramzList = ppma.loadParamSettings(filepath)
        if not ppma.compareParams(templateList, paramzList):
            continue
        with open(filepath) as paramFile:
            prms = json.load(paramFile)
        path_spp = float(prms['number_of_pathogen_species'])
        lastGen = prms['number_of_host_generations']
        genomeFileName = os.path.join(
            dirName, "HostGenomesFile." + str(lastGen) + ".csv")
        var = float(paramzList[varIdx['VAR']])
        varx = float(paramzList[varIdx['VARX']])
        try:
            print(dirName, end=' : ')
            pathos = loadPathoExposed(genomeFileName)
            hosts = loadHostPopulation(genomeFileName)
            if hosts is None or pathos is None:
                print("Failed to read data")
                continue
            print("Done")
        except Exception:
            print("ERROR in getTheData(): cant's load the host",
                  "population data")
            continue
        uniqNumb, pathoNumb = calculateTheNumbers(hosts, pathos)
        # One extra zero is appended to both series before the fit.
        uniqNumb = np.hstack((uniqNumb, 0))
        pathoNumb = np.hstack((pathoNumb, 0))
        # fit holds: slope, intercept, r_val, p_val, std_err
        fit = linregress(uniqNumb, pathoNumb)
        slope, intercept = fit[0], fit[1]
        plotMHCvsPathoPresent(uniqNumb, pathoNumb, slope, intercept, dirName)
        records.append((var, varx, slope, intercept, fit[2]**2, fit[3],
                        fit[4], path_spp, dirName))
    return np.sort(np.array(records, dtype=outType), order=sortKeys)
def getTheData(theStartDate, templateList, dirr=os.getcwd()):
    """Walk the tree under `dirr` and gather one tuple per matching run.

    A directory is processed when it holds an 'InputParameters.json' file
    whose date is not earlier than `theStartDate` and whose settings pass
    ppma.compareParams() against `templateList`. For such a directory
    awkMeanINV() is called first, then the value returned by
    loadMeanInvdMhcNumb() is stored together with the run's VAR and VARX.

    Returns a list of (VAR, VARX, meanINV) tuples.
    """
    collected = []
    varIdx = ppma.lookForVARinList(templateList)
    for dirName, subdirList, fileList in os.walk(dirr):
        if 'InputParameters.json' not in fileList:
            continue
        filepath = os.path.join(dirName, 'InputParameters.json')
        if not ppma.loadTheDateFromParamFile(filepath) >= theStartDate:
            continue
        paramzList = ppma.loadParamSettings(filepath)
        if not ppma.compareParams(templateList, paramzList):
            continue
        var = float(paramzList[varIdx['VAR']])
        varx = float(paramzList[varIdx['VARX']])
        # awkMeanINV() has to run before the mean is loaded (same order as
        # always).
        awkMeanINV(dirName)
        collected.append((var, varx, loadMeanInvdMhcNumb(dirName)))
        print("Done dir:", dirName)
    return collected
def main():
    """Main function - the script's main body.

    Command-line arguments:
        1. starting date in yyyy-mm-dd format,
        2. path to the template file.

    Collects the per-run results with getTheGenes() from the current working
    directory, concatenates them with pd.concat() and produces the plots.
    Every failure path prints a message and calls sys.exit().
    """
    if len(sys.argv) <= 2:
        print("Two arguments are needed:")
        print(" 1. Give a starting date. It has to be in yyyy-mm-dd format.")
        print(" 2. Give the path to template file.")
        sys.exit()
    startDate = None
    try:
        startDate = ppma.readDate(sys.argv[1])
    except ValueError:
        print("Cannot convert argument #1 to a date format.")
        sys.exit()
    if not startDate:
        # Nothing is printed for a falsy date here, same as before.
        return
    try:
        template = ppma.loadParamSettings(sys.argv[2])
        if template is None:
            print("Failed to load the template file. Exiting.",
                  "Check if the path is correct - you may wish to provide",
                  "an absolute path.")
            sys.exit()
    except Exception:
        print("Cannot load the template file. Exiting.")
        sys.exit()
    try:
        datOut = getTheGenes(startDate, template, os.getcwd())
        if not datOut:
            # pd.concat() raises on an empty list; report the real reason
            # instead of the generic failure message below. SystemExit is
            # not caught by `except Exception`.
            print("No data files matching the criterions were found.",
                  "Specify your template file.")
            sys.exit()
        result = pd.concat(datOut, ignore_index=True)
        plotFraction(result)
        plotHetero(result)
        plotTotNumb(result)
        plotChrVsUnq(result, 'corr', 'chr_2', 'unq_2')
        plotChrVsUnq(result, 'slope', 'chr_2', 'unq_2')
        plotChrVsUnq(result, 'corr', 'chr_1', 'unq_1')
        plotChrVsUnq(result, 'slope', 'chr_1', 'unq_1')
        plotChromoProp(result)
    except Exception:
        print("Failed to process the data. Some serious issues arose.",
              "Check if the cut-off host generation for calculating stats",
              "is smaller than the total number of host generations.")
        sys.exit()
def getTheData(theStartDate, templateList, dirr=os.getcwd(), genLast=0):
    """Walk the tree under `dirr` and compute one array per matching run.

    Variable theStartDate has to be a datetime.date() data type. A directory
    is processed when it holds an 'InputParameters.json' file whose date is
    not earlier than `theStartDate` and whose settings pass
    ppma.compareParams() against `templateList`. `genLast` is forwarded to
    loadTheParents().

    Each item of the returned list is the result of computing one
    simulation: an array whose columns are the group values from
    pickMotherSizeGroups(), the mean father-minus-mean difference of each
    group and the group sizes. Runs whose columns cannot be stacked are
    reported and left out.
    """
    results = []
    for dirName, subdirList, fileList in os.walk(dirr):
        if 'InputParameters.json' not in fileList:
            continue
        filepath = os.path.join(dirName, 'InputParameters.json')
        if not ppma.loadTheDateFromParamFile(filepath) >= theStartDate:
            continue
        paramzList = ppma.loadParamSettings(filepath)
        if not ppma.compareParams(templateList, paramzList):
            continue
        print("Processing dir:", dirName, end=" ")
        motherPath = os.path.join(dirName, 'NumberOfMhcInMother.csv')
        fatherPath = os.path.join(dirName, 'NumberOfMhcInFather.csv')
        matingPath = os.path.join(dirName, 'NumberOfMhcBeforeMating.csv')
        mothr, fathr, bmate = loadTheParents(
            genLast, motherPath, fatherPath, matingPath)
        moth, fath, bmate = trimData(mothr, fathr, bmate, 2, 100)
        mmMt = avrgMateMHCnumb(bmate)
        rMom, rDad, rMmMt = reshapeMatherFather(moth, fath, mmMt)
        ww, Fatrs, meanM = pickMotherSizeGroups(rMom, rDad, rMmMt)
        # Size of every group, kept as floats like a np.zeros() buffer.
        bSize = np.array([len(grp) for grp in Fatrs], dtype=float)
        deltas = [np.nanmean(Fatrs[k] - meanM[k])
                  for k, _ in enumerate(ww)]
        justPlotDeviantFromMeanFather(ww, deltas, bSize, dirName)
        try:
            stacked = np.vstack((ww, np.array(deltas), bSize))
            xx = np.transpose(stacked)
        except Exception:
            print(" - failed to stack the data! Check if the",
                  "input file sizes (e.g. line numbers) are OK.")
            continue
        results.append(xx)
        print(" - done.")
    return results
def serchTheDirs(FILE, template, dirr=os.getcwd()):
    """Search the tree under `dirr` for model runs and process each one.

    Every file named `FILE` marks a candidate run. The run's
    'InputParameters.json' is loaded and compared with `template`; matching
    runs are processed with processDataOneFile() and plotted with
    plotTheTimes(). Runs whose parameters or data cannot be loaded are
    reported and skipped.

    Returns a Numpy structured array (dtype `outType`) with one record per
    run: (VAR, VARX, item 6 of the processed data, second and first
    dimension of item 0 of the processed data, directory), sorted by VAR,
    VARX, MRCA_time, maxMutNumb and numOfGenes. Returns None, after printing
    an error, when no run was collected.
    """
    varIdx = ppma.lookForVARinList(template)
    sortKeys = ['VAR', 'VARX', 'MRCA_time', 'maxMutNumb', 'numOfGenes']
    rows = []
    for dirName, subdirList, fileList in os.walk(dirr):
        for file in fileList:
            filepath = os.path.join(dirName, file)
            if filepath != os.path.join(dirName, FILE):
                continue
            paramPath = os.path.join(dirName, "InputParameters.json")
            try:
                paramList = ppma.loadParamSettings(paramPath)
            except Exception:
                print("Cannot load the parameters. in dir", dirName)
                continue
            if not ppma.compareParams(template, paramList):
                continue
            try:
                DATA = processDataOneFile(filepath)
            except Exception:
                print("Cannot load the data. in dir", dirName)
                continue
            plotTheTimes(DATA[0], DATA[1], DATA[2], DATA[3], DATA[4],
                         dirName)
            var = float(paramList[varIdx['VAR']])
            varx = float(paramList[varIdx['VARX']])
            firstItem = DATA[0]
            rows.append((var, varx, DATA[6], firstItem.shape[1],
                         firstItem.shape[0], dirName))
    if not rows:
        print("ERROR in serchTheDirs(): output array is empty")
        return None
    return np.sort(np.array(rows, dtype=outType), order=sortKeys)