Esempio n. 1
0
def getTheGenes(theStartDate, templateList, dirr=os.getcwd()):
    """Collect gene-content statistics from all matching simulation runs.

    Walks the directory tree rooted at `dirr`. A directory is processed when
    it holds an 'InputParameters.json' file whose date is not earlier than
    `theStartDate` and whose settings agree with `templateList` according to
    ppma.compareParams().

    Parameters:
        theStartDate: earliest accepted run date; has to be a
            datetime.date() data type.
        templateList: template parameter settings the runs are compared to.
        dirr: root of the directory tree to walk. NOTE: this default is
            evaluated once, when the function is defined.

    Returns:
        A list with one analiseGeneContent() result per processed run. Each
        result gets two extra keys: 'spp' (the run's VARX value) and
        'patho_mut' (the run's VAR value).
    """
    vv = ppma.lookForVARinList(templateList)
    datOut = []
    for dirName, subdirList, fileList in os.walk(dirr):
        for file in fileList:
            filepath = os.path.join(dirName, file)
            if(filepath == os.path.join(dirName, 'InputParameters.json') and
               ppma.loadTheDateFromParamFile(filepath) >= theStartDate):
                paramzList = ppma.loadParamSettings(filepath)
                if ppma.compareParams(templateList, paramzList):
                    print("Data from:", dirName, end=" ")
                    # Reset for every run: otherwise a run without a usable
                    # snapshot would either crash on an unbound name or
                    # silently reuse the file of the previous directory.
                    hostPopFile = None
                    popFiles = os.path.join(dirName, "HostGenomesFile.*.csv")
                    for fil in glob.glob(popFiles):
                        # Skip the generation-0 snapshot. Compare the bare
                        # file name so dots are literal and directory names
                        # cannot influence the match.
                        if os.path.basename(fil) != 'HostGenomesFile.0.csv':
                            # As before, the last match in glob order wins.
                            hostPopFile = fil
                    if hostPopFile is None:
                        print("- no host genome file found, skipping.")
                        continue
                    Gene_list = loadHostPopulation(hostPopFile)
                    geneStats = analiseGeneContent(Gene_list)
                    var = float(paramzList[vv['VAR']])
                    varx = float(paramzList[vv['VARX']])
                    geneStats['spp'] = varx
                    geneStats['patho_mut'] = var
                    datOut.append(geneStats)
                    print("- done!")
    return datOut
def main():
    """Main function - the script's main body.

    Reads three command-line arguments: the path to the template file, the
    name of the output file and the name of the HostGenomesFile.XXXX.csv
    file to look for. Matching runs are processed by serchTheDirs(); the
    resulting table is written with np.savetxt() and echoed to the screen
    without its last column.

    Exits with status 1 when arguments are missing, the template cannot be
    loaded, processing fails or no matching data are found.
    """
    if len(sys.argv) <= 3:
        print("Three arguments are needed:")
        print("  1. Give the path to template file.")
        print("  2. Give the name of the output file.")
        print("  3. Give the name of HostGenomesFile.XXXX.csv file")
        sys.exit(1)
    headerr = 'VAR VARX MRCA_time maxMutNumb numOfGenes sourceDir'
    outputFile = str(sys.argv[2])
    try:
        template = ppma.loadParamSettings(sys.argv[1])
    except Exception:
        print("Cannot load the template file. Exiting.")
        sys.exit(1)
    try:
        theData = serchTheDirs(sys.argv[3], template)
    except Exception:
        print("Failed to process the data. Some serious issues arose.")
        sys.exit(1)
    # serchTheDirs() hands back None when nothing matched, so test for that
    # before asking for the length.
    if theData is None or len(theData) == 0:
        print("No data files matching the criterions were found.",
              "Specify your template file.")
        sys.exit(1)
    FMT = '%.4e %.4e %.4e %.4e %.4e %s'
    # np.savetxt() opens the file for writing itself, so no separate
    # truncation step is needed.
    np.savetxt(outputFile, theData, fmt=FMT, header=headerr,
               comments='#')
    for itm in theData:
        # The last field (source directory) is left out of the echo.
        for ii in range(len(itm) - 1):
            print(itm[ii], "\t", end=" ")
        print()
    print("Check the output file:", str(os.getcwd()) + "/" + outputFile +
          " for details.")
def main():
    """Main function - the script's main body.

    Reads three command-line arguments: a starting date (yyyy-mm-dd), the
    path to the template file and the name of the output file. The data
    returned by getTheData() are written with np.savetxt() and echoed to the
    screen without their last column.

    Exits with status 1 when arguments are missing or invalid, the template
    cannot be loaded, processing fails or no matching data are found.
    """
    if len(sys.argv) <= 3:
        print("Three arguments are needed:")
        print("  1. Give a starting date. It has to be in yyyy-mm-dd format.")
        print("  2. Give the path to template file.")
        print("  3. Give the name of the output file.")
        sys.exit(1)
    headerr = 'VAR VARX slope intercept R2 p_value sr_err patho_number '\
        + 'sourceDir'
    outputFile = str(sys.argv[3])
    try:
        startDate = ppma.readDate(sys.argv[1])
    except ValueError:
        print("Cannot convert argument #1 to a date format.")
        sys.exit(1)
    if not startDate:
        print("Wrong date format.")
        sys.exit(1)
    try:
        template = ppma.loadParamSettings(sys.argv[2])
    except Exception:
        print("Cannot load the template file. Exiting.")
        sys.exit(1)
    try:
        print("Computing data...")
        theData = getTheData(startDate, template)
    except Exception:
        print("Failed to process the data. Some serious issues arose.")
        sys.exit(1)
    if not len(theData):
        print("No data files matching the criterions were found.",
              "Specify your template file.")
        sys.exit(1)
    FMT = '%.4e %.4e %.4e %.4e %.4e %.4e %.4e %.4e %s'
    # np.savetxt() opens the file for writing itself, so no separate
    # truncation step is needed.
    np.savetxt(outputFile,
               theData,
               fmt=FMT,
               header=headerr,
               comments='#')
    for itm in theData:
        # The last field (source directory) is left out of the echo.
        for ii in range(len(itm) - 1):
            print(itm[ii], "\t", end=" ")
        print()
    print("Check the output file:",
          str(os.getcwd()) + "/" + outputFile + " for details.")
Esempio n. 4
0
def main():
    """Main function - the script's main body.

    Reads a starting date (yyyy-mm-dd) and the path to the template file
    from the command line, collects the data with getTheData(), aggregates
    them with aggrDataByRunsCI() and plots the result with plotAggrOut().

    Exits with status 1 when arguments are missing or invalid, the template
    cannot be loaded or no matching data are found.
    """
    # NOTE(review): a third argument is required by this check and described
    # in the usage text, but sys.argv[3] is never read in this function.
    if len(sys.argv) <= 3:
        print("Three arguments are needed:")
        print("  1. Give a starting date. It has to be in yyyy-mm-dd format.")
        print("  2. Give the path to template file.")
        print("  3. Give the output figure name's prefix (e.g. the number",
              "of individual number of MHC variants.")
        sys.exit(1)
    # Plot settings handed to plotAggrOut().
    ymaxx = 75
    frame = 250
    try:
        startDate = ppma.readDate(sys.argv[1])
    except ValueError:
        print("Cannot convert argument #1 to a date format.")
        sys.exit(1)
    if not startDate:
        print("Wrong date format.")
        sys.exit(1)
    try:
        template = ppma.loadParamSettings(sys.argv[2])
        if template is None:
            print(
                "Failed to load the template file. Exiting.",
                "Check if the path is correct - you may wish to provide",
                "an absolute path.")
            sys.exit(1)
        figLabel = ppma.getVarxLabel(sys.argv[2])
    except Exception:
        print("Cannot load the template file. Exiting.")
        sys.exit(1)
    # Errors raised here are not caught (the original guarded this call with
    # `if True:` and an unreachable `else:`), so a failure shows a traceback.
    theData = getTheData(startDate, template)
    if not len(theData):
        print("No data files matching the criterions were found.",
              "Specify your template file.")
        sys.exit(1)
    aggrOutCI = aggrDataByRunsCI(theData)
    plotAggrOut(aggrOutCI, frame, ymaxx, figLabel)
def main():
    """Main function - the script's main body.

    Reads four command-line arguments: a starting date (yyyy-mm-dd), the
    path to the template file, the number of last generations to analyse
    (an integer) and the plot file suffix. The data from getTheData() are
    saved with np.save(), averaged with avgDatOut() and plotted with
    justPlotDeviantFromMeanFather().

    Exits with status 1 when arguments are missing or invalid, the template
    cannot be loaded, processing fails or no matching data are found.
    """
    # Four arguments are used below (sys.argv[4] is the plot suffix), so
    # anything shorter than five entries is a usage error.
    if len(sys.argv) <= 4:
        print("Four arguments are needed:")
        print("  1. Give a starting date. It has to be in yyyy-mm-dd format.")
        print("  2. Give the path to template file.")
        print("  3. The last N generations to analyse. Type 0 if you wand to",
              "analyse everything.")
        print("  4. Give the plot file suffix.")
        sys.exit(1)
    try:
        startDate = ppma.readDate(sys.argv[1])
    except ValueError:
        print("Cannot convert argument #1 to a date format.")
        sys.exit(1)
    try:
        cc = int(sys.argv[3])
    except ValueError:
        print("Cannot convert argument #3 to integer.")
        sys.exit(1)
    if not startDate:
        print("Wrong date format.")
        sys.exit(1)
    try:
        template = ppma.loadParamSettings(sys.argv[2])
        print("The template:", template)
    except Exception:
        print("Cannot load the template file. Exiting.")
        sys.exit(1)
    try:
        wdir = os.getcwd()
        print("Working directory:", wdir)
        theData = getTheData(startDate, template, wdir, cc)
    except Exception:
        print(
            "Failed to process the data. Some serious issues arose.",
            "Check if the cut-off host generation for calculating stats",
            "is smaller than the total number of host generations.")
        sys.exit(1)
    if not len(theData):
        print("No data files matching the criterions were found.",
              "Specify your template file.")
        sys.exit(1)
    np.save("sexSelectStrgt" + sys.argv[4], theData)
    out = avgDatOut(theData)
    justPlotDeviantFromMeanFather(out[:, 0], out[:, 1], out[:, 2], ".",
                                  sys.argv[4])
def getTheData(theStartDate, templateList, dirr=os.getcwd()):
    """Fit a linear regression for every matching run under `dirr`.

    A run directory is recognised by its 'InputParameters.json' file. It is
    used when the file's date is not earlier than `theStartDate` (which has
    to be a datetime.date() data type) and its settings agree with
    `templateList` according to ppma.compareParams().

    For each run the last host-genome snapshot is loaded, the two series
    from calculateTheNumbers() each get a 0 appended, linregress() is run on
    them and the fit is plotted with plotMHCvsPathoPresent(). Runs whose
    data cannot be loaded are reported and skipped.

    Returns a structured array of dtype `outType`, sorted by VAR, VARX,
    slope and intercept. Each record holds: VAR, VARX, slope, intercept,
    squared r value, p value, standard error, number of pathogen species
    and the run directory.
    """
    varIdx = ppma.lookForVARinList(templateList)
    sortKeys = ['VAR', 'VARX', 'slope', 'intercept']
    rows = []
    for dirName, _subdirs, fileNames in os.walk(dirr):
        for fname in fileNames:
            # Only the parameter file marks a simulation directory.
            if fname != 'InputParameters.json':
                continue
            paramPath = os.path.join(dirName, fname)
            if not (ppma.loadTheDateFromParamFile(paramPath)
                    >= theStartDate):
                continue
            runParams = ppma.loadParamSettings(paramPath)
            if not ppma.compareParams(templateList, runParams):
                continue
            with open(paramPath) as handle:
                rawParams = json.load(handle)
            pathoSpecies = float(rawParams['number_of_pathogen_species'])
            lastGen = rawParams['number_of_host_generations']
            genomePath = os.path.join(
                dirName, "HostGenomesFile." + str(lastGen) + ".csv")
            var = float(runParams[varIdx['VAR']])
            varx = float(runParams[varIdx['VARX']])
            try:
                print(dirName, end=' : ')
                pathos = loadPathoExposed(genomePath)
                hosts = loadHostPopulation(genomePath)
                if hosts is None or pathos is None:
                    print("Failed to read data")
                    continue
                print("Done")
            except Exception:
                print("ERROR in getTheData(): cant's load the host",
                      "population data")
                continue
            uniq, patho = calculateTheNumbers(hosts, pathos)
            uniq = np.hstack((uniq, 0))
            patho = np.hstack((patho, 0))
            # fit holds: slope, intercept, r_val, p_val, std_err
            fit = linregress(uniq, patho)
            slope = fit[0]
            intercept = fit[1]
            plotMHCvsPathoPresent(uniq, patho, slope, intercept, dirName)
            rows.append((var, varx, slope, intercept, fit[2]**2,
                         fit[3], fit[4], pathoSpecies, dirName))
    table = np.array(rows, dtype=outType)
    return np.sort(table, order=sortKeys)
Esempio n. 7
0
def getTheData(theStartDate, templateList, dirr=os.getcwd()):
    """Gather (VAR, VARX, meanINV) tuples for matching runs under `dirr`.

    A directory is processed when its 'InputParameters.json' has a date not
    earlier than `theStartDate` and its settings agree with `templateList`
    according to ppma.compareParams(). For each such directory
    awkMeanINV() is run first and the value returned by
    loadMeanInvdMhcNumb() is stored next to the run's VAR and VARX values.

    Returns a plain list of tuples, in directory-walk order.
    """
    collected = []
    varIdx = ppma.lookForVARinList(templateList)
    for dirName, _subdirs, fileNames in os.walk(dirr):
        for fname in fileNames:
            # Only the parameter file marks a simulation directory.
            if fname != 'InputParameters.json':
                continue
            paramPath = os.path.join(dirName, fname)
            if not (ppma.loadTheDateFromParamFile(paramPath)
                    >= theStartDate):
                continue
            runParams = ppma.loadParamSettings(paramPath)
            if not ppma.compareParams(templateList, runParams):
                continue
            var = float(runParams[varIdx['VAR']])
            varx = float(runParams[varIdx['VARX']])
            awkMeanINV(dirName)
            collected.append((var, varx, loadMeanInvdMhcNumb(dirName)))
            print("Done dir:", dirName)
    return collected
Esempio n. 8
0
def main():
    """Script entry point: collect gene statistics and draw all the plots.

    Expects two command-line arguments: a starting date (yyyy-mm-dd) and
    the path to the template file. The per-run results of getTheGenes() are
    concatenated with pd.concat() and passed to the plotting functions.
    """
    if len(sys.argv) < 3:
        for line in ("Two arguments are needed:",
                     "  1. Give a starting date. It has to be in yyyy-mm-dd "
                     "format.",
                     "  2. Give the path to template file."):
            print(line)
        sys.exit()
    try:
        startDate = ppma.readDate(sys.argv[1])
    except ValueError:
        print("Cannot convert argument #1 to a date format.")
        sys.exit()
    if not startDate:
        # An empty date ends the script quietly.
        return
    try:
        template = ppma.loadParamSettings(sys.argv[2])
        if template is None:
            print("Failed to load the template file. Exiting.",
                  "Check if the path is correct - you may wish to provide",
                  "an absolute path.")
            sys.exit()
    except Exception:
        print("Cannot load the template file. Exiting.")
        sys.exit()
    try:
        perRun = getTheGenes(startDate, template, os.getcwd())
        result = pd.concat(perRun, ignore_index=True)
        plotFraction(result)
        plotHetero(result)
        plotTotNumb(result)
        # Same call order as before: chromosome 2 first, then chromosome 1,
        # each with 'corr' followed by 'slope'.
        for chrCol, unqCol in (('chr_2', 'unq_2'), ('chr_1', 'unq_1')):
            for what in ('corr', 'slope'):
                plotChrVsUnq(result, what, chrCol, unqCol)
        plotChromoProp(result)
    except Exception:
        print("Failed to process the data. Some serious issues arose.",
              "Check if the cut-off host generation for calculating stats",
              "is smaller than the total number of host generations.")
        sys.exit()
def getTheData(theStartDate, templateList, dirr=os.getcwd(), genLast=0):
    """Process every matching simulation found below `dirr`.

    A directory is processed when its 'InputParameters.json' has a date not
    earlier than `theStartDate` (which has to be a datetime.date() data
    type) and its settings agree with `templateList` according to
    ppma.compareParams(). `genLast` is passed on to loadTheParents().

    For each run the three 'NumberOfMhc*.csv' files are loaded, trimmed,
    reshaped and grouped by the helper functions; the result is plotted
    with justPlotDeviantFromMeanFather() and stacked into one array with
    three columns (group value, mean delta, group size).

    Returns a list with one such array per successfully processed run.
    """
    results = []
    for dirName, _subdirs, fileNames in os.walk(dirr):
        for fname in fileNames:
            # Only the parameter file marks a simulation directory.
            if fname != 'InputParameters.json':
                continue
            paramPath = os.path.join(dirName, fname)
            if not (ppma.loadTheDateFromParamFile(paramPath)
                    >= theStartDate):
                continue
            runParams = ppma.loadParamSettings(paramPath)
            if not ppma.compareParams(templateList, runParams):
                continue
            print("Processing dir:", dirName, end=" ")
            motherCsv = os.path.join(dirName, 'NumberOfMhcInMother.csv')
            fatherCsv = os.path.join(dirName, 'NumberOfMhcInFather.csv')
            matingCsv = os.path.join(dirName, 'NumberOfMhcBeforeMating.csv')
            mothr, fathr, bmate = loadTheParents(
                genLast, motherCsv, fatherCsv, matingCsv)
            moth, fath, bmate = trimData(mothr, fathr, bmate, 2, 100)
            mmMt = avrgMateMHCnumb(bmate)
            rMom, rDad, rMmMt = reshapeMatherFather(moth, fath, mmMt)
            ww, Fatrs, meanM = pickMotherSizeGroups(rMom, rDad, rMmMt)
            # Number of entries in each father group, stored as floats.
            bSize = np.array([len(grp) for grp in Fatrs], dtype=float)
            deltas = [np.nanmean(Fatrs[idx] - meanM[idx])
                      for idx, _ in enumerate(ww)]
            justPlotDeviantFromMeanFather(ww, deltas, bSize, dirName)
            try:
                stacked = np.vstack((ww, np.array(deltas), bSize))
                xx = np.transpose(stacked)
            except Exception:
                print(" - failed to stack the data! Check if the",
                      "input file sizes (e.g. line numbers) are OK.")
                continue
            results.append(xx)
            print(" - done.")
    return results
def serchTheDirs(FILE, template, dirr=os.getcwd()):
    """Walk the tree below `dirr` and process every run that has `FILE`.

    For each directory containing `FILE`, the settings from its
    'InputParameters.json' are compared with `template` using
    ppma.compareParams(). Matching runs are processed with
    processDataOneFile() and plotted with plotTheTimes(). Directories whose
    parameters or data cannot be loaded are reported and skipped.

    Returns a structured array of dtype `outType` sorted by VAR, VARX,
    MRCA_time, maxMutNumb and numOfGenes, or None (after printing an error)
    when no run was collected.
    """
    varIdx = ppma.lookForVARinList(template)
    sortKeys = ['VAR', 'VARX', 'MRCA_time', 'maxMutNumb', 'numOfGenes']
    rows = []
    for dirName, _subdirs, fileNames in os.walk(dirr):
        for fname in fileNames:
            dataPath = os.path.join(dirName, fname)
            if dataPath != os.path.join(dirName, FILE):
                continue
            paramPath = os.path.join(dirName, "InputParameters.json")
            try:
                runParams = ppma.loadParamSettings(paramPath)
            except Exception:
                print("Cannot load the parameters. in dir", dirName)
                continue
            if not ppma.compareParams(template, runParams):
                continue
            try:
                DATA = processDataOneFile(dataPath)
            except Exception:
                print("Cannot load the data. in dir", dirName)
                continue
            plotTheTimes(DATA[0], DATA[1], DATA[2], DATA[3], DATA[4],
                         dirName)
            var = float(runParams[varIdx['VAR']])
            varx = float(runParams[varIdx['VARX']])
            record = (var, varx, DATA[6], DATA[0].shape[1],
                      DATA[0].shape[0], dirName)
            rows.append(record)
    if not rows:
        print("ERROR in serchTheDirs(): output array is empty")
        return None
    table = np.array(rows, dtype=outType)
    return np.sort(table, order=sortKeys)