Exemple #1
0
def parsepars(parstring):
    """Parse the parameters specific to the ACEnsemble parstring.

    The string is split into the sections titled ``START OF DATA:``,
    ``START OF AC:`` and ``START OF ENS:`` (located with
    `parstr_sectionindexes`). Within a section every line holds one or more
    whitespace-separated values that are read positionally.

    Parameters
    ----------
    parstring : str
        Full contents of the parameter file.

    Returns
    -------
    dict
        Parsed parameters, keyed by ``datafile``, ``locations``, ``dhs``,
        ``mvdata``, ``outfile``, ``saveall``, ``recode``, ``appendout``,
        ``nnears``, ``searchparams``, ``cluster_method``, ``acmetric``,
        ``acprop``, ``rseed``, ``nreal``, ``minvars``, ``fnclus``, ``tnclus``,
        ``minremove``, ``maxremove``, ``minfound``, ``maxfound`` and
        ``consensus_method``.

    Raises
    ------
    AssertionError
        If the x or y column index is not greater than zero.
    Exception
        Any error raised by `read_gslib` is re-raised after a hint is printed.
    """
    import numpy as np

    def _values_text(line):
        """Return `line` up to its last '-', or the whole line if it has none."""
        cut = line.rfind('-')
        # rfind gives -1 when there is no '-'; slicing with that would
        # silently drop the final character of the last value.
        return line if cut < 0 else line[:cut]

    pars = {}
    sectiontitles = ["START OF DATA:", "START OF AC:", "START OF ENS:"]
    startidxs, finidxs = parstr_sectionindexes(parstring, sectiontitles)
    alllines = parstring.splitlines()
    # parse the data section --------------------------------------------------------
    lines = alllines[startidxs[0]:finidxs[0]]
    # data
    pars['datafile'] = lines[0].split()[0]
    try:
        data = read_gslib(pars['datafile'])
    except Exception:
        print("ERROR: `{}` must be geoeas-formatted!".format(pars['datafile']))
        raise
    cols = data.columns
    # locations (column numbers in the parfile are 1-based, 0 means "not used")
    dh, ifx, ify, ifz = [int(i) for i in lines[1].split()[:4]]
    assert all(c > 0 for c in [ifx, ify]), "ERROR: ifx and ify cols must be > zero"
    pars['locations'] = data[[cols[ifx - 1], cols[ify - 1]]].values
    if ifz > 0:
        pars['locations'] = np.c_[pars['locations'], data[cols[ifz - 1]].values]
    # the drillhole ids come from the data table, not from the integer `dh`;
    # extracted as an array like the other data-derived entries
    pars['dhs'] = data[cols[dh - 1]].values if dh > 0 else None
    # mvdata
    varcols = [int(c) for c in _values_text(lines[2]).split()]
    pars['mvdata'] = data[[cols[vc - 1] for vc in varcols]].values
    # output options
    pars['outfile'] = lines[3].split()[0]
    pars['saveall'], pars['recode'] = [bool(int(p)) for p in lines[4].split()[:2]]
    pars['appendout'] = bool(int(lines[5].split()[0]))
    # parse the ac section --------------------------------------------------------
    lines = alllines[startidxs[1]:finidxs[1]]
    # search stuff
    pars['nnears'] = int(lines[0].split()[0])
    angs = [float(a) for a in lines[1].split()[:3]]
    ranges = [float(r) for r in lines[2].split()[:3]]
    pars['searchparams'] = tuple(angs + ranges)
    # cluster stuff, metrics and proportions
    pars['cluster_method'] = lines[3].split()[0]
    pars['acmetric'] = [int(a) for a in _values_text(lines[4]).split()]
    # each line is cut at its own last '-', not at the position found in the
    # metrics line above
    pars['acprop'] = [float(a) for a in _values_text(lines[5]).split()]
    # parse the ens section --------------------------------------------------------
    lines = alllines[startidxs[2]:finidxs[2]]
    # rseed
    pars['rseed'] = int(lines[0].split()[0])
    pars['nreal'] = int(lines[1].split()[0])
    pars['minvars'] = int(lines[2].split()[0])
    pars['fnclus'], pars['tnclus'] = [int(c) for c in lines[3].split()[:2]]
    pars['minremove'], pars['maxremove'] = [float(c) for c in lines[4].split()[:2]]
    pars['minfound'], pars['maxfound'] = [float(c) for c in lines[5].split()[:2]]
    pars['consensus_method'] = lines[6].split()[0]
    return pars
Exemple #2
0
def main():
    """Run `sp.ACEnsemble` from a parameter file named on the command line.

    Without a parfile argument the default parameter string is written to
    ``<classname>.par`` and the process exits with status 0. Otherwise the
    parfile is parsed, the model is fit and predicted, and the resulting
    labels (plus every realization when `saveall` is set) are written with
    `write_gslib`.
    """
    # every name used below derives from the clustering class
    cluster_cls = sp.ACEnsemble
    cls_name = cluster_cls.__name__
    prefix = cls_name[:5].lower()

    # command line: a single optional positional parfile
    argparser = ArgumentParser(description="Runner for {}".format(cls_name))
    argparser.add_argument(
        'parfile', type=str, nargs='?',
        help="the parfile to call `{}` with".format(cls_name))
    parfile = argparser.parse_args().parfile

    if parfile is None:
        # nothing passed: dump the default parfile and stop
        writeparfile(defaultparstr, '{}.par'.format(cls_name.lower()))
        sys.exit(0)
    assert os.path.isfile(parfile), "ERROR: {} does not exist!".format(parfile)

    pars = parsepars(readparfile(parfile))

    # entries consumed by this runner rather than by the model constructor
    runner_keys = ('datafile', 'outfile', 'saveall', 'recode', 'appendout',
                   'fnclus', 'tnclus', 'consensus_method')
    opts = {key: pars.pop(key) for key in runner_keys}

    # output table: start empty unless results are appended to the input data
    if not opts['appendout']:
        from pandas import DataFrame
        data = DataFrame()
    else:
        data = read_gslib(opts['datafile'])

    # build, fit and query the model with whatever is left in pars
    model = cluster_cls(**pars)
    model.fit(opts['tnclus'])
    labels = model.predict(opts['fnclus'], method=opts['consensus_method'])
    data['{}_clusters'.format(prefix)] = labels

    if opts['saveall']:
        clusterings = model.clusterings
        if opts['recode']:
            clusterings, _ = sp.reclass_clusters(labels, clusterings)
        for ireal in range(pars['nreal']):
            data['{}_real{}'.format(prefix, ireal)] = clusterings[:, ireal]

    write_gslib(data, opts['outfile'])
Exemple #3
0
def parsepars(parstring):
    """Parse the data and AC sections of a clustering parstring.

    The string is split into the sections titled ``START OF DATA:`` and
    ``START OF AC:`` (located with `parstr_sectionindexes`). Within a section
    every line holds one or more whitespace-separated values that are read
    positionally.

    Parameters
    ----------
    parstring : str
        Full contents of the parameter file.

    Returns
    -------
    dict
        Parsed parameters, keyed by ``datafile``, ``locations``, ``mvdata``,
        ``outfile``, ``appendout``, ``nnears``, ``searchparams``,
        ``cluster_method``, ``acmetric`` and ``nclus``.

    Raises
    ------
    AssertionError
        If the x or y column index is not greater than zero.
    Exception
        Any error raised by `read_gslib` is re-raised after a hint is printed.
    """
    import numpy as np

    def _values_text(line):
        """Return `line` up to its last '-', or the whole line if it has none."""
        cut = line.rfind('-')
        # rfind gives -1 when there is no '-'; slicing with that would
        # silently drop the final character of the last value.
        return line if cut < 0 else line[:cut]

    pars = {}
    sectiontitles = ["START OF DATA:", "START OF AC:"]
    startidxs, finidxs = parstr_sectionindexes(parstring, sectiontitles)
    alllines = parstring.splitlines()
    # parse the data section --------------------------------------------------------
    lines = alllines[startidxs[0]:finidxs[0]]
    # data
    pars['datafile'] = lines[0].split()[0]
    try:
        data = read_gslib(pars['datafile'])
    except Exception:
        print("ERROR: `{}` must be geoeas-formatted!".format(pars['datafile']))
        raise
    cols = data.columns
    # locations (column numbers in the parfile are 1-based, 0 means "not used")
    ifx, ify, ifz = [int(i) for i in lines[1].split()[:3]]
    assert all(c > 0
               for c in [ifx, ify]), "ERROR: ifx and ify cols must be > zero"
    pars['locations'] = data[[cols[ifx - 1], cols[ify - 1]]].values
    if ifz > 0:
        pars['locations'] = np.c_[pars['locations'],
                                  data[cols[ifz - 1]].values]
    # mvdata
    varcols = [int(c) for c in _values_text(lines[2]).split()]
    pars['mvdata'] = data[[cols[vc - 1] for vc in varcols]].values
    # output options
    pars['outfile'] = lines[3].split()[0]
    pars['appendout'] = bool(int(lines[4].split()[0]))
    # parse the ac section --------------------------------------------------------
    lines = alllines[startidxs[1]:finidxs[1]]
    # search stuff
    pars['nnears'] = int(lines[0].split()[0])
    angs = [float(a) for a in lines[1].split()[:3]]
    ranges = [float(r) for r in lines[2].split()[:3]]
    pars['searchparams'] = tuple(angs + ranges)
    # cluster stuff, metrics and proportions
    pars['cluster_method'] = lines[3].split()[0]
    # the parfile stores the metric as an integer code; map it through the
    # lookup table provided by sp.accluster
    pars['acmetric'] = sp.accluster._acmetric_MAPPING_R[int(
        lines[4].split()[0])]
    pars['nclus'] = int(lines[5].split()[0])
    return pars
Exemple #4
0
def main():
    """Run `sp.ACCluster` from a parameter file named on the command line.

    Without a parfile argument the default parameter string is written to
    ``<classname>.par`` and the process exits with status 0. Otherwise the
    parfile is parsed, the model is fit and predicted, and the resulting
    labels are written with `write_gslib`.
    """
    # every name used below derives from the clustering class
    cluster_cls = sp.ACCluster
    cls_name = cluster_cls.__name__
    prefix = cls_name[:3].lower()

    # command line: a single optional positional parfile
    argparser = ArgumentParser(description="Runner for {}".format(cls_name))
    argparser.add_argument(
        'parfile', type=str, nargs='?',
        help="the parfile to call `{}` with".format(cls_name))
    parfile = argparser.parse_args().parfile

    if parfile is None:
        # nothing passed: dump the default parfile and stop
        writeparfile(defaultparstr, '{}.par'.format(cls_name.lower()))
        sys.exit(0)
    assert os.path.isfile(parfile), "ERROR: {} does not exist!".format(parfile)

    pars = parsepars(readparfile(parfile))

    # entries consumed by this runner rather than by the model constructor
    runner_keys = ('datafile', 'outfile', 'appendout')
    opts = {key: pars.pop(key) for key in runner_keys}

    # output table: start empty unless results are appended to the input data
    if not opts['appendout']:
        from pandas import DataFrame
        data = DataFrame()
    else:
        data = read_gslib(opts['datafile'])

    # build, fit and query the model with whatever is left in pars
    model = cluster_cls(**pars)
    model.fit()
    data['{}_clusters'.format(prefix)] = model.predict()

    write_gslib(data, opts['outfile'])