Example #1
def main():

    # Get the options
    input = options["input"]
    where = options["where"]
    columns = options["columns"]
    tempwhere = options["t_where"]
    layer = options["layer"]
    separator = grass.separator(options["separator"])

    if where == "" or where == " " or where == "\n":
        where = None

    if columns == "" or columns == " " or columns == "\n":
        columns = None

    # Make sure the temporal database exists
    tgis.init()

    sp = tgis.open_old_stds(input, "stvds")

    rows = sp.get_registered_maps("name,layer,mapset,start_time,end_time",
                                  tempwhere, "start_time", None)

    col_names = ""
    if rows:
        for row in rows:
            vector_name = "%s@%s" % (row["name"], row["mapset"])
            # In case a layer is defined in the vector dataset,
            # we override the option layer
            if row["layer"]:
                layer = row["layer"]

            select = grass.read_command("v.db.select", map=vector_name,
                                        layer=layer, columns=columns,
                                        separator=separator, where=where)

            if not select:
                grass.fatal(_("Unable to run v.db.select for vector map <%s> "
                              "with layer %s") % (vector_name, layer))
            # The first line contains the column names
            lines = select.split("\n")
            count = 0
            for entry in lines:
                if entry.strip() != "":
                    # print the column names in case they change
                    if count == 0:
                        col_names_new = "start_time%send_time%s%s" % (
                            separator, separator, entry)
                        if col_names != col_names_new:
                            col_names = col_names_new
                            print(col_names)
                    else:
                        if row["end_time"]:
                            print("%s%s%s%s%s" % (row["start_time"], separator,
                                                  row["end_time"], separator,
                                                  entry))
                        else:
                            print("%s%s%s%s" % (row["start_time"], separator,
                                                separator, entry))
                    count += 1
Example #2
def main():

    # Get the options
    input = options["input"]
    where = options["where"]
    extended = flags["e"]
    no_header = flags["s"]
    separator = grass.separator(options["separator"])

    # Make sure the temporal database exists
    tgis.init()

    tgis.print_gridded_dataset_univar_statistics(
        "strds", input, where, extended, no_header, separator)
Example #3
def main():

    # Get the options
    inputs = options["inputs"]
    sampler = options["sample"]
    samtype = options["samtype"]
    intype = options["intype"]
    separator = grass.separator(options["separator"])
    method = options["method"]
    header = flags["c"]
    spatial = flags["s"]

    # Make sure the temporal database exists
    tgis.init()

    tgis.sample_stds_by_stds_topology(intype, samtype, inputs, sampler, header, separator, method, spatial, True)
Example #4
def main():

    # Get the options
    input = options["input"]
    columns = options["columns"]
    order = options["order"]
    where = options["where"]
    separator = grass.separator(options["separator"])
    method = options["method"]
    header = flags["u"]
    output = options["output"]

    # Make sure the temporal database exists
    tgis.init()

    tgis.list_maps_of_stds("stvds", input, columns, order, where, separator, method, header, outpath=output)
Example #5
def main():

    # Get the options
    input = options["input"]
    twhere = options["twhere"]
    layer = options["layer"]
    type = options["type"]
    column = options["column"]
    where = options["where"]
    extended = flags["e"]
    header = flags["s"]
    separator = grass.separator(options["separator"])

    # Make sure the temporal database exists
    tgis.init()

    tgis.print_vector_dataset_univar_statistics(
        input, twhere, layer, type, column, where, extended, header, separator)
Example #6
def main():
    #lazy imports
    import grass.temporal as tgis

    # Get the options
    input = options["input"]
    columns = options["columns"]
    order = options["order"]
    where = options["where"]
    separator = grass.separator(options["separator"])
    method = options["method"]
    header = flags["u"]
    output = options["output"]

    # Make sure the temporal database exists
    tgis.init()

    tgis.list_maps_of_stds("stvds", input, columns, order, where, separator,
                           method, header, outpath=output)
Example #7
def main():
    # Get the options
    name = options["input"]
    maps = options["maps"]
    type = options["type"]
    file = options["file"]
    separator = grass.separator(options["separator"])
    start = options["start"]
    end = options["end"]
    unit = options["unit"]
    increment = options["increment"]
    interval = flags["i"]

    # Make sure the temporal database exists
    tgis.init()
    # Register maps
    tgis.register_maps_in_space_time_dataset(
        type=type, name=name, maps=maps, file=file, start=start, end=end,
        unit=unit, increment=increment, dbif=None, interval=interval, fs=separator)
Example #8
def main():
    # lazy imports
    import grass.temporal as tgis

    # Get the options
    inputs = options["inputs"]
    sampler = options["sample"]
    samtype = options["samtype"]
    intype = options["intype"]
    separator = grass.separator(options["separator"])
    method = options["method"]
    header = flags["c"]
    spatial = flags["s"]

    # Make sure the temporal database exists
    tgis.init()

    tgis.sample_stds_by_stds_topology(intype, samtype, inputs, sampler, header,
                                      separator, method, spatial, True)
Example #9
def main():

    # Get the options
    name = options["input"]
    maps = options["maps"]
    type = options["type"]
    file = options["file"]
    separator = grass.separator(options["separator"])
    start = options["start"]
    end = options["end"]
    unit = options["unit"]
    increment = options["increment"]
    interval = flags["i"]

    # Make sure the temporal database exists
    tgis.init()
    # Register maps
    tgis.register_maps_in_space_time_dataset(
        type=type, name=name, maps=maps, file=file, start=start, end=end,
        unit=unit, increment=increment, dbif=None, interval=interval, fs=separator)
Example #10
def main():

    # Get the options
    input = options["input"]
    output = options["output"]
    where = options["where"]
    extended = flags["e"]
    no_header = flags["s"]
    separator = grass.separator(options["separator"])

    # Make sure the temporal database exists
    tgis.init()

    if not output or output == "-":
        output = None

    tgis.print_gridded_dataset_univar_statistics("str3ds", input, output,
                                                 where, extended, no_header,
                                                 separator)
Example #11
def main():

    # Get the options
    input = options["input"]
    output = options["output"]
    where = options["where"]
    extended = flags["e"]
    no_header = flags["s"]
    rast_region = bool(flags["r"])
    separator = grass.separator(options["separator"])

    # Make sure the temporal database exists
    tgis.init()

    if not output or output == "-":
        output = None

    tgis.print_gridded_dataset_univar_statistics(
        "strds", input, output, where, extended, no_header, separator, rast_region
    )
Example #12
def main():

    # Get the options
    input = options["input"]
    output = options["output"]
    twhere = options["twhere"]
    layer = options["layer"]
    type = options["type"]
    column = options["column"]
    where = options["where"]
    extended = flags["e"]
    header = flags["s"]
    separator = grass.separator(options["separator"])

    # Make sure the temporal database exists
    tgis.init()

    if not output or output == "-":
        output = None

    tgis.print_vector_dataset_univar_statistics(
        input, output, twhere, layer, type, column, where, extended, header, separator)
Example #13
def main():
    orig_point_map = options['input']
    flow_file = options['flow_input_file']
    minoffset = float(options['minimum_offset'])
    maxoffset = float(options['maximum_offset'])
    vertices = int(options['vertices'])
    outputfile = options['output']
    separator = gscript.separator(options['separator'])
    sameok = flags['s']
    header = True

    pid = os.getpid()

    global tmplines, tmplines2, tmppoints, vseginfile, vnetinfile
    tmplines = 'tmp_vnetcurvedarcs_tmplines_%d' % pid
    tmplines2 = 'tmp_vnetcurvedarcs_tmplines2_%d' % pid
    tmppoints = 'tmp_vnetcurvedarcs_tmppoints_%d' % pid

    vnetinfile, sqlfile = process_infile(flow_file, separator, header, sameok,
                                         outputfile)
    gscript.message(_("Creating straight flow lines..."))
    gscript.run_command('v.net',
                        points=orig_point_map,
                        operation='arcs',
                        file_=vnetinfile,
                        out=tmplines,
                        overwrite=True,
                        quiet=True)

    linedata = gscript.read_command('v.to.db',
                                    flags='p',
                                    map_=tmplines,
                                    option='length',
                                    quiet=True).splitlines()

    lineinfo = {}
    for line in linedata:
        data = line.split('|')
        if int(data[0]) > 0:
            lineinfo[int(data[0])] = float(data[1])

    vseginfile, maxcat = write_segmentdefs(lineinfo, minoffset, maxoffset,
                                           vertices)

    gscript.message(_("Creating points of curved lines..."))
    gscript.run_command('v.segment',
                        input_=tmplines,
                        out=tmppoints,
                        rules=vseginfile,
                        overwrite=True,
                        quiet=True)

    gscript.message(_("Creating curved lines from points..."))

    vnetinfile = write_segarcdefs(lineinfo, maxcat)
    gscript.run_command('v.net',
                        points=tmppoints,
                        output=tmplines,
                        operation='arcs',
                        file_=vnetinfile,
                        overwrite=True,
                        quiet=True)

    gscript.run_command('v.extract',
                        input_=tmplines,
                        output=tmplines2,
                        layer=1,
                        overwrite=True,
                        quiet=True)

    gscript.message(_("Creating polylines..."))
    gscript.run_command('v.build.polylines',
                        input_=tmplines2,
                        output=outputfile,
                        cats='multi',
                        overwrite=True,
                        quiet=True)

    gscript.run_command(
        'v.db.addtable',
        map_=outputfile,
        columns="from_node int, to_node int, volume double precision",
        quiet=True,
        overwrite=True)

    gscript.run_command('db.execute', input_=sqlfile, quiet=True)
Example #14
def main():
    """Import file according to the command line parameters"""
    # Allow more locals in the main.
    # pylint: disable=too-many-locals
    options, unused_flags = gs.parser()

    # Requires pyproj >= 2.2.0
    # Lazy importing pyproj because it is not a dependency of GRASS GIS.
    from pyproj import Transformer  # pylint: disable=import-outside-toplevel

    to_crs = get_current_crs()
    # We assign x, y as the result, so we need to keep the east-north (xy)
    # ordering.
    transformer = Transformer.from_crs(options["crs"],
                                       to_crs,
                                       always_xy=True,
                                       skip_equivalent=True)

    input_filename = options["input"]
    output_map = options["output"]
    lat_name = options["latitude"]
    lon_name = options["longitude"]

    separator = gs.separator(options["separator"])

    integer_names = options["int_columns"].split(",")
    float_names = options["real_columns"].split(",")

    # Lat and lon as doubles because we require that anyway.
    float_names.extend([lat_name, lon_name])

    if options["limit"]:
        limit = int(options["limit"])
    else:
        limit = None
    assert limit is None or limit >= 1, "Check limit option definition"

    fieldnames = get_header_from_csv(input_filename, separator)
    if "X" not in fieldnames and "Y" not in fieldnames:
        # If X and Y are already present, we will replace their content.
        fieldnames.extend(["X", "Y"])
        float_names.extend(["X", "Y"])
        y_index = len(fieldnames)  # One-based index in v.in.ascii
        x_index = y_index - 1
    else:
        y_index = fieldnames.index("Y") + 1
        x_index = fieldnames.index("X") + 1

    tmp_file = get_tmp_file_name()

    with open(input_filename) as infile, open(tmp_file, mode="w") as outfile:
        reader = csv.DictReader(infile, delimiter=separator)
        writer = csv.DictWriter(
            outfile,
            fieldnames=fieldnames,
            delimiter=separator,
            quotechar='"',
            lineterminator="\n",
        )
        writer.writeheader()
        for i, row in enumerate(reader):
            if limit and i >= limit:
                break
            lon = float(row[lon_name])
            lat = float(row[lat_name])
            x, y = transformer.transform(lon, lat)
            row["X"] = x
            row["Y"] = y
            writer.writerow(row)

    sql_columns = names_to_sql_columns(fieldnames, float_names, integer_names)

    gs.run_command(
        "v.in.ascii",
        input=tmp_file,
        output=output_map,
        format="point",
        separator=separator,
        text='"',
        skip=1,
        columns=sql_columns,
        x=x_index,
        y=y_index,
    )

    return 0
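# A minimal sketch of the names_to_sql_columns helper used above (a
# hypothetical reconstruction, not the module's actual implementation): it
# builds the column definition string v.in.ascii expects, typing the known
# numeric columns and defaulting everything else to text.
def names_to_sql_columns(fieldnames, float_names, integer_names):
    """Build a v.in.ascii columns= definition from CSV field names"""
    definitions = []
    for name in fieldnames:
        if name in integer_names:
            definitions.append("%s integer" % name)
        elif name in float_names:
            definitions.append("%s double precision" % name)
        else:
            definitions.append("%s text" % name)
    return ",".join(definitions)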
Example #15
def main():

    global allmap
    global trainmap
    global feature_vars
    global training_vars
    global model_output_csv
    global model_output_csvt
    global temptable
    global r_commands
    global reclass_files

    allmap = trainmap = feature_vars = training_vars = None
    model_output_csv = model_output_csvt = temptable = r_commands = None
    reclass_files = None

    voting_function = "voting <- function (x, w) {\n"
    voting_function += "res <- tapply(w, x, sum, simplify = TRUE)\n"
    voting_function += "maj_class <- as.numeric(names(res)[which.max(res)])\n"
    voting_function += "prob <- as.numeric(res[which.max(res)])\n"
    voting_function += "return(list(maj_class=maj_class, prob=prob))\n}"

    weighting_functions = {}
    weighting_functions[
        'smv'] = "weights <- rep(1/length(weighting_base), length(weighting_base))"
    weighting_functions[
        'swv'] = "weights <- weighting_base/sum(weighting_base)"
    weighting_functions[
        'bwwv'] = "weights <- 1-(max(weighting_base) - weighting_base)/(max(weighting_base) - min(weighting_base))"
    weighting_functions[
        'qbwwv'] = "weights <- ((min(weighting_base) - weighting_base)/(max(weighting_base) - min(weighting_base)))**2"

    packages = {
        'svmRadial': ['kernlab'],
        'svmLinear': ['kernlab'],
        'svmPoly': ['kernlab'],
        'rf': ['randomForest'],
        'rpart': ['rpart'],
        'C5.0': ['C50'],
        'xgbTree': ['xgboost', 'plyr']
    }

    install_package = "if(!is.element('%s', installed.packages()[,1])){\n"
    install_package += "cat('\\n\\nInstalling %s package from CRAN\n')\n"
    install_package += "if(!file.exists(Sys.getenv('R_LIBS_USER'))){\n"
    install_package += "dir.create(Sys.getenv('R_LIBS_USER'), recursive=TRUE)\n"
    install_package += ".libPaths(Sys.getenv('R_LIBS_USER'))}\n"
    install_package += "chooseCRANmirror(ind=1)\n"
    install_package += "install.packages('%s', dependencies=TRUE)}"

    if options['segments_map']:
        allfeatures = options['segments_map']
        segments_layer = options['segments_layer']
        allmap = True
    else:
        allfeatures = options['segments_file']
        allmap = False

    if options['training_map']:
        training = options['training_map']
        training_layer = options['training_layer']
        trainmap = True
    else:
        training = options['training_file']
        trainmap = False

    classcol = options['train_class_column']
    output_classcol = options['output_class_column']
    output_probcol = None
    if options['output_prob_column']:
        output_probcol = options['output_prob_column']
    classifiers = options['classifiers'].split(',')
    weighting_modes = options['weighting_modes'].split(',')
    weighting_metric = options['weighting_metric']
    processes = int(options['processes'])
    folds = options['folds']
    partitions = options['partitions']
    tunelength = options['tunelength']
    separator = gscript.separator(options['separator'])
    tunegrids = literal_eval(
        options['tunegrids']) if options['tunegrids'] else {}

    classification_results = None
    if options['classification_results']:
        classification_results = options['classification_results'].replace(
            "\\", "/")

    model_details = None
    if options['model_details']:
        model_details = options['model_details'].replace("\\", "/")

    raster_segments_map = None
    if options['raster_segments_map']:
        raster_segments_map = options['raster_segments_map']

    classified_map = None
    if options['classified_map']:
        classified_map = options['classified_map']

    r_script_file = None
    if options['r_script_file']:
        r_script_file = options['r_script_file']

    accuracy_file = None
    if options['accuracy_file']:
        accuracy_file = options['accuracy_file'].replace("\\", "/")

    bw_plot_file = None
    if options['bw_plot_file']:
        bw_plot_file = options['bw_plot_file'].replace("\\", "/")

    if allmap:
        feature_vars = gscript.tempfile().replace("\\", "/")
        gscript.run_command('v.db.select',
                            map_=allfeatures,
                            file_=feature_vars,
                            layer=segments_layer,
                            quiet=True,
                            overwrite=True)
    else:
        feature_vars = allfeatures.replace("\\", "/")

    if trainmap:
        training_vars = gscript.tempfile().replace("\\", "/")
        gscript.run_command('v.db.select',
                            map_=training,
                            file_=training_vars,
                            layer=training_layer,
                            quiet=True,
                            overwrite=True)
    else:
        training_vars = training.replace("\\", "/")

    r_commands = gscript.tempfile().replace("\\", "/")

    r_file = open(r_commands, 'w')

    if processes > 1:
        install = install_package % ('doParallel', 'doParallel', 'doParallel')
        r_file.write(install)
        r_file.write("\n")

    # automatic installation of missing R packages
    install = install_package % ('caret', 'caret', 'caret')
    r_file.write(install)
    r_file.write("\n")
    install = install_package % ('e1071', 'e1071', 'e1071')
    r_file.write(install)
    r_file.write("\n")
    for classifier in classifiers:
        # knn is included in caret
        if classifier == "knn" or classifier == "knn1":
            continue
        for package in packages[classifier]:
            install = install_package % (package, package, package)
            r_file.write(install)
            r_file.write("\n")
    r_file.write("\n")
    r_file.write('require(caret)')
    r_file.write("\n")
    r_file.write(
        'features <- read.csv("%s", sep="%s", header=TRUE, row.names=1)' %
        (feature_vars, separator))
    r_file.write("\n")
    r_file.write(
        'training <- read.csv("%s", sep="%s", header=TRUE, row.names=1)' %
        (training_vars, separator))
    r_file.write("\n")
    r_file.write("training$%s <- as.factor(training$%s)" %
                 (classcol, classcol))
    r_file.write("\n")
    if processes > 1:
        r_file.write("library(doParallel)")
        r_file.write("\n")
        r_file.write("registerDoParallel(cores = %d)" % processes)
        r_file.write("\n")
    r_file.write(
        "MyFolds.cv <- createMultiFolds(training$%s, k=%s, times=%s)" %
        (classcol, folds, partitions))
    r_file.write("\n")
    r_file.write(
        "MyControl.cv <- trainControl(method='repeatedcv', index=MyFolds.cv)")
    r_file.write("\n")
    r_file.write("fmla <- %s ~ ." % classcol)
    r_file.write("\n")
    r_file.write("models.cv <- list()")
    r_file.write("\n")
    for classifier in classifiers:
        if classifier == 'knn1':
            r_file.write("Grid <- expand.grid(k=1)")
            r_file.write("\n")
            r_file.write(
                "knn1Model.cv <- train(fmla, training, method='knn', trControl=MyControl.cv, tuneGrid=Grid)"
            )
            r_file.write("\n")
            r_file.write("models.cv$knn1 <- knn1Model.cv")
            r_file.write("\n")
        else:
            if classifier in tunegrids:
                r_file.write("Grid <- expand.grid(%s)" % tunegrids[classifier])
                r_file.write("\n")
                r_file.write(
                    "%sModel.cv <- train(fmla,training,method='%s', trControl=MyControl.cv, tuneGrid=Grid)"
                    % (classifier, classifier))
            else:
                r_file.write(
                    "%sModel.cv <- train(fmla,training,method='%s', trControl=MyControl.cv, tuneLength=%s)"
                    % (classifier, classifier, tunelength))
            r_file.write("\n")
            r_file.write("models.cv$%s <- %sModel.cv" %
                         (classifier, classifier))
            r_file.write("\n")

    r_file.write("if (length(models.cv)>1) {")
    r_file.write("\n")
    r_file.write("resamps.cv <- resamples(models.cv)")
    r_file.write("\n")
    r_file.write(
        "accuracy_means <- as.vector(apply(resamps.cv$values[seq(2,length(resamps.cv$values), by=2)], 2, mean))"
    )
    r_file.write("\n")
    r_file.write(
        "kappa_means <- as.vector(apply(resamps.cv$values[seq(3,length(resamps.cv$values), by=2)], 2, mean))"
    )
    r_file.write("\n")
    r_file.write("} else {")
    r_file.write("\n")
    r_file.write("resamps.cv <- models.cv[[1]]$resample")
    r_file.write("\n")
    r_file.write("accuracy_means <- mean(resamps.cv$Accuracy)")
    r_file.write("\n")
    r_file.write("kappa_means <- mean(resamps.cv$Kappa)")
    r_file.write("\n")
    r_file.write("}")
    r_file.write("\n")
    r_file.write("predicted <- data.frame(predict(models.cv, features))")
    r_file.write("\n")
    if flags['i']:
        r_file.write(
            "resultsdf <- data.frame(id=rownames(features), predicted)")
    else:
        r_file.write("resultsdf <- data.frame(id=rownames(features))")
    r_file.write("\n")
    r_file.write(voting_function)
    r_file.write("\n")

    if weighting_metric == 'kappa':
        r_file.write("weighting_base <- kappa_means")
    else:
        r_file.write("weighting_base <- accuracy_means")
    r_file.write("\n")
    for weighting_mode in weighting_modes:
        r_file.write(weighting_functions[weighting_mode])
        r_file.write("\n")
        r_file.write("weights <- weights / sum(weights)")
        r_file.write("\n")
        r_file.write("vote <- apply(predicted, 1, voting, w=weights)")
        r_file.write("\n")
        r_file.write(
            "vote <- as.data.frame(matrix(unlist(vote), ncol=2, byrow=TRUE))")
        r_file.write("\n")
        r_file.write("resultsdf$%s_%s <- vote$V1" %
                     (output_classcol, weighting_mode))
        r_file.write("\n")
        if len(classifiers) > 1:
            r_file.write("resultsdf$%s_%s <- vote$V2" %
                         (output_probcol, weighting_mode))
            r_file.write("\n")

    if allmap and not flags['f']:
        model_output = gscript.tempfile().replace("\\", "/")
        model_output_csv = model_output + '.csv'
        write_string = "write.csv(resultsdf, '%s'," % model_output_csv
        write_string += " row.names=FALSE, quote=FALSE)"
        r_file.write(write_string)
        r_file.write("\n")
    if classified_map:
        reclass_files = {}
        if flags['i']:
            for classifier in classifiers:
                tmpfilename = gscript.tempfile()
                reclass_files[classifier] = tmpfilename.replace("\\", "/")
                r_file.write(
                    "tempdf <- data.frame(resultsdf$id, resultsdf$%s)" %
                    (classifier))
                r_file.write("\n")
                r_file.write(
                    "reclass <- data.frame(out=apply(tempdf, 1, function(x) paste(x[1],'=', x[2])))"
                )
                r_file.write("\n")
                r_file.write(
                    "write.table(reclass$out, '%s', col.names=FALSE, row.names=FALSE, quote=FALSE)"
                    % reclass_files[classifier])
                r_file.write("\n")
        for weighting_mode in weighting_modes:
            tmpfilename = gscript.tempfile()
            reclass_files[weighting_mode] = tmpfilename.replace("\\", "/")
            r_file.write(
                "tempdf <- data.frame(resultsdf$id, resultsdf$%s_%s)" %
                (output_classcol, weighting_mode))
            r_file.write("\n")
            r_file.write(
                "reclass <- data.frame(out=apply(tempdf, 1, function(x) paste(x[1],'=', x[2])))"
            )
            r_file.write("\n")
            r_file.write(
                "write.table(reclass$out, '%s', col.names=FALSE, row.names=FALSE, quote=FALSE)"
                % reclass_files[weighting_mode])
            r_file.write("\n")

    if classification_results:
        r_file.write(
            "write.csv(resultsdf, '%s', row.names=FALSE, quote=FALSE)" %
            classification_results)
        r_file.write("\n")
    if accuracy_file:
        r_file.write(
            "df_means <- data.frame(method=names(models.cv),accuracy=accuracy_means, kappa=kappa_means)"
        )
        r_file.write("\n")
        r_file.write(
            "write.csv(df_means, '%s', row.names=FALSE, quote=FALSE)" %
            accuracy_file)
        r_file.write("\n")
    if model_details:
        r_file.write("sink('%s')" % model_details)
        r_file.write("\n")
        r_file.write("cat('BEST TUNING VALUES\n')")
        r_file.write("\n")
        r_file.write("cat('******************************\n\n')")
        r_file.write("\n")
        r_file.write("lapply(models.cv, function(x) x$best)")
        r_file.write("\n")
        r_file.write("cat('\n')")
        r_file.write("\n")
        r_file.write("cat('\nSUMMARY OF RESAMPLING RESULTS\n')")
        r_file.write("\n")
        r_file.write("cat('******************************\n\n')")
        r_file.write("\n")
        r_file.write("summary(resamps.cv)")
        r_file.write("\n")
        r_file.write("cat('\n')")
        r_file.write("\n")
        r_file.write("cat('\nRESAMPLED CONFUSION MATRICES\n')")
        r_file.write("\n")
        r_file.write("cat('******************************\n\n')")
        r_file.write("\n")
        r_file.write(
            "conf.mat.cv <- lapply(models.cv, function(x) confusionMatrix(x))")
        r_file.write("\n")
        r_file.write("print(conf.mat.cv)")
        r_file.write("\n")
        r_file.write("cat('\nDETAILED CV RESULTS\n')")
        r_file.write("\n")
        r_file.write("cat('******************************\n\n')")
        r_file.write("\n")
        r_file.write("lapply(models.cv, function(x) x$results)")
        r_file.write("\n")
        r_file.write("sink()")
        r_file.write("\n")

    if bw_plot_file and len(classifiers) > 1:
        r_file.write("png('%s.png')" % bw_plot_file)
        r_file.write("\n")
        r_file.write("print(bwplot(resamps.cv))")
        r_file.write("\n")
        r_file.write("dev.off()")
    r_file.close()

    if r_script_file:
        shutil.copy(r_commands, r_script_file)

    gscript.message("Running R now. Following output is R output.")
    try:
        subprocess.check_call(
            ['Rscript', r_commands],
            stderr=subprocess.STDOUT,
        )
    except subprocess.CalledProcessError:
        gscript.fatal(
            "There was an error in the execution of the R script.\nPlease check the R output."
        )

    gscript.message("Finished running R.")

    if allmap and not flags['f']:

        model_output_csvt = model_output + '.csvt'
        temptable = 'classif_tmp_table_%d' % os.getpid()

        f = open(model_output_csvt, 'w')
        header_string = '"Integer"'
        if flags['i']:
            for classifier in classifiers:
                header_string += ',"Integer"'
        if len(classifiers) > 1:
            for weighting_mode in weighting_modes:
                header_string += ',"Integer"'
                header_string += ',"Real"'
        else:
            header_string += ',"Integer"'

        f.write(header_string)
        f.close()

        gscript.message("Loading results into attribute table")
        gscript.run_command('db.in.ogr',
                            input_=model_output_csv,
                            output=temptable,
                            overwrite=True,
                            quiet=True)
        index_creation = "CREATE INDEX idx_%s_cat" % temptable
        index_creation += " ON %s (id)" % temptable
        gscript.run_command('db.execute', sql=index_creation, quiet=True)
        columns = gscript.read_command('db.columns',
                                       table=temptable).splitlines()[1:]
        orig_cat = gscript.vector_db(allfeatures)[int(segments_layer)]['key']
        gscript.run_command('v.db.join',
                            map_=allfeatures,
                            column=orig_cat,
                            otable=temptable,
                            ocolumn='id',
                            subset_columns=columns,
                            quiet=True)

    if classified_map:
        for classification, reclass_file in reclass_files.items():
            output_map = classified_map + '_' + classification
            gscript.run_command('r.reclass',
                                input=raster_segments_map,
                                output=output_map,
                                rules=reclass_file,
                                quiet=True)
Example #16
def main():
    orig_point_map = options["input"]
    flow_file = options["flow_input_file"]
    minoffset = float(options["minimum_offset"])
    maxoffset = float(options["maximum_offset"])
    vertices = int(options["vertices"])
    outputfile = options["output"]
    separator = gscript.separator(options["separator"])
    sameok = flags["s"]
    header = True

    pid = os.getpid()

    global tmplines, tmplines2, tmppoints, vseginfile, vnetinfile
    tmplines = "tmp_vnetcurvedarcs_tmplines_%d" % pid
    tmplines2 = "tmp_vnetcurvedarcs_tmplines2_%d" % pid
    tmppoints = "tmp_vnetcurvedarcs_tmppoints_%d" % pid

    vnetinfile, sqlfile = process_infile(
        flow_file, separator, header, sameok, outputfile
    )
    gscript.message(_("Creating straight flow lines..."))
    gscript.run_command(
        "v.net",
        points=orig_point_map,
        operation="arcs",
        file_=vnetinfile,
        out=tmplines,
        overwrite=True,
        quiet=True,
    )

    linedata = gscript.read_command(
        "v.to.db", flags="p", map_=tmplines, option="length", quiet=True
    ).splitlines()

    lineinfo = {}
    for line in linedata:
        data = line.split("|")
        if int(data[0]) > 0:
            lineinfo[int(data[0])] = float(data[1])

    vseginfile, maxcat = write_segmentdefs(lineinfo, minoffset, maxoffset, vertices)

    gscript.message(_("Creating points of curved lines..."))
    gscript.run_command(
        "v.segment",
        input_=tmplines,
        out=tmppoints,
        rules=vseginfile,
        overwrite=True,
        quiet=True,
    )

    gscript.message(_("Creating curved lines from points..."))

    vnetinfile = write_segarcdefs(lineinfo, maxcat)
    gscript.run_command(
        "v.net",
        points=tmppoints,
        output=tmplines,
        operation="arcs",
        file_=vnetinfile,
        overwrite=True,
        quiet=True,
    )

    gscript.run_command(
        "v.extract",
        input_=tmplines,
        output=tmplines2,
        layer=1,
        overwrite=True,
        quiet=True,
    )

    gscript.message(_("Creating polylines..."))
    gscript.run_command(
        "v.build.polylines",
        input_=tmplines2,
        output=outputfile,
        cats="multi",
        overwrite=True,
        quiet=True,
    )

    gscript.run_command(
        "v.db.addtable",
        map_=outputfile,
        columns="from_node int, to_node int, volume double precision",
        quiet=True,
        overwrite=True,
    )

    gscript.run_command("db.execute", input_=sqlfile, quiet=True)
Example #17
def test_unrecognized_separator():
    """Check that unknown strings are just passed through"""
    assert gs.separator("apple") == "apple"
Example #18
def main(options, flags):
    import grass.pygrass.modules as pymod
    import grass.temporal as tgis
    from grass.pygrass.vector import VectorTopo

    invect = options["input"]
    if invect.find('@') != -1:
        invect = invect.split('@')[0]
    incol = options["date_column"]
    indate = options["date"]
    strds = options["strds"]
    if strds.find('@') != -1:
        strds_name = strds.split('@')[0]
    else:
        strds_name = strds
    output = options["output"]
    cols = options["columns"].split(',')
    mets = options["method"].split(',')
    gran = options["granularity"]
    dateformat = options["date_format"]
    separator = gscript.separator(options["separator"])

    stdout = False
    if output != '-' and flags['u']:
        gscript.fatal(_("Cannot combine 'output' option and 'u' flag"))
    elif output != '-' and flags['c']:
        gscript.fatal(_("Cannot combine 'output' option and 'c' flag"))
    elif output == '-' and (flags['u'] or flags['c']):
        output = invect
        gscript.warning(_("Attribute table of vector {name} will be updated"
                          "...").format(name=invect))
    else:
        stdout = True
    if flags['c']:
        cols = []
        for m in mets:
            colname = "{st}_{me}".format(st=strds_name, me=m)
            cols.append(colname)
            try:
                pymod.Module("v.db.addcolumn", map=invect, columns="{col} "
                             "double precision".format(col=colname))
            except CalledModuleError:
                gscript.fatal(_("Not possible to create column "
                                "{col}".format(col=colname)))

    if output != '-' and len(cols) != len(mets):
        gscript.fatal(_("'columns' and 'method' options must have the same "
                        "number of elements"))
    tgis.init()
    dbif = tgis.SQLDatabaseInterfaceConnection()
    dbif.connect()
    sp = tgis.open_old_stds(strds, "strds", dbif)

    if sp.get_temporal_type() == 'absolute':
        delta = int(tgis.gran_to_gran(gran, sp.get_granularity(), True))
        if tgis.gran_singular_unit(gran) in ['year', 'month']:
            delta = int(tgis.gran_to_gran(gran, '1 day', True))
            td = timedelta(delta)
        elif tgis.gran_singular_unit(gran) == 'day':
            delta = tgis.gran_to_gran(gran, sp.get_granularity(), True)
            td = timedelta(delta)
        elif tgis.gran_singular_unit(gran) == 'hour':
            td = timedelta(hours=delta)
        elif tgis.gran_singular_unit(gran) == 'minute':
            td = timedelta(minutes=delta)
        elif tgis.gran_singular_unit(gran) == 'second':
            td = timedelta(seconds=delta)
    else:
        if sp.get_granularity() >= int(gran):
            gscript.fatal(_("Input granularity is smaller or equal to the {iv}"
                            " STRDS granularity".format(iv=strds)))
        td = int(gran)
    if incol and indate:
        gscript.fatal(_("Cannot combine 'date_column' and 'date' options"))
    elif not incol and not indate:
        gscript.fatal(_("You have to fill 'date_column' or 'date' option"))
    elif incol:
        try:
            dates = pymod.Module("db.select", flags='c', stdout_=PI,
                                 stderr_=PI, sql="SELECT DISTINCT {dc} from "
                                   "{vmap} order by {dc}".format(vmap=invect,
                                                                 dc=incol))
            mydates = dates.outputs["stdout"].value.splitlines()
        except CalledModuleError:
            gscript.fatal(_("db.select return an error"))
    elif indate:
        mydates = [indate]
        pymap = VectorTopo(invect)
        pymap.open('r')
        if len(pymap.dblinks) == 0:
            try:
                pymap.close()
                pymod.Module("v.db.addtable", map=invect)
            except CalledModuleError:
                dbif.close()
                gscript.fatal(_("Unable to add table <%s> to vector map "
                                "<%s>" % invect))
        if pymap.is_open():
            pymap.close()
        qfeat = pymod.Module("v.category", stdout_=PI, stderr_=PI,
                             input=invect, option='print')
        myfeats = qfeat.outputs["stdout"].value.splitlines()

    if stdout:
        outtxt = ''
    for data in mydates:
        if sp.get_temporal_type() == 'absolute':
            fdata = datetime.strptime(data, dateformat)
        else:
            fdata = int(data)
        if flags['a']:
            sdata = fdata + td
            mwhere = ("start_time >= '{inn}' and end_time < "
                      "'{out}'".format(inn=fdata, out=sdata))
        else:
            sdata = fdata - td
            mwhere = ("start_time >= '{inn}' and end_time < "
                      "'{out}'".format(inn=sdata, out=fdata))
        lines = None
        try:
            r_what = pymod.Module("t.rast.what", points=invect, strds=strds,
                                  layout='timerow', separator=separator,
                                  flags="v", where=mwhere, quiet=True,
                                  stdout_=PI, stderr_=PI)
            lines = r_what.outputs["stdout"].value.splitlines()
        except CalledModuleError:
            pass
        if incol:
            try:
                qfeat = pymod.Module("db.select", flags='c', stdout_=PI,
                                     stderr_=PI, sql="SELECT DISTINCT cat from"
                                     " {vmap} where {dc}='{da}' order by "
                                     "cat".format(vmap=invect, da=data,
                                                  dc=incol))
                myfeats = qfeat.outputs["stdout"].value.splitlines()
            except CalledModuleError:
                gscript.fatal(_("db.select returned an error for date "
                                "{da}".format(da=data)))
        if not lines and stdout:
            for feat in myfeats:
                outtxt += "{di}{sep}{da}".format(di=feat, da=data,
                                                   sep=separator)
                for n in range(len(mets)):
                    outtxt += "{sep}{val}".format(val='*', sep=separator)
                outtxt += "\n"
        if not lines:
            continue
        x = 0
        for line in lines:
            vals = line.split(separator)
            if vals[0] in myfeats:
                try:
                    nvals = np.array(vals[4:]).astype(float)
                except ValueError:
                    if stdout:
                        outtxt += "{di}{sep}{da}".format(di=vals[0],
                                                         da=data,
                                                         sep=separator)
                        for n in range(len(mets)):
                            outtxt += "{sep}{val}".format(val='*',
                                                          sep=separator)
                        outtxt += "\n"
                    continue
                if stdout:
                    outtxt += "{di}{sep}{da}".format(di=vals[0], da=data,
                                                     sep=separator)
                for n in range(len(mets)):
                    result = return_value(nvals, mets[n])
                    if stdout:
                        outtxt += "{sep}{val}".format(val=result,
                                                      sep=separator)
                    else:
                        try:
                            if incol:
                                pymod.Module("v.db.update", map=output,
                                             column=cols[n], value=str(result),
                                             where="{dc}='{da}' AND cat="
                                             "{ca}".format(da=data, ca=vals[0],
                                                           dc=incol))
                            else:
                                pymod.Module("v.db.update", map=output,
                                             column=cols[n], value=str(result),
                                             where="cat={ca}".format(ca=vals[0]))
                        except CalledModuleError:
                            gscript.fatal(_("v.db.update return an error"))
                if stdout:
                    outtxt += "\n"
                if x == len(myfeats):
                    break
                else:
                    x += 1
    if stdout:
        print(outtxt)
Example #19
def main():

    global allmap
    global trainmap
    global feature_vars
    global training_vars
    global model_output_csv
    global model_output_csvt
    global temptable
    global r_commands
    global reclass_files

    allmap = trainmap = feature_vars = training_vars = None
    model_output_csv = model_output_csvt = temptable = r_commands = None
    reclass_files = None

    voting_function = "voting <- function (x, w) {\n"
    voting_function += "res <- tapply(w, x, sum, simplify = TRUE)\n"
    voting_function += "maj_class <- as.numeric(names(res)[which.max(res)])\n"
    voting_function += "prob <- as.numeric(res[which.max(res)])\n"
    voting_function += "return(list(maj_class=maj_class, prob=prob))\n}"

    weighting_functions = {}
    weighting_functions[
        "smv"] = "weights <- rep(1/length(weighting_base), length(weighting_base))"
    weighting_functions[
        "swv"] = "weights <- weighting_base/sum(weighting_base)"
    weighting_functions[
        "bwwv"] = "weights <- 1-(max(weighting_base) - weighting_base)/(max(weighting_base) - min(weighting_base))"
    weighting_functions[
        "qbwwv"] = "weights <- ((min(weighting_base) - weighting_base)/(max(weighting_base) - min(weighting_base)))**2"

    packages = {
        "svmRadial": ["kernlab"],
        "svmLinear": ["kernlab"],
        "svmPoly": ["kernlab"],
        "rf": ["randomForest"],
        "ranger": ["ranger", "dplyr"],
        "rpart": ["rpart"],
        "C5.0": ["C50"],
        "xgbTree": ["xgboost", "plyr"],
    }

    install_package = "if(!is.element('%s', installed.packages()[,1])){\n"
    install_package += "cat('\\n\\nInstalling %s package from CRAN')\n"
    install_package += "if(!file.exists(Sys.getenv('R_LIBS_USER'))){\n"
    install_package += "dir.create(Sys.getenv('R_LIBS_USER'), recursive=TRUE)\n"
    install_package += ".libPaths(Sys.getenv('R_LIBS_USER'))}\n"
    install_package += "chooseCRANmirror(ind=1)\n"
    install_package += "install.packages('%s', dependencies=TRUE)}"

    if options["segments_map"]:
        allfeatures = options["segments_map"]
        segments_layer = options["segments_layer"]
        allmap = True
    else:
        allfeatures = options["segments_file"]
        allmap = False

    if options["training_map"]:
        training = options["training_map"]
        training_layer = options["training_layer"]
        trainmap = True
    else:
        training = options["training_file"]
        trainmap = False

    classcol = None
    if options["train_class_column"]:
        classcol = options["train_class_column"]
    output_classcol = options["output_class_column"]
    output_probcol = None
    if options["output_prob_column"]:
        output_probcol = options["output_prob_column"]
    classifiers = options["classifiers"].split(",")
    weighting_modes = options["weighting_modes"].split(",")
    weighting_metric = options["weighting_metric"]
    if len(classifiers) == 1:
        gscript.message("Only one classifier, so no voting applied")

    processes = int(options["processes"])
    folds = options["folds"]
    partitions = options["partitions"]
    tunelength = options["tunelength"]
    separator = gscript.separator(options["separator"])
    tunegrids = literal_eval(
        options["tunegrids"]) if options["tunegrids"] else {}

    max_features = None
    if options["max_features"]:
        max_features = int(options["max_features"])

    training_sample_size = None
    if options["training_sample_size"]:
        training_sample_size = options["training_sample_size"]

    tuning_sample_size = None
    if options["tuning_sample_size"]:
        tuning_sample_size = options["tuning_sample_size"]

    output_model_file = None
    if options["output_model_file"]:
        output_model_file = options["output_model_file"].replace("\\", "/")

    input_model_file = None
    if options["input_model_file"]:
        input_model_file = options["input_model_file"].replace("\\", "/")

    classification_results = None
    if options["classification_results"]:
        classification_results = options["classification_results"].replace(
            "\\", "/")

    probabilities = flags["p"]

    model_details = None
    if options["model_details"]:
        model_details = options["model_details"].replace("\\", "/")

    raster_segments_map = None
    if options["raster_segments_map"]:
        raster_segments_map = options["raster_segments_map"]

    classified_map = None
    if options["classified_map"]:
        classified_map = options["classified_map"]

    r_script_file = None
    if options["r_script_file"]:
        r_script_file = options["r_script_file"]

    variable_importance_file = None
    if options["variable_importance_file"]:
        variable_importance_file = options["variable_importance_file"].replace(
            "\\", "/")

    accuracy_file = None
    if options["accuracy_file"]:
        accuracy_file = options["accuracy_file"].replace("\\", "/")

    bw_plot_file = None
    if options["bw_plot_file"]:
        bw_plot_file = options["bw_plot_file"].replace("\\", "/")

    if allmap:
        feature_vars = gscript.tempfile().replace("\\", "/")
        gscript.run_command(
            "v.db.select",
            map_=allfeatures,
            file_=feature_vars,
            layer=segments_layer,
            quiet=True,
            overwrite=True,
        )
    else:
        feature_vars = allfeatures.replace("\\", "/")

    if trainmap:
        training_vars = gscript.tempfile().replace("\\", "/")
        gscript.run_command(
            "v.db.select",
            map_=training,
            file_=training_vars,
            layer=training_layer,
            quiet=True,
            overwrite=True,
        )
    else:
        training_vars = training.replace("\\", "/")

    r_commands = gscript.tempfile().replace("\\", "/")

    r_file = open(r_commands, "w")

    if processes > 1:
        install = install_package % ("doParallel", "doParallel", "doParallel")
        r_file.write(install)
        r_file.write("\n")

    # automatic installation of missing R packages
    install = install_package % ("caret", "caret", "caret")
    r_file.write(install)
    r_file.write("\n")
    install = install_package % ("e1071", "e1071", "e1071")
    r_file.write(install)
    r_file.write("\n")
    install = install_package % ("data.table", "data.table", "data.table")
    r_file.write(install)
    r_file.write("\n")
    for classifier in classifiers:
        if classifier in packages:
            for package in packages[classifier]:
                install = install_package % (package, package, package)
                r_file.write(install)
                r_file.write("\n")
    r_file.write("\n")
    r_file.write("library(caret)")
    r_file.write("\n")
    r_file.write("library(data.table)")
    r_file.write("\n")

    if processes > 1:
        r_file.write("library(doParallel)")
        r_file.write("\n")
        r_file.write("registerDoParallel(cores = %d)" % processes)
        r_file.write("\n")

    if not flags["t"]:
        r_file.write(
            "features <- data.frame(fread('%s', sep='%s', header=TRUE, blank.lines.skip=TRUE, showProgress=FALSE), row.names=1)"
            % (feature_vars, separator))
        r_file.write("\n")
        if classcol:
            r_file.write(
                "if('%s' %%in%% names(features)) {features <- subset(features, select=-%s)}"
                % (classcol, classcol))
            r_file.write("\n")

    if input_model_file:
        r_file.write("finalModels <- readRDS('%s')" % input_model_file)
        r_file.write("\n")
        for classifier in classifiers:
            for package in packages[classifier]:
                r_file.write("library(%s)" % package)
                r_file.write("\n")
    else:
        r_file.write(
            "training <- data.frame(fread('%s', sep='%s', header=TRUE, blank.lines.skip=TRUE, showProgress=FALSE), row.names=1)"
            % (training_vars, separator))
        r_file.write("\n")
        # We have to make sure that class variable values start with a letter as
        # they will be used as variables in the probabilities calculation
        r_file.write("origclassnames <- training$%s" % classcol)
        r_file.write("\n")
        r_file.write(
            "training$%s <- as.factor(paste('class', training$%s, sep='_'))" %
            (classcol, classcol))
        r_file.write("\n")
        if tuning_sample_size:
            r_file.write(
                "rndid <- with(training, ave(training[,1], %s, FUN=function(x) {sample.int(length(x))}))"
                % classcol)
            r_file.write("\n")
            r_file.write("tuning_data <- training[rndid<=%s,]" %
                         tuning_sample_size)
            r_file.write("\n")
        else:
            r_file.write("tuning_data <- training")
            r_file.write("\n")
        # If a max_features value is set, then proceed to feature selection.
        # Currently, feature selection uses random forest. TODO: specific feature selection for each classifier.
        if max_features:
            r_file.write(
                "RfeControl <- rfeControl(functions=rfFuncs, method='cv', number=10, returnResamp = 'all')"
            )
            r_file.write("\n")
            r_file.write(
                "RfeResults <- rfe(subset(tuning_data, select=-%s), tuning_data$%s, sizes=c(1:%i), rfeControl=RfeControl)"
                % (classcol, classcol, max_features))
            r_file.write("\n")
            r_file.write("if(length(predictors(RfeResults))>%s)" %
                         max_features)
            r_file.write("\n")
            r_file.write(
                "{if((RfeResults$results$Accuracy[%s+1] - RfeResults$results$Accuracy[%s])/RfeResults$results$Accuracy[%s] < 0.03)"
                % (max_features, max_features, max_features))
            r_file.write("\n")
            r_file.write(
                "{RfeUpdate <- update(RfeResults, subset(tuning_data, select=-%s), tuning_data$%s, size=%s)"
                % (classcol, classcol, max_features))
            r_file.write("\n")
            r_file.write("bestPredictors <- RfeUpdate$bestVar}}")
            r_file.write(" else {")
            r_file.write("\n")
            r_file.write("bestPredictors <- predictors(RfeResults)}")
            r_file.write("\n")
            r_file.write(
                "tuning_data <- tuning_data[,c('%s', bestPredictors)]" %
                classcol)
            r_file.write("\n")
            r_file.write("training <- training[,c('%s', bestPredictors)]" %
                         classcol)
            r_file.write("\n")
            if not flags["t"]:
                r_file.write("features <- features[,bestPredictors]")
                r_file.write("\n")
        if probabilities:
            r_file.write(
                "MyControl.cv <- trainControl(method='repeatedcv', number=%s, repeats=%s, classProbs=TRUE, sampling='down')"
                % (folds, partitions))
        else:
            r_file.write(
                "MyControl.cv <- trainControl(method='repeatedcv', number=%s, repeats=%s, sampling='down')"
                % (folds, partitions))
        r_file.write("\n")
        r_file.write("fmla <- %s ~ ." % classcol)
        r_file.write("\n")
        r_file.write("models.cv <- list()")
        r_file.write("\n")
        r_file.write("finalModels <- list()")
        r_file.write("\n")
        r_file.write("variableImportance <- list()")
        r_file.write("\n")
        if training_sample_size:
            r_file.write(
                "rndid <- with(training, ave(training[,2], %s, FUN=function(x) {sample.int(length(x))}))"
                % classcol)
            r_file.write("\n")
            r_file.write("training_data <- training[rndid<=%s,]" %
                         training_sample_size)
            r_file.write("\n")
        else:
            r_file.write("training_data <- training")
            r_file.write("\n")
        for classifier in classifiers:
            if classifier in tunegrids:
                r_file.write("Grid <- expand.grid(%s)" % tunegrids[classifier])
                r_file.write("\n")
                r_file.write(
                    "%sModel.cv <- train(fmla, tuning_data, method='%s', trControl=MyControl.cv, tuneGrid=Grid"
                    % (classifier, classifier))
            else:
                r_file.write(
                    "%sModel.cv <- train(fmla, tuning_data, method='%s', trControl=MyControl.cv, tuneLength=%s"
                    % (classifier, classifier, tunelength))
            if flags["n"]:
                r_file.write(", preprocess=c('center', 'scale')")
            r_file.write(")")
            r_file.write("\n")
            r_file.write("models.cv$%s <- %sModel.cv" %
                         (classifier, classifier))
            r_file.write("\n")
            r_file.write(
                "finalControl <- trainControl(method = 'none', classProbs = TRUE)"
            )
            r_file.write("\n")

            r_file.write(
                "finalModel <- train(fmla, training_data, method='%s', trControl=finalControl, tuneGrid=%sModel.cv$bestTune"
                % (classifier, classifier))
            if flags["n"]:
                r_file.write(", preprocess=c('center', 'scale')")
            r_file.write(")")
            r_file.write("\n")
            r_file.write("finalModels$%s <- finalModel" % classifier)
            r_file.write("\n")
            r_file.write("variableImportance$%s <- varImp(finalModel)" %
                         classifier)
            r_file.write("\n")
        if len(classifiers) > 1:
            r_file.write("resamps.cv <- resamples(models.cv)")
            r_file.write("\n")
            r_file.write(
                "accuracy_means <- as.vector(apply(resamps.cv$values[seq(2,length(resamps.cv$values), by=2)], 2, mean))"
            )
            r_file.write("\n")
            r_file.write(
                "kappa_means <- as.vector(apply(resamps.cv$values[seq(3,length(resamps.cv$values), by=2)], 2, mean))"
            )
            r_file.write("\n")
        else:
            r_file.write("resamps.cv <- models.cv[[1]]$resample")
            r_file.write("\n")
            r_file.write("accuracy_means <- mean(resamps.cv$Accuracy)")
            r_file.write("\n")
            r_file.write("kappa_means <- mean(resamps.cv$Kappa)")
            r_file.write("\n")

        if output_model_file:
            r_file.write("saveRDS(finalModels, '%s')" % (output_model_file))
            r_file.write("\n")

    if not flags["t"]:
        r_file.write("predicted <- data.frame(predict(finalModels, features))")
        r_file.write("\n")
        # Now erase the 'class_' prefix again in order to get original class values
        r_file.write(
            "predicted <- data.frame(sapply(predicted, function (x) {gsub('class_', '', x)}))"
        )
        r_file.write("\n")
        if probabilities:
            r_file.write(
                "probabilities <- data.frame(predict(finalModels, features, type='prob'))"
            )
            r_file.write("\n")
            r_file.write(
                "colnames(probabilities) <- gsub('.c', '_prob_c', colnames(probabilities), fixed=TRUE)"
            )
            r_file.write("\n")
        r_file.write("ids <- rownames(features)")
        r_file.write("\n")
        # Free memory as soon as possible by deleting data that is no longer needed
        r_file.write("rm(features)")
        r_file.write("\n")
        if flags["i"] or len(classifiers) == 1:
            r_file.write("resultsdf <- data.frame(id=ids, predicted)")
        else:
            r_file.write("resultsdf <- data.frame(id=ids)")
        r_file.write("\n")

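        # With several classifiers, combine their individual predictions by
        # weighted voting; the weights are derived from the cross-validated
        # accuracy or kappa means computed above and normalized to sum to 1.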
        if len(classifiers) > 1:
            r_file.write(voting_function)
            r_file.write("\n")

            if weighting_metric == "kappa":
                r_file.write("weighting_base <- kappa_means")
            else:
                r_file.write("weighting_base <- accuracy_means")
            r_file.write("\n")
            for weighting_mode in weighting_modes:
                r_file.write(weighting_functions[weighting_mode])
                r_file.write("\n")
                r_file.write("weights <- weights / sum(weights)")
                r_file.write("\n")
                r_file.write("vote <- apply(predicted, 1, voting, w=weights)")
                r_file.write("\n")
                r_file.write(
                    "vote <- as.data.frame(matrix(unlist(vote), ncol=2, byrow=TRUE))"
                )
                r_file.write("\n")
                r_file.write("resultsdf$%s_%s <- vote$V1" %
                             (output_classcol, weighting_mode))
                r_file.write("\n")
                r_file.write("resultsdf$%s_%s <- vote$V2" %
                             (output_probcol, weighting_mode))
                r_file.write("\n")

        r_file.write("rm(predicted)")
        r_file.write("\n")

        if allmap and not flags["f"]:
            model_output = gscript.tempfile().replace("\\", "/")
            model_output_csv = model_output + ".csv"
            write_string = "write.csv(resultsdf, '%s'," % model_output_csv
            write_string += " row.names=FALSE, quote=FALSE)"
            r_file.write(write_string)
            r_file.write("\n")

        if classified_map:
            reclass_files = {}
            if len(classifiers) > 1:
                if flags["i"]:
                    for classifier in classifiers:
                        tmpfilename = gscript.tempfile()
                        reclass_files[classifier] = tmpfilename.replace(
                            "\\", "/")
                        r_file.write(
                            "tempdf <- data.frame(resultsdf$id, resultsdf$%s)"
                            % (classifier))
                        r_file.write("\n")
                        r_file.write(
                            "reclass <- data.frame(out=apply(tempdf, 1, function(x) paste(x[1],'=', x[2])))"
                        )
                        r_file.write("\n")
                        r_file.write(
                            "write.table(reclass$out, '%s', col.names=FALSE, row.names=FALSE, quote=FALSE)"
                            % reclass_files[classifier])
                        r_file.write("\n")
                for weighting_mode in weighting_modes:
                    tmpfilename = gscript.tempfile()
                    reclass_files[weighting_mode] = tmpfilename.replace(
                        "\\", "/")
                    r_file.write(
                        "tempdf <- data.frame(resultsdf$id, resultsdf$%s_%s)" %
                        (output_classcol, weighting_mode))
                    r_file.write("\n")
                    r_file.write(
                        "reclass <- data.frame(out=apply(tempdf, 1, function(x) paste(x[1],'=', x[2])))"
                    )
                    r_file.write("\n")
                    r_file.write(
                        "write.table(reclass$out, '%s', col.names=FALSE, row.names=FALSE, quote=FALSE)"
                        % reclass_files[weighting_mode])
                    r_file.write("\n")
            else:
                tmpfilename = gscript.tempfile()
                reclass_files[classifiers[0]] = tmpfilename.replace("\\", "/")
                r_file.write(
                    "reclass <- data.frame(out=apply(resultsdf, 1, function(x) paste(x[1],'=', x[2])))"
                )
                r_file.write("\n")
                r_file.write(
                    "write.table(reclass$out, '%s', col.names=FALSE, row.names=FALSE, quote=FALSE)"
                    % reclass_files[classifiers[0]])
                r_file.write("\n")

        if classification_results:
            if probabilities:
                r_file.write("resultsdf <- cbind(resultsdf, probabilities)")
                r_file.write("\n")
                r_file.write("rm(probabilities)")
                r_file.write("\n")
            r_file.write(
                "write.csv(resultsdf, '%s', row.names=FALSE, quote=FALSE)" %
                classification_results)
            r_file.write("\n")
            r_file.write("rm(resultsdf)")
            r_file.write("\n")
        r_file.write("\n")

    if accuracy_file:
        r_file.write(
            "df_means <- data.frame(method=names(models.cv), accuracy=accuracy_means, kappa=kappa_means)"
        )
        r_file.write("\n")
        r_file.write(
            "write.csv(df_means, '%s', row.names=FALSE, quote=FALSE)" %
            accuracy_file)
        r_file.write("\n")
    if variable_importance_file:
        r_file.write("sink('%s')" % variable_importance_file)
        r_file.write("\n")
        for classifier in classifiers:
            r_file.write("cat('\\nClassifier: %s\\n')" % classifier)
            r_file.write("\n")
            r_file.write("cat('******************************\\n')")
            r_file.write("\n")
            r_file.write(
                "variableImportance$%s$importance[order(variableImportance$%s$importance$Overall, decreasing=TRUE),, drop=FALSE]"
                % (classifier, classifier))
            r_file.write("\n")
        r_file.write("sink()")
        r_file.write("\n")
        r_file.write("\n")
    if model_details:
        r_file.write("sink('%s')" % model_details)
        r_file.write("\n")
        r_file.write("cat('BEST TUNING VALUES\\n')")
        r_file.write("\n")
        r_file.write("cat('******************************\\n\\n')")
        r_file.write("\n")
        r_file.write("lapply(models.cv, function(x) x$bestTune)")
        r_file.write("\n")
        r_file.write("cat('\\n\\n')")
        r_file.write("\n")
        r_file.write("cat('SUMMARY OF RESAMPLING RESULTS\\n')")
        r_file.write("\n")
        r_file.write("cat('******************************\\n\\n')")
        r_file.write("\n")
        r_file.write("summary(resamps.cv)")
        r_file.write("\n")
        r_file.write("cat('\\nRESAMPLED CONFUSION MATRICES\\n')")
        r_file.write("\n")
        r_file.write("cat('******************************\\n\\n')")
        r_file.write("\n")
        r_file.write(
            "conf.mat.cv <- lapply(models.cv, function(x) confusionMatrix(x))")
        r_file.write("\n")
        r_file.write("print(conf.mat.cv)")
        r_file.write("\n")
        r_file.write("cat('\\nDETAILED CV RESULTS\\n')")
        r_file.write("\n")
        r_file.write("cat('******************************\\n\\n')")
        r_file.write("\n")
        r_file.write("lapply(models.cv, function(x) x$results)")
        r_file.write("\n")
        r_file.write("sink()")
        r_file.write("\n")

    if bw_plot_file and len(classifiers) > 1:
        r_file.write("png('%s.png')" % bw_plot_file)
        r_file.write("\n")
        r_file.write("print(bwplot(resamps.cv))")
        r_file.write("\n")
        r_file.write("dev.off()")
        r_file.write("\n")

    r_file.close()

    if r_script_file:
        shutil.copy(r_commands, r_script_file)

    gscript.message("Running R now. Following output is R output.")
    try:
        subprocess.check_call(
            ["Rscript", r_commands],
            stderr=subprocess.STDOUT,
        )
    except subprocess.CalledProcessError:
        gscript.fatal(
            "There was an error in the execution of the R script.\nPlease check the R output."
        )

    gscript.message("Finished running R.")

    if allmap and not flags["f"]:

        model_output_csvt = model_output + ".csvt"
        temptable = "classif_tmp_table_%d" % os.getpid()

        f = open(model_output_csvt, "w")
        header_string = '"Integer"'
        if flags["i"]:
            for classifier in classifiers:
                header_string += ',"Integer"'
        if len(classifiers) > 1:
            for weighting_mode in weighting_modes:
                header_string += ',"Integer"'
                header_string += ',"Real"'
        else:
            header_string += ',"Integer"'

        f.write(header_string)
        f.close()

        gscript.message("Loading results into attribute table")
        gscript.run_command(
            "db.in.ogr",
            input_=model_output_csv,
            output=temptable,
            overwrite=True,
            quiet=True,
        )
        index_creation = "CREATE INDEX idx_%s_cat" % temptable
        index_creation += " ON %s (id)" % temptable
        gscript.run_command("db.execute", sql=index_creation, quiet=True)
        columns = gscript.read_command("db.columns",
                                       table=temptable).splitlines()[1:]
        orig_cat = gscript.vector_db(allfeatures)[int(segments_layer)]["key"]
        gscript.run_command(
            "v.db.join",
            map_=allfeatures,
            column=orig_cat,
            otable=temptable,
            ocolumn="id",
            subset_columns=columns,
            quiet=True,
        )

    if classified_map:
        for classification, reclass_file in reclass_files.items():
            output_map = classified_map + "_" + classification
            gscript.run_command(
                "r.reclass",
                input=raster_segments_map,
                output=output_map,
                rules=reclass_file,
                quiet=True,
            )
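(The voting_function and weighting_functions strings written into the R script above are defined earlier in the module. As a rough illustration of the weighted-voting idea they implement, here is a minimal Python sketch; the function name, inputs, and tie-breaking are illustrative assumptions, not the addon's exact logic.)

def weighted_vote(predictions, weights):
    # predictions: one class label per classifier, in classifier order
    # weights: normalized weights (summing to 1), in the same order
    scores = {}
    for label, weight in zip(predictions, weights):
        scores[label] = scores.get(label, 0.0) + weight
    best = max(scores, key=scores.get)  # ties resolve to the first key found
    return best, scores[best]  # winning class and its summed weight

print(weighted_vote(["forest", "water", "forest"], [0.5, 0.25, 0.25]))
# ('forest', 0.75)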
Example #20
0
def main():
    # lazy imports
    import grass.temporal as tgis

    # Get the options
    type = options["type"]
    temporal_type = options["temporaltype"]
    columns = options["columns"]
    order = options["order"]
    where = options["where"]
    separator = gscript.separator(options["separator"])
    outpath = options["output"]
    colhead = flags['c']

    # Make sure the temporal database exists
    tgis.init()

    sp = tgis.dataset_factory(type, None)
    dbif = tgis.SQLDatabaseInterfaceConnection()
    dbif.connect()
    first = True

    if gscript.verbosity() > 0 and not outpath:
        sys.stderr.write("----------------------------------------------\n")

    if outpath:
        outfile = open(outpath, 'w')

    for ttype in temporal_type.split(","):
        if ttype == "absolute":
            time = "absolute time"
        else:
            time = "relative time"

        stds_list = tgis.get_dataset_list(type, ttype, columns, where, order, dbif=dbif)

        # Use the correct mapset order: the current mapset first,
        # then the others in alphabetical order
        mapsets = tgis.get_tgis_c_library_interface().available_mapsets()

        # Print for each mapset separately
        for key in mapsets:
            if key in stds_list.keys():
                rows = stds_list[key]

                if rows:
                    if gscript.verbosity() > 0 and not outpath:
                        if issubclass(sp.__class__, tgis.AbstractMapDataset):
                            sys.stderr.write(_("Time stamped %s maps with %s available in mapset <%s>:\n")%
                                                     (sp.get_type(), time, key))
                        else:
                            sys.stderr.write(_("Space time %s datasets with %s available in mapset <%s>:\n")%
                                                     (sp.get_new_map_instance(None).get_type(), time, key))

                    # Print the column names if requested
                    if colhead and first:
                        output = ""
                        count = 0
                        for key in rows[0].keys():
                            if count > 0:
                                output += separator + str(key)
                            else:
                                output += str(key)
                            count += 1
                        if outpath:
                            outfile.write("{st}\n".format(st=output))
                        else:
                            print(output)
                        first = False

                    for row in rows:
                        output = ""
                        count = 0
                        for col in row:
                            if count > 0:
                                output += separator + str(col)
                            else:
                                output += str(col)
                            count += 1
                        if outpath:
                            outfile.write("{st}\n".format(st=output))
                        else:
                            print(output)
    if outpath:
        outfile.close()
    dbif.close()
Example #21
0
def main():

    # Get the options
    type = options["type"]
    temporal_type = options["temporaltype"]
    columns = options["columns"]
    order = options["order"]
    where = options["where"]
    separator = gscript.separator(options["separator"])
    outpath = options["output"]
    colhead = flags['c']

    # Make sure the temporal database exists
    tgis.init()

    sp = tgis.dataset_factory(type, None)
    dbif = tgis.SQLDatabaseInterfaceConnection()
    dbif.connect()
    first = True

    if gscript.verbosity() > 0 and not outpath:
        sys.stderr.write("----------------------------------------------\n")

    if outpath:
        outfile = open(outpath, 'w')

    for ttype in temporal_type.split(","):
        if ttype == "absolute":
            time = "absolute time"
        else:
            time = "relative time"

        stds_list = tgis.get_dataset_list(type, ttype, columns, where, order, dbif=dbif)

        # Use the correct mapset order: the current mapset first,
        # then the others in alphabetical order
        mapsets = tgis.get_tgis_c_library_interface().available_mapsets()

        # Print for each mapset separately
        for key in mapsets:
            if key in stds_list.keys():
                rows = stds_list[key]

                if rows:
                    if gscript.verbosity() > 0 and not outpath:
                        if issubclass(sp.__class__, tgis.AbstractMapDataset):
                            sys.stderr.write(_("Time stamped %s maps with %s available in mapset <%s>:\n") %
                                             (sp.get_type(), time, key))
                        else:
                            sys.stderr.write(_("Space time %s datasets with %s available in mapset <%s>:\n") %
                                             (sp.get_new_map_instance(None).get_type(), time, key))

                    # Print the column names if requested
                    if colhead and first:
                        output = ""
                        count = 0
                        for key in rows[0].keys():
                            if count > 0:
                                output += separator + str(key)
                            else:
                                output += str(key)
                            count += 1
                        if outpath:
                            outfile.write("{st}\n".format(st=output))
                        else:
                            print output
                        first = False

                    for row in rows:
                        output = ""
                        count = 0
                        for col in row:
                            if count > 0:
                                output += separator + str(col)
                            else:
                                output += str(col)
                            count += 1
                        if outpath:
                            outfile.write("{st}\n".format(st=output))
                        else:
                            print output
    if outpath:
        outfile.close()
    dbif.close()
Example #22
0
def test_backslash_separators():
    """Check that separtors specified as an escape sequence are correctly evaluated"""
    assert gs.separator(r"\t") == "\t"
    assert gs.separator(r"\n") == "\n"
Example #23
0
def main():
    options, flags = gs.parser()
    in_filename = options["input"]
    out_filename = options["output"]
    input_separator = gs.separator(options["separator"])
    prefix = options["prefix"]
    # https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
    date_formats = None
    if options["recognized_date"]:
        date_formats = options["recognized_date"].split(",")
    out_date_format = options["clean_date"]
    missing_names = options["missing_names"].split(",")
    # TODO: lowercase the column names

    if prefix and re.match("[^A-Za-z]", prefix[0]):
        gs.fatal(
            _("Prefix (now <{prefix}>) must start with an ASCII letter "
              "(a-z or A-Z in the English alphabet)").format(prefix=prefix))

    with open(in_filename, "r",
              newline="") as infile, open(out_filename, "w",
                                          newline="") as outfile:
        # TODO: Input format to parameters (important)
        # TODO: Output format to parameters (somewhat less important)
        input_csv = csv.reader(infile,
                               delimiter=input_separator,
                               quotechar='"')
        output_csv = csv.writer(outfile,
                                delimiter=",",
                                quotechar='"',
                                lineterminator="\n")
        for i, row in enumerate(input_csv):
            # TODO: Optionally remove newlines from cells.
            # In the header and body, replace them by a space (which then becomes
            # an underscore in the header).
            if i == 0:
                new_row = []
                num_unnamed_columns = 0
                duplicated_number = 2  # starting at two for duplicated names
                for column_number, column in enumerate(row):
                    if date_formats:
                        column = reformat_date(date_formats, out_date_format,
                                               column)
                    if not column:
                        if not num_unnamed_columns:
                            column = missing_names[0]
                        elif len(missing_names) == 1:
                            column = f"{missing_names[0]}_{column_number + 1}"
                        elif num_unnamed_columns < len(missing_names):
                            column = missing_names[num_unnamed_columns]
                        else:
                            column = f"{missing_names[-1]}_{name_duplicated}"
                            duplicated_number += 1
                        num_unnamed_columns += 1
                    column = minimize_whitespace(column)
                    # TODO: Also duplicate column names should be resolved here.
                    # Perhaps just move the else of no column names here or perhaps not
                    # because it would be difficult to navigate the code.
                    column = make_name_sql_compliant(column,
                                                     fallback_prefix=prefix)
                    new_row.append(column)
            else:
                # TODO: Optionally reformat dates in the body too (but without prefix).
                # TODO: Recognize numbers with spaces and commas and fix them.
                # For example, 10,000 and 10 000,5 should/might be
                # 10000 (or 10.0) 10000.5.
                # TODO: General find and replace for cells (which could take care of some
                # escape chars or other mess). Question is how to make it general/more than
                # one replace pair. (Remove would be easier to have in the interface.)
                new_row = []
                row_has_content = False
                for column in row:
                    if column:
                        row_has_content = True
                    # TODO: Use bools for this, perhaps a dedicated class for this type of option.
                    # This is an experiment with extremely aggressive replacement of flags by options.
                    if "collapse_whitespace" in options["cell_clean"]:
                        column = collapse_whitespace(column)
                    if "strip_whitespace" in options["cell_clean"]:
                        column = column.strip()
                    if date_formats and "date_format" in options["cell_clean"]:
                        column = reformat_date(date_formats, out_date_format,
                                               column)
                    new_row.append(column)
                # Skips completely empty rows and rows with only separators.
                if not row_has_content:
                    continue
                # TODO: Add except csv.Error as error:
            output_csv.writerow(new_row)
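The helpers called above (reformat_date, minimize_whitespace, make_name_sql_compliant, collapse_whitespace) are defined elsewhere in the module. As a minimal sketch, reformat_date could try each recognized input format in turn and leave unparseable values untouched; this is an assumption about its behavior, not the module's actual code:

import datetime

def reformat_date(date_formats, out_date_format, value):
    # Try each recognized input format; reformat on the first match,
    # return the value unchanged if nothing matches.
    for date_format in date_formats:
        try:
            date = datetime.datetime.strptime(value, date_format)
        except ValueError:
            continue
        return date.strftime(out_date_format)
    return value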
Example #24
0
def main():
    global insert_sql
    insert_sql = None
    global temporary_vect
    temporary_vect = None
    global stats_temp_file
    stats_temp_file = None
    global content
    content = None
    global raster
    raster = options['raster']
    global decimals
    decimals = int(options['decimals'])
    global zone_map
    zone_map = options['zone_map']

    csvfile = options['csvfile'] if options['csvfile'] else []
    separator = gscript.separator(options['separator'])
    prefix = options['prefix'] if options['prefix'] else []
    classes_list = options['classes_list'].split(
        ',') if options['classes_list'] else []
    vectormap = options['vectormap'] if options['vectormap'] else []

    prop = 'proportion' in options['statistics'].split(',')
    mode = 'mode' in options['statistics'].split(',')

    # Check that the input layers are CELL (integer) rasters
    if gscript.parse_command('r.info', flags='g',
                             map=raster)['datatype'] != 'CELL':
        gscript.fatal(
            _("The type of the input map 'raster' is not CELL. "
              "Please use a raster with integer values"))
    if gscript.parse_command('r.info', flags='g',
                             map=zone_map)['datatype'] != 'CELL':
        gscript.fatal(
            _("The type of the input map 'zone_map' is not CELL. "
              "Please use a raster with integer values"))

    # Check that 'decimals' is positive and within a credible range
    if decimals <= 0:
        gscript.fatal(_("The number of decimals should be positive"))
    if decimals > 100:
        gscript.fatal(_("The number of decimals should not be more than 100"))

    # Adjust the region to the input map if the flag is active
    if flags['r']:
        gscript.use_temp_region()
        gscript.run_command('g.region', raster=zone_map)

    # R.STATS
    tmpfile = gscript.tempfile()
    try:
        if flags['n']:
            gscript.run_command(
                'r.stats',
                overwrite=True,
                flags='c',
                input='%s,%s' % (zone_map, raster),
                output=tmpfile,
                separator=separator)  # Consider null values in R.STATS
        else:
            gscript.run_command(
                'r.stats',
                overwrite=True,
                flags='cn',
                input='%s,%s' % (zone_map, raster),
                output=tmpfile,
                separator=separator)  # Do not consider null values in R.STATS
        gscript.message(_("r.stats command finished..."))
    except:
        gscript.fatal(_("The execution of r.stats failed"))

    # COMPUTE STATISTICS
    # Open csv file and create a csv reader
    rstatsfile = open(tmpfile, 'r')
    reader = csv.reader(rstatsfile, delimiter=separator)
    # Total pixels per category per zone
    totals_dict = {}
    for row in reader:
        if row[0] not in totals_dict:  # Only if the current zone ID does not exist yet in the dictionary
            totals_dict[row[0]] = {}  # Declare a new nested dictionary for the current zone ID
        totals_dict[row[0]][row[1]] = int(row[2])
    # Delete key '*' in 'totals_dict' that can appear if there are null values in the zone raster
    if '*' in totals_dict:
        del totals_dict['*']
    # Close file
    rstatsfile.close()
    # Mode
    if mode:
        modalclass_dict = {}
        for ID in totals_dict:
            # The trick was found here: https://stackoverflow.com/a/268285/8013239
            mode = max(iter(totals_dict[ID].items()),
                       key=operator.itemgetter(1))[0]
            if mode == '*':  # If the mode is NULL values
                modalclass_dict[ID] = 'NULL'
            else:
                modalclass_dict[ID] = mode
    # Classes proportions
    if prop:
        # Get list of categories to output
        if classes_list:  # If a list of classes was provided by the user
            # Make sure the keys are strings
            class_dict = {str(a): '' for a in classes_list}
        else:
            class_dict = {}
        # Proportion of each category per zone
        proportion_dict = {}
        for ID in totals_dict:
            proportion_dict[ID] = {}
            for cl in totals_dict[ID]:
                if flags['p']:
                    proportion_dict[ID][cl] = round(
                        float(totals_dict[ID][cl]) /
                        sum(totals_dict[ID].values()) * 100, decimals)
                else:
                    proportion_dict[ID][cl] = round(
                        float(totals_dict[ID][cl]) /
                        sum(totals_dict[ID].values()), decimals)
                if cl == '*':
                    class_dict['NULL'] = ''
                else:
                    class_dict[cl] = ''
        # Fill class not met in the raster with zero
        for ID in proportion_dict:
            for cl in class_dict:
                if cl not in proportion_dict[ID].keys():
                    proportion_dict[ID][cl] = '{:.{}f}'.format(0, decimals)
        # Get list of class sorted by value (arithmetic)
        if 'NULL' in class_dict.keys():
            class_list = [int(k) for k in class_dict.keys() if k != 'NULL']
            class_list.sort()
            class_list.append('NULL')
        else:
            class_list = [int(k) for k in class_dict.keys()]
            class_list.sort()
    gscript.verbose(_("Statistics computed..."))

    # OUTPUT CONTENT
    # Header
    header = [
        'cat',
    ]
    if mode:
        if prefix:
            header.append('%s_mode' % prefix)
        else:
            header.append('mode')
    if prop:
        if prefix:
            header.extend('%s_prop_%s' % (prefix, cl) for cl in class_list)
        else:
            header.extend('prop_%s' % cl for cl in class_list)
    # Values
    value_dict = {}
    for ID in totals_dict:
        value_dict[ID] = []
        if mode:
            value_dict[ID].append(modalclass_dict[ID])
        if prop:
            for cl in class_list:
                value_dict[ID].append(proportion_dict[ID]['%s' % cl])

    # WRITE OUTPUT
    if csvfile:
        outfile = open(csvfile, 'w')
        writer = csv.writer(outfile, delimiter=separator)
        writer.writerow(header)
        csvcontent_dict = copy.deepcopy(value_dict)
        for ID in csvcontent_dict:
            csvcontent_dict[ID].insert(0, ID)
        writer.writerows(csvcontent_dict.values())
        outfile.close()
    if vectormap:
        gscript.message(_("Creating output vector map..."))
        temporary_vect = 'rzonalclasses_tmp_vect_%d' % os.getpid()
        gscript.run_command('r.to.vect',
                            input_=zone_map,
                            output=temporary_vect,
                            type_='area',
                            flags='vt',
                            overwrite=True,
                            quiet=True)
        insert_sql = gscript.tempfile()
        fsql = open(insert_sql, 'w')
        fsql.write('BEGIN TRANSACTION;\n')
        if gscript.db_table_exist(temporary_vect):
            if gscript.overwrite():
                fsql.write('DROP TABLE %s;' % temporary_vect)
            else:
                gscript.fatal(
                    _("Table %s already exists. Use --o to overwrite") %
                    temporary_vect)
        create_statement = 'CREATE TABLE ' + temporary_vect + ' (cat int PRIMARY KEY);\n'
        fsql.write(create_statement)
        for col in header[1:]:
            if col.split('_')[-1] == 'mode':  # Mode column should be integer
                addcol_statement = 'ALTER TABLE %s ADD COLUMN %s integer;\n' % (
                    temporary_vect, col)
            else:  # Proportions column should be double precision
                addcol_statement = 'ALTER TABLE %s ADD COLUMN %s double precision;\n' % (
                    temporary_vect, col)
            fsql.write(addcol_statement)
        for key in value_dict:
            insert_statement = 'INSERT INTO %s VALUES (%s, %s);\n' % (
                temporary_vect, key, ','.join(
                    [str(x) for x in value_dict[key]]))
            fsql.write(insert_statement)
        fsql.write('END TRANSACTION;')
        fsql.close()
        gscript.run_command('db.execute', input=insert_sql, quiet=True)
        gscript.run_command('v.db.connect',
                            map_=temporary_vect,
                            table=temporary_vect,
                            quiet=True)
        gscript.run_command('g.copy',
                            vector='%s,%s' % (temporary_vect, vectormap),
                            quiet=True)
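The mode computation above simply picks the dictionary item with the highest pixel count. A standalone illustration of the referenced trick (the data values are made up); note that ties resolve to whichever item the iterator yields first:

import operator

counts = {"10": 4, "20": 9, "30": 2}  # pixel counts per class in one zone
mode = max(iter(counts.items()), key=operator.itemgetter(1))[0]
print(mode)  # "20", the class with the largest count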
Example #25
0
def main():

    global insert_sql
    insert_sql = None
    global temporary_vect
    temporary_vect = None
    global stats_temp_file
    stats_temp_file = None

    segment_map = options["map"]
    csvfile = options["csvfile"] if options["csvfile"] else []
    vectormap = options["vectormap"] if options["vectormap"] else []
    global rasters
    rasters = options["rasters"].split(",") if options["rasters"] else []
    area_measures = (
        options["area_measures"].split(",")
        if (options["area_measures"] and not flags["s"])
        else []
    )
    neighborhood = True if flags["n"] else False
    if neighborhood:
        if not gscript.find_program("r.neighborhoodmatrix", "--help"):
            message = _("You need to install the addon r.neighborhoodmatrix to be able")
            message += _(" to calculate area measures.\n")
            message += _(
                " You can install the addon with 'g.extension r.neighborhoodmatrix'"
            )
            gscript.fatal(message)

    raster_statistics = (
        options["raster_statistics"].split(",") if options["raster_statistics"] else []
    )
    separator = gscript.separator(options["separator"])
    processes = int(options["processes"])

    output_header = ["cat"]
    output_dict = collections.defaultdict(list)

    raster_stat_dict = {
        "zone": 0,
        "min": 4,
        "third_quart": 16,
        "max": 5,
        "sum": 12,
        "null_cells": 3,
        "median": 15,
        "label": 1,
        "first_quart": 14,
        "range": 6,
        "mean_of_abs": 8,
        "stddev": 9,
        "non_null_cells": 2,
        "coeff_var": 11,
        "variance": 10,
        "sum_abs": 13,
        "perc_90": 17,
        "mean": 7,
    }

    geometry_stat_dict = {
        "cat": 0,
        "area": 1,
        "perimeter": 2,
        "compact_square": 3,
        "compact_circle": 4,
        "fd": 5,
        "xcoords": 6,
        "ycoords": 7,
    }

    if flags["r"]:
        gscript.use_temp_region()
        gscript.run_command("g.region", raster=segment_map)

    stats_temp_file = gscript.tempfile()
    if area_measures:
        gscript.message(_("Calculating geometry statistics..."))
        output_header += area_measures
        stat_indices = [geometry_stat_dict[x] for x in area_measures]
        gscript.run_command(
            "r.object.geometry",
            input_=segment_map,
            output=stats_temp_file,
            overwrite=True,
            quiet=True,
        )

        firstline = True
        with open(stats_temp_file, "r") as fin:
            for line in fin:
                if firstline:
                    firstline = False
                    continue
                values = line.rstrip().split("|")
                output_dict[values[0]] = [values[x] for x in stat_indices]

    if rasters:
        if not flags["c"]:
            gscript.message(_("Checking usability of raster maps..."))
            rasters_to_remove = []
            for raster in rasters:
                null_values_found = False
                if not gscript.find_file(raster, element="cell")["name"]:
                    gscript.message(_("Cannot find raster '%s'" % raster))
                    gscript.message(_("Removing this raster from list."))
                    rasters_to_remove.append(raster)
                    continue
                current_mapset = gscript.gisenv()["MAPSET"]
                if gscript.find_file("MASK", element="cell", mapset=current_mapset)[
                    "name"
                ]:

                    null_test = gscript.read_command(
                        "r.stats", flags="N", input_=["MASK", raster], quiet=True
                    ).splitlines()
                    if "1 *" in null_test:
                        null_values_found = True

                else:
                    raster_info = gscript.parse_command(
                        "r.univar", flags="g", map_=raster, quiet=True
                    )
                    if len(raster_info) == 0 or int(raster_info["null_cells"]) > 0:
                        null_values_found = True

                if null_values_found:
                    message = "Raster <%s> contains null values.\n" % raster
                    message += "This can lead to errors in the calculations.\n"
                    message += "Check region settings and raster extent.\n"
                    message += "Possibly fill null values of raster.\n"
                    message += "Removing this raster from list."
                    gscript.warning(message)
                    rasters_to_remove.append(raster)

            for raster in rasters_to_remove:
                rasters.remove(raster)

        if len(rasters) > 0:
            gscript.message(_("Calculating statistics for the following raster maps:"))
            gscript.message(",".join(rasters))
            if len(rasters) < processes:
                processes = len(rasters)
                gscript.message(
                    _("Only one process per raster. Reduced number of processes to %i.")
                    % processes
                )

            stat_indices = [raster_stat_dict[x] for x in raster_statistics]
            pool = Pool(processes)
            func = partial(worker, segment_map, stats_temp_file)
            pool.map(func, rasters)
            pool.close()
            pool.join()

            for raster in rasters:
                rastername = raster.split("@")[0]
                rastername = rastername.replace(".", "_")
                temp_file = stats_temp_file + "." + rastername
                output_header += [rastername + "_" + x for x in raster_statistics]
                firstline = True
                with open(temp_file, "r") as fin:
                    for line in fin:
                        if firstline:
                            firstline = False
                            continue
                        values = line.rstrip().split("|")
                        output_dict[values[0]] = output_dict[values[0]] + [
                            values[x] for x in stat_indices
                        ]

    # Calculating neighborhood statistics if requested
    if neighborhood:

        gscript.message(_("Calculating neighborhood statistics..."))

        # Add neighborhood statistics to the headers
        original_nb_values = len(output_header) - 1
        new_headers = ["neighbors_count"]
        for i in range(1, len(output_header)):
            new_headers.append("%s_nbrmean" % output_header[i])
            new_headers.append("%s_nbrstddev" % output_header[i])

        output_header += new_headers

        # Get sorted neighborhood matrix
        nbr_matrix = sorted(
            [
                x.split("|")
                for x in gscript.read_command(
                    "r.neighborhoodmatrix", input_=segment_map, flags="d", quiet=True
                ).splitlines()
            ]
        )

        # Calculate mean and stddev of neighbor values for each variable in the
        # output_dict
        for key, group in groupby(nbr_matrix, lambda x: x[0]):
            d = {}
            for i in range(original_nb_values):
                d[i] = (0, 0, 0)
            nbrlist = [str(x[1]) for x in group]
            if len(nbrlist) > 1:
                for nbr in nbrlist:
                    for i in range(original_nb_values):
                        d[i] = update(d[i], float(output_dict[nbr][i]))
                output_dict[key] = output_dict[key] + [str(len(nbrlist))]
                output_dict[key] = output_dict[key] + [
                    str(i) for sub in [finalize(x) for x in d.values()] for i in sub
                ]
            else:
                newvalues = ["1"]
                nbr = nbrlist[0]
                for i in range(original_nb_values):
                    newvalues.append(output_dict[nbr][i])
                    newvalues.append("0")
                output_dict[key] = output_dict[key] + newvalues

    message = _("Some values could not be calculated for the objects below. ")
    message += _("These objects are thus not included in the results. ")
    message += _("HINT: Check some of the raster maps for null values ")
    message += _("and possibly fill these values with r.fillnulls.")
    error_objects = []

    if csvfile:
        with open(csvfile, "w") as f:
            f.write(separator.join(output_header) + "\n")
            for key in output_dict:
                if len(output_dict[key]) + 1 == len(output_header):
                    f.write(key + separator + separator.join(output_dict[key]) + "\n")
                else:
                    error_objects.append(key)

    if vectormap:
        gscript.message(_("Creating output vector map..."))
        temporary_vect = "segmstat_tmp_vect_%d" % os.getpid()
        gscript.run_command(
            "r.to.vect",
            input_=segment_map,
            output=temporary_vect,
            type_="area",
            flags="vt",
            overwrite=True,
            quiet=True,
        )

        insert_sql = gscript.tempfile()
        fsql = open(insert_sql, "w")
        fsql.write("BEGIN TRANSACTION;\n")
        if gscript.db_table_exist(temporary_vect):
            if gscript.overwrite():
                fsql.write("DROP TABLE %s;" % temporary_vect)
            else:
                gscript.fatal(
                    _("Table %s already exists. Use --o to overwrite") % temporary_vect
                )
        create_statement = (
            "CREATE TABLE " + temporary_vect + " (cat int PRIMARY KEY);\n"
        )
        fsql.write(create_statement)
        for header in output_header[1:]:
            addcol_statement = "ALTER TABLE %s ADD COLUMN %s double precision;\n" % (
                temporary_vect,
                header,
            )
            fsql.write(addcol_statement)
        for key in output_dict:
            if len(output_dict[key]) + 1 == len(output_header):
                sql = "INSERT INTO %s VALUES (%s, %s);\n" % (
                    temporary_vect,
                    key,
                    ",".join(output_dict[key]),
                )
                sql = sql.replace("inf", "NULL")
                sql = sql.replace("nan", "NULL")
                fsql.write(sql)
            else:
                if not csvfile:
                    error_objects.append(key)

        fsql.write("END TRANSACTION;")
        fsql.close()

        gscript.run_command("db.execute", input=insert_sql, quiet=True)
        gscript.run_command(
            "v.db.connect", map_=temporary_vect, table=temporary_vect, quiet=True
        )
        gscript.run_command(
            "g.copy", vector="%s,%s" % (temporary_vect, vectormap), quiet=True
        )

    if error_objects:
        object_string = ", ".join(error_objects[:100])
        message += _(
            "\n\nObjects with errors (only first 100 are shown):\n%s" % object_string
        )
        gscript.message(message)
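The update() and finalize() helpers used in the neighborhood loop are defined elsewhere in the module. The (0, 0, 0) state tuples suggest Welford's online mean/variance algorithm; a minimal sketch under that assumption (the exact return values of the module's own helpers may differ):

def update(state, new_value):
    # One step of Welford's online algorithm over a stream of values.
    count, mean, m2 = state
    count += 1
    delta = new_value - mean
    mean += delta / count
    m2 += delta * (new_value - mean)
    return (count, mean, m2)

def finalize(state):
    # Return (mean, population stddev) for the accumulated values.
    count, mean, m2 = state
    if count < 2:
        return (mean, 0.0)
    return (mean, (m2 / count) ** 0.5)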
Example #26
0
def main(options, flags):
    import grass.pygrass.modules as pymod
    import grass.temporal as tgis
    from grass.pygrass.vector import VectorTopo

    invect = options["input"]
    if invect.find("@") != -1:
        invect = invect.split("@")[0]
    incol = options["date_column"]
    indate = options["date"]
    endcol = options["final_date_column"]
    enddate = options["final_date"]
    strds = options["strds"]
    nprocs = options["nprocs"]
    if strds.find("@") != -1:
        strds_name = strds.split("@")[0]
    else:
        strds_name = strds
    output = options["output"]
    if options["columns"]:
        cols = options["columns"].split(",")
    else:
        cols = []
    mets = options["method"].split(",")
    gran = options["granularity"]
    dateformat = options["date_format"]
    separator = gscript.separator(options["separator"])
    update = flags["u"]
    create = flags["c"]

    stdout = False
    if output != "-" and update:
        gscript.fatal(_("Cannot combine 'output' option and 'u' flag"))
    elif output != "-" and create:
        gscript.fatal(_("Cannot combine 'output' option and 'c' flag"))
    elif output == "-" and (update or create):
        if update and not cols:
            gscript.fatal(_("Please set 'columns' option"))
        output = invect
    else:
        stdout = True

    if create:
        cols = []
        for m in mets:
            colname = "{st}_{me}".format(st=strds_name, me=m)
            cols.append(colname)
            try:
                pymod.Module(
                    "v.db.addcolumn",
                    map=invect,
                    columns="{col} "
                    "double precision".format(col=colname),
                )
            except CalledModuleError:
                gscript.fatal(
                    _("Not possible to create column "
                      "{col}".format(col=colname)))
        gscript.warning(
            _("Attribute table of vector {name} will be updated"
              "...").format(name=invect))
    elif update:
        colexist = pymod.Module("db.columns", table=invect,
                                stdout_=PI).outputs.stdout.splitlines()
        for col in cols:
            if col not in colexist:
                gscript.fatal(
                    _("Column '{}' does not exist, please create it first".
                      format(col)))
        gscript.warning(
            _("Attribute table of vector {name} will be updated"
              "...").format(name=invect))

    if output != "-" and len(cols) != len(mets):
        gscript.fatal(
            _("'columns' and 'method' options must have the same "
              "number of elements"))
    tgis.init()
    dbif = tgis.SQLDatabaseInterfaceConnection()
    dbif.connect()
    sp = tgis.open_old_stds(strds, "strds", dbif)

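    # Translate the user-given granularity into a timedelta (absolute time)
    # or an integer offset (relative time), later used to build the query window.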
    if sp.get_temporal_type() == "absolute":
        if gran:
            delta = int(tgis.gran_to_gran(gran, sp.get_granularity(), True))
            if tgis.gran_singular_unit(gran) in ["year", "month"]:
                delta = int(tgis.gran_to_gran(gran, "1 day", True))
                td = timedelta(delta)
            elif tgis.gran_singular_unit(gran) == "day":
                delta = tgis.gran_to_gran(gran, sp.get_granularity(), True)
                td = timedelta(delta)
            elif tgis.gran_singular_unit(gran) == "hour":
                td = timedelta(hours=delta)
            elif tgis.gran_singular_unit(gran) == "minute":
                td = timedelta(minutes=delta)
            elif tgis.gran_singular_unit(gran) == "second":
                td = timedelta(seconds=delta)
        else:
            td = None
    else:
        if sp.get_granularity() >= int(gran):
            gscript.fatal(
                _("Input granularity is smaller than or equal to the {iv}"
                  " STRDS granularity").format(iv=strds))
        td = int(gran)
    if incol and indate:
        gscript.fatal(_("Cannot combine 'date_column' and 'date' options"))
    elif not incol and not indate:
        gscript.fatal(_("You have to fill 'date_column' or 'date' option"))
    if incol:
        if endcol:
            mysql = "SELECT DISTINCT {dc},{ec} from {vmap} order by " "{dc}".format(
                vmap=invect, dc=incol, ec=endcol)
        else:
            mysql = "SELECT DISTINCT {dc} from {vmap} order by " "{dc}".format(
                vmap=invect, dc=incol)
        try:
            dates = pymod.Module("db.select",
                                 flags="c",
                                 stdout_=PI,
                                 stderr_=PI,
                                 sql=mysql)
            mydates = dates.outputs["stdout"].value.splitlines()
        except CalledModuleError:
            gscript.fatal(_("db.select return an error"))
    elif indate:
        if enddate:
            mydates = ["{ida}|{eda}".format(ida=indate, eda=enddate)]
        else:
            mydates = [indate]
        pymap = VectorTopo(invect)
        pymap.open("r")
        if len(pymap.dblinks) == 0:
            try:
                pymap.close()
                pymod.Module("v.db.addtable", map=invect)
            except CalledModuleError:
                dbif.close()
                gscript.fatal(
                    _("Unable to add a table to vector map "
                      "<%s>") % invect)
        if pymap.is_open():
            pymap.close()
        qfeat = pymod.Module("v.category",
                             stdout_=PI,
                             stderr_=PI,
                             input=invect,
                             option="print")
        myfeats = qfeat.outputs["stdout"].value.splitlines()

    if stdout:
        outtxt = ""
    for data in mydates:
        try:
            start, final = data.split("|")
        except ValueError:
            start = data
            final = None
        if sp.get_temporal_type() == "absolute":
            fdata = datetime.strptime(start, dateformat)
        else:
            fdata = int(start)
        if final:
            sdata = datetime.strptime(final, dateformat)
        elif flags["a"]:
            sdata = fdata + td
        else:
            sdata = fdata
            fdata = sdata - td
        mwhere = "start_time >= '{inn}' and start_time < " "'{out}'".format(
            inn=fdata, out=sdata)
        lines = None
        try:
            r_what = pymod.Module(
                "t.rast.what",
                points=invect,
                strds=strds,
                layout="timerow",
                separator=separator,
                flags="v",
                where=mwhere,
                quiet=True,
                stdout_=PI,
                stderr_=PI,
                nprocs=nprocs,
            )
            lines = r_what.outputs["stdout"].value.splitlines()
        except CalledModuleError:
            gscript.warning("t.rast.what faild with where='{}'".format(mwhere))
            pass
        if incol:
            if endcol:
                mysql = ("SELECT DISTINCT cat from {vmap} where {dc}='{da}' "
                         "AND {ec}='{ed}' order by cat".format(vmap=invect,
                                                               da=start,
                                                               dc=incol,
                                                               ed=final,
                                                               ec=endcol))
            else:
                mysql = ("SELECT DISTINCT cat from {vmap} where {dc}='{da}' "
                         "order by cat".format(vmap=invect, da=start,
                                               dc=incol))
            try:
                qfeat = pymod.Module("db.select",
                                     flags="c",
                                     stdout_=PI,
                                     stderr_=PI,
                                     sql=mysql)
                myfeats = qfeat.outputs["stdout"].value.splitlines()
            except CalledModuleError:
                gscript.fatal(
                    _("db.select returned an error for date "
                      "{da}".format(da=start)))
        if not lines and stdout:
            for feat in myfeats:
                outtxt += "{di}{sep}{da}".format(di=feat,
                                                 da=start,
                                                 sep=separator)
                for n in range(len(mets)):
                    outtxt += "{sep}{val}".format(val="*", sep=separator)
                outtxt += "\n"
        if not lines:
            continue
        x = 0
        for line in lines:
            vals = line.split(separator)
            if vals[0] in myfeats:
                try:
                    nvals = np.array(vals[3:]).astype(float)
                except ValueError:
                    if stdout:
                        outtxt += "{di}{sep}{da}".format(di=vals[0],
                                                         da=start,
                                                         sep=separator)
                        for n in range(len(mets)):
                            outtxt += "{sep}{val}".format(val="*",
                                                          sep=separator)
                        outtxt += "\n"
                    continue
                if stdout:
                    outtxt += "{di}{sep}{da}".format(di=vals[0],
                                                     da=start,
                                                     sep=separator)
                for n in range(len(mets)):
                    result = None
                    if len(nvals) == 1:
                        result = nvals[0]
                    elif len(nvals) > 1:
                        result = return_value(nvals, mets[n])
                    if stdout:
                        if result is None:
                            result = "*"
                        outtxt += "{sep}{val}".format(val=result,
                                                      sep=separator)
                    else:
                        try:
                            if incol:
                                mywhe = "{dc}='{da}' AND ".format(da=start,
                                                                  dc=incol)
                                if endcol:
                                    mywhe += "{dc}='{da}' AND ".format(
                                        da=final, dc=endcol)

                                mywhe += "cat={ca}".format(ca=vals[0])

                                pymod.Module(
                                    "v.db.update",
                                    map=output,
                                    column=cols[n],
                                    value=str(result),
                                    where=mywhe,
                                )
                            else:
                                pymod.Module(
                                    "v.db.update",
                                    map=output,
                                    column=cols[n],
                                    value=str(result),
                                    where="cat={ca}".format(ca=vals[0]),
                                )
                        except CalledModuleError:
                            gscript.fatal(_("v.db.update return an error"))
                if stdout:
                    outtxt += "\n"
                if x == len(myfeats):
                    break
                else:
                    x += 1
    if stdout:
        print(outtxt)
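return_value() aggregates the sampled time-series values with the requested method. A plausible minimal sketch using numpy; the supported method names here are an assumption, not the module's actual mapping:

import numpy as np

def return_value(vals, method):
    # Aggregate an array of raster values with the requested method.
    aggregators = {
        "average": np.mean,
        "median": np.median,
        "minimum": np.min,
        "maximum": np.max,
        "stddev": np.std,
        "sum": np.sum,
    }
    if method not in aggregators:
        raise ValueError("Unsupported method: %s" % method)
    return aggregators[method](vals)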
Example #27
0
def main():
    global insert_sql
    insert_sql = None
    global temporary_vect
    temporary_vect = None
    global stats_temp_file
    stats_temp_file = None
    global content
    content = None
    global raster
    raster = options["raster"]
    global decimals
    decimals = int(options["decimals"])
    global zone_map
    zone_map = options["zone_map"]

    csvfile = options["csvfile"] if options["csvfile"] else []
    separator = gscript.separator(options["separator"])
    prefix = options["prefix"] if options["prefix"] else []
    classes_list = options["classes_list"].split(
        ",") if options["classes_list"] else []
    vectormap = options["vectormap"] if options["vectormap"] else []
    prop = False if "proportion" not in options["statistics"].split(
        ",") else True
    mode = False if "mode" not in options["statistics"].split(",") else True

    # Check only if the flag is activated - can be a bottleneck for very large rasters.
    if flags["c"]:
        # Check that the input layers are CELL (integer) rasters
        if gscript.parse_command("r.info", flags="g",
                                 map=raster)["datatype"] != "CELL":
            gscript.fatal(
                _("The type of the input map 'raster' is not CELL. "
                  "Please use a raster with integer values"))
        if gscript.parse_command("r.info", flags="g",
                                 map=zone_map)["datatype"] != "CELL":
            gscript.fatal(
                _("The type of the input map 'zone_map' is not CELL. "
                  "Please use a raster with integer values"))

    # Check that 'decimals' is positive and within a credible range
    if decimals <= 0:
        gscript.fatal(_("The number of decimals should be positive"))
    if decimals > 100:
        gscript.fatal(_("The number of decimals should not be more than 100"))

    # Adjust the region to the input map if the flag is active
    if flags["r"]:
        gscript.use_temp_region()
        gscript.run_command("g.region", raster=zone_map)

    # R.STATS
    tmpfile = gscript.tempfile()
    try:
        if flags["n"]:
            gscript.run_command(
                "r.stats",
                overwrite=True,
                flags="c",
                input="%s,%s" % (zone_map, raster),
                output=tmpfile,
                separator=separator,
            )  # Consider null values in R.STATS
        else:
            gscript.run_command(
                "r.stats",
                overwrite=True,
                flags="cn",
                input="%s,%s" % (zone_map, raster),
                output=tmpfile,
                separator=separator,
            )  # Do not consider null values in R.STATS
        gscript.message(_("r.stats command finished..."))
    except:
        gscript.fatal(_("The execution of r.stats failed"))

    # COMPUTE STATISTICS
    # Open csv file and create a csv reader
    rstatsfile = open(tmpfile, "r")
    reader = csv.reader(rstatsfile, delimiter=separator)
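    # Each r.stats row has the form [zone_id, class_value, cell_count],
    # e.g. ["1", "42", "1375"] (values illustrative)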
    # Total pixels per category per zone
    totals_dict = {}
    for row in reader:
        if row[0] not in totals_dict:
            # Declare a new nested dictionary for the current zone ID
            totals_dict[row[0]] = {}
        # Store the pixel count for every class; filtering on 'classes_list'
        # (flag -l) happens later, when the proportions are written out
        totals_dict[row[0]][row[1]] = int(row[2])
    # Delete key '*' in 'totals_dict' that can appear if there are null values in the zone raster
    if "*" in totals_dict:
        del totals_dict["*"]
    # Close file
    rstatsfile.close()
    # Get the list of zone IDs
    id_list = list(totals_dict)
    # Mode
    if mode:
        modalclass_dict = {}
        for ID in id_list:
            # The trick was found here: https://stackoverflow.com/a/268285/8013239
            # Use a local name to avoid shadowing the 'mode' flag
            modal_class = max(iter(totals_dict[ID].items()),
                              key=operator.itemgetter(1))[0]
            if modal_class == "*":  # If the mode is the NULL value
                modalclass_dict[ID] = "NULL"
            else:
                modalclass_dict[ID] = modal_class
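            # Illustrative example (hypothetical counts): for
            # totals_dict[ID] == {"3": 120, "7": 80}, the max() call picks
            # the ("3", 120) pair, so the modal class is "3".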
    # Class proportions
    if prop:
        # Get list of categories to output
        if classes_list:  # If list of classes provided by user
            class_dict = {str(int(a)): ""
                          for a in classes_list
                          }  # To be sure it's string format
        else:
            class_dict = {}
        # Proportion of each category per zone
        proportion_dict = {}
        for ID in id_list:
            proportion_dict[ID] = {}
            for cl in totals_dict[ID]:
                # With flag -l, output will contain only classes from 'classes_list'
                if flags["l"] and cl not in classes_list:
                    continue
                # Proportion as a percentage with flag -p, as a fraction otherwise
                prop_value = float(totals_dict[ID][cl]) / sum(
                    totals_dict[ID].values())
                if flags["p"]:
                    prop_value *= 100
                proportion_dict[ID][cl] = "{:.{}f}".format(prop_value, decimals)
                if cl == "*":
                    class_dict["NULL"] = ""
                else:
                    class_dict[cl] = ""
        # Fill classes not present in the raster with zero
        for ID in proportion_dict:
            for cl in class_dict:
                if cl not in proportion_dict[ID].keys():
                    proportion_dict[ID][cl] = "{:.{}f}".format(0, decimals)
        # Get list of class sorted by value (arithmetic ordering)
        if "NULL" in class_dict.keys():
            class_list = sorted(
                [int(k) for k in class_dict.keys() if k != "NULL"])
            class_list.append("NULL")
        else:
            class_list = sorted([int(k) for k in class_dict.keys()])
    gscript.verbose(_("Statistics computed..."))
    # Drop the reference to 'totals_dict' so its memory can be released
    totals_dict = None
    # OUTPUT CONTENT
    # Header
    header = [
        "cat",
    ]
    if mode:
        if prefix:
            header.append("%s_mode" % prefix)
        else:
            header.append("mode")
    if prop:
        if prefix:
            header.extend("%s_prop_%s" % (prefix, cl) for cl in class_list)
        else:
            header.extend("prop_%s" % cl for cl in class_list)
    # Values
    value_dict = {}
    for ID in id_list:
        value_dict[ID] = []
        value_dict[ID].append(ID)
        if mode:
            value_dict[ID].append(modalclass_dict[ID])
        if prop:
            for cl in class_list:
                value_dict[ID].append(proportion_dict[ID]["%s" % cl])
    # WRITE OUTPUT
    if csvfile:
        with open(csvfile, "w", newline="") as outfile:
            writer = csv.writer(outfile, delimiter=separator)
            writer.writerow(header)
            writer.writerows(value_dict.values())
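    # Illustrative CSV output (hypothetical values) with
    # statistics=mode,proportion and separator=';':
    #   cat;mode;prop_1;prop_2;prop_NULL
    #   1;1;85.71;14.29;0.00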
    if vectormap:
        gscript.message(_("Creating output vector map..."))
        temporary_vect = "rzonalclasses_tmp_vect_%d" % os.getpid()
        gscript.run_command(
            "r.to.vect",
            input_=zone_map,
            output=temporary_vect,
            type_="area",
            flags="vt",
            overwrite=True,
            quiet=True,
        )
        insert_sql = gscript.tempfile()
        with open(insert_sql, "w", newline="") as fsql:
            fsql.write("BEGIN TRANSACTION;\n")
            if gscript.db_table_exist(temporary_vect):
                if gscript.overwrite():
                    fsql.write("DROP TABLE %s;" % temporary_vect)
                else:
                    gscript.fatal(
                        _("Table %s already exists. Use --o to overwrite") %
                        temporary_vect)
            create_statement = ("CREATE TABLE %s (cat int PRIMARY KEY);\n" %
                                temporary_vect)
            fsql.write(create_statement)
            for col in header[1:]:
                if col.split(
                        "_")[-1] == "mode":  # Mode column should be integer
                    addcol_statement = "ALTER TABLE %s ADD COLUMN %s integer;\n" % (
                        temporary_vect,
                        col,
                    )
                else:  # Proportions column should be double precision
                    addcol_statement = (
                        "ALTER TABLE %s ADD COLUMN %s double precision;\n" %
                        (temporary_vect, col))
                fsql.write(addcol_statement)
            for key in value_dict:
                insert_statement = "INSERT INTO %s VALUES (%s);\n" % (
                    temporary_vect,
                    ",".join(value_dict[key]),
                )
                fsql.write(insert_statement)
            fsql.write("END TRANSACTION;")
        gscript.run_command("db.execute", input=insert_sql, quiet=True)
        gscript.run_command("v.db.connect",
                            map_=temporary_vect,
                            table=temporary_vect,
                            quiet=True)
        gscript.run_command("g.copy",
                            vector="%s,%s" % (temporary_vect, vectormap),
                            quiet=True)
Example #28
def main():
    # lazy imports
    import grass.temporal as tgis

    # Get the options
    input = options["input"]
    where = options["where"]
    columns = options["columns"]
    tempwhere = options["t_where"]
    layer = options["layer"]
    separator = grass.separator(options["separator"])

    if where == "" or where == " " or where == "\n":
        where = None

    if columns == "" or columns == " " or columns == "\n":
        columns = None

    # Make sure the temporal database exists
    tgis.init()

    sp = tgis.open_old_stds(input, "stvds")

    rows = sp.get_registered_maps("name,layer,mapset,start_time,end_time",
                                  tempwhere, "start_time", None)

    col_names = ""
    if rows:
        for row in rows:
            vector_name = "%s@%s" % (row["name"], row["mapset"])
            # In case a layer is defined in the vector dataset,
            # we override the option layer
            if row["layer"]:
                layer = row["layer"]

            select = grass.read_command(
                "v.db.select",
                map=vector_name,
                layer=layer,
                columns=columns,
                separator="%s" % (separator),
                where=where,
            )
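            # v.db.select output (illustrative): the first line holds the
            # column names, subsequent lines the attribute values, e.g.:
            #   cat|name
            #   1|sample_point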

            if not select:
                grass.fatal(
                    _("Unable to run v.db.select for vector map <%s> "
                      "with layer %s") % (vector_name, layer))
            # The first line contains the column names
            # Use 'lines' to avoid shadowing the built-in 'list'
            lines = select.split("\n")
            count = 0
            for entry in lines:
                if entry.strip() != "":
                    # print the column names in case they change
                    if count == 0:
                        col_names_new = "start_time%send_time%s%s" % (
                            separator,
                            separator,
                            entry,
                        )
                        if col_names != col_names_new:
                            col_names = col_names_new
                            print(col_names)
                    else:
                        if row["end_time"]:
                            print("%s%s%s%s%s" % (
                                row["start_time"],
                                separator,
                                row["end_time"],
                                separator,
                                entry,
                            ))
                        else:
                            print("%s%s%s%s" % (row["start_time"], separator,
                                                separator, entry))
                    count += 1
Example #29
def main(options, flags):
    # lazy imports
    import grass.temporal as tgis
    import grass.pygrass.modules as pymod

    # Get the options
    points = options["points"]
    coordinates = options["coordinates"]
    strds = options["strds"]
    output = options["output"]
    where = options["where"]
    order = options["order"]
    layout = options["layout"]
    null_value = options["null_value"]
    separator = gscript.separator(options["separator"])

    nprocs = int(options["nprocs"])
    write_header = flags["n"]
    use_stdin = flags["i"]
    vcat = flags["v"]

    #output_cat_label = flags["f"]
    #output_color = flags["r"]
    #output_cat = flags["i"]

    overwrite = gscript.overwrite()

    if coordinates and points:
        gscript.fatal(
            _("Options coordinates and points are mutually exclusive"))

    if not coordinates and not points and not use_stdin:
        gscript.fatal(
            _("Please provide the sampling coordinates via the 'coordinates' "
              "option, the 'points' option, or the 'i' flag (piping them to "
              "t.rast.what from stdin)"))

    if vcat and not points:
        gscript.fatal(_("Flag 'v' required option 'points'"))

    if use_stdin:
        coordinates_stdin = str(sys.__stdin__.read())
        # Check if coordinates are given with site names or IDs
        stdin_length = len(coordinates_stdin.split('\n')[0].split())
        # Two columns are plain coordinates; three or more also carry a
        # site name or ID
        site_input = stdin_length >= 3
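        # Illustrative stdin lines (hypothetical values):
        #   "635000 220000"     -> site_input is False
        #   "P1 635000 220000"  -> site_input is True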
    else:
        site_input = False

    # Make sure the temporal database exists
    tgis.init()
    # We need a database interface
    dbif = tgis.SQLDatabaseInterfaceConnection()
    dbif.connect()

    sp = tgis.open_old_stds(strds, "strds", dbif)
    maps = sp.get_registered_maps_as_objects(where=where,
                                             order=order,
                                             dbif=dbif)
    dbif.close()
    if not maps:
        gscript.fatal(
            _("Space time raster dataset <%s> is empty") % sp.get_id())

    # Build the r.what flag string; the extra output flags are disabled
    # due to test issues
    flags = ""
    #if output_cat_label is True:
    #    flags += "f"
    #if output_color is True:
    #    flags += "r"
    #if output_cat is True:
    #    flags += "i"
    if vcat is True:
        flags += "v"

    # Configure the r.what module
    if points:
        r_what = pymod.Module("r.what",
                              map="dummy",
                              output="dummy",
                              run_=False,
                              separator=separator,
                              points=points,
                              overwrite=overwrite,
                              flags=flags,
                              null_value=null_value,
                              quiet=True)
    elif coordinates:
        # Create a list of values
        coord_list = coordinates.split(",")
        r_what = pymod.Module("r.what",
                              map="dummy",
                              output="dummy",
                              run_=False,
                              separator=separator,
                              coordinates=coord_list,
                              overwrite=overwrite,
                              flags=flags,
                              null_value=null_value,
                              quiet=True)
    elif use_stdin:
        r_what = pymod.Module("r.what",
                              map="dummy",
                              output="dummy",
                              run_=False,
                              separator=separator,
                              stdin_=coordinates_stdin,
                              overwrite=overwrite,
                              flags=flags,
                              null_value=null_value,
                              quiet=True)
    else:
        gscript.error(_("Please specify points or coordinates"))

    if len(maps) < nprocs:
        nprocs = len(maps)

    # The module queue for parallel execution
    process_queue = pymod.ParallelModuleQueue(int(nprocs))
    num_maps = len(maps)

    # 400 maps is the absolute maximum in r.what.
    # We need to determine the number of maps that can be processed
    # in parallel.

    # First estimate the number of maps per process. We use 400 maps
    # simultaneously as the maximum for a single process.

    num_loops = int(num_maps / (400 * nprocs))
    remaining_maps = num_maps % (400 * nprocs)

    if num_loops == 0:
        num_loops = 1
        remaining_maps = 0

    # Compute the number of maps for each process
    maps_per_loop = int((num_maps - remaining_maps) / num_loops)
    maps_per_process = int(maps_per_loop / nprocs)
    remaining_maps_per_loop = maps_per_loop % nprocs
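    # Worked example (hypothetical numbers): with num_maps = 2500 and
    # nprocs = 3, 400 * nprocs = 1200, so num_loops = 2 and
    # remaining_maps = 2500 % 1200 = 100; then maps_per_loop =
    # (2500 - 100) / 2 = 1200, maps_per_process = 1200 / 3 = 400, and
    # remaining_maps_per_loop = 1200 % 3 = 0.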

    # We put the output files in an ordered list
    output_files = []
    output_time_list = []

    count = 0
    for loop in range(num_loops):
        file_name = gscript.tempfile() + "_%i" % (loop)
        count = process_loop(nprocs, maps, file_name, count, maps_per_process,
                             remaining_maps_per_loop, output_files,
                             output_time_list, r_what, process_queue)

    process_queue.wait()

    gscript.verbose("Number of raster map layers remaining for sampling %i" %
                    (remaining_maps))
    if remaining_maps > 0:
        # Use a single process if there are 100 maps or fewer
        if remaining_maps <= 100:
            map_names = []
            for i in range(remaining_maps):
                map = maps[count]
                map_names.append(map.get_id())
                count += 1
            mod = copy.deepcopy(r_what)
            mod(map=map_names, output=file_name)
            process_queue.put(mod)
        else:
            maps_per_process = int(remaining_maps / nprocs)
            remaining_maps_per_loop = remaining_maps % nprocs

            file_name = "out_remain"
            process_loop(nprocs, maps, file_name, count, maps_per_process,
                         remaining_maps_per_loop, output_files,
                         output_time_list, r_what, process_queue)

    # Wait for unfinished processes
    process_queue.wait()

    # Merge the output files in the correct order
    if layout == "row":
        one_point_per_row_output(separator, output_files, output_time_list,
                                 output, write_header, site_input, vcat)
    elif layout == "col":
        one_point_per_col_output(separator, output_files, output_time_list,
                                 output, write_header, site_input, vcat)
    else:
        one_point_per_timerow_output(separator, output_files, output_time_list,
                                     output, write_header, site_input, vcat)
Example #30
def main():
    options, flags = gs.parser()

    # lazy imports
    import grass.temporal as tgis

    # Get the options
    # Parser does not ensure that the input exists.
    input = options["input"]
    columns = options["columns"]
    order = options["order"]
    where = options["where"]
    separator = gs.separator(options["separator"])
    method = options["method"]
    granule = options["granule"]
    header = flags["u"]
    output = options["output"]
    output_format = options["format"]

    if output_format == "csv":
        if len(separator) > 1:
            gs.fatal(
                message_option_value_excludes_option_value(
                    option_name="format",
                    option_value=output_format,
                    excluded_option_name="separator",
                    excluded_option_value=separator,
                    reason=_(
                        "A standard CSV separator (delimiter) is only one character long"
                    ),
                )
            )
        if separator == "|":
            # We use comma as the default separator, so we override the pipe.
            # This does not allow users to generate CSV with a pipe but, unlike
            # the C API, the Python interface spec does not allow resetting the
            # default except by setting it to an empty string, which has no
            # precedent in the current code and whose behavior is unclear.
            separator = ","
    if output_format in ["json", "yaml"] and header:
        gs.fatal(
            message_option_value_excludes_flag(
                option_name="format",
                option_value=output_format,
                flag_name="u",
                reason=_("Column names are always included"),
            )
        )
        # We ignore a separator set for JSON and YAML because of the default
        # value which is always present (see above). Having no default and
        # producing an error when one is set would be clearer and would fit
        # with using different defaults for plain and CSV formats.
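        # Note: message_option_value_excludes_flag and its sibling helpers are
        # defined elsewhere in this module; presumably they just build a
        # standardized error message, roughly along these lines (sketch):
        #   def message_option_value_excludes_flag(option_name, option_value,
        #                                          flag_name, reason):
        #       return _("Flag -{f} cannot be used with {o}={v}: {r}").format(
        #           f=flag_name, o=option_name, v=option_value, r=reason)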
    elif (output_format == "line" or method == "comma") and separator == "|":
        # Same as for CSV: Custom default needed.
        # Pipe is currently not supported at all.
        separator = ","

    if method in ["delta", "deltagaps", "gran"]:
        if order:
            gs.fatal(
                message_option_value_excludes_option(
                    option_name="method",
                    option_value=method,
                    excluded_option_name="order",
                    reason=_("Values are always ordered by start_time"),
                )
            )
        if columns:
            columns_list = columns.split(",")
            for column in [
                "semantic_label",
                "creator",
                "temporal_type",
                "creation_time",
                "north",
                "south",
                "west",
                "east",
                "nsres",
                "ewres",
                "cols",
                "rows",
                "number_of_cells",
                "min",
                "max",
            ]:
                if column in columns_list:
                    gs.fatal(
                        message_option_value_excludes_option_value(
                            option_name="method",
                            option_value=method,
                            excluded_option_name="columns",
                            excluded_option_value=columns,
                            reason=_(
                                "Column '{name}' is not available with the method '{method}'"
                            ).format(name=column, method=method),
                        )
                    )
    elif columns:
        columns_list = columns.split(",")
        for column in ["interval_length", "distance_from_begin"]:
            if column in columns_list:
                gs.fatal(
                    message_option_value_excludes_option_value(
                        option_name="method",
                        option_value=method,
                        excluded_option_name="columns",
                        excluded_option_value=columns,
                        reason=_(
                            "Column '{name}' is not available with the method '{method}'"
                        ).format(name=column, method=method),
                    )
                )
    if output_format == "line" or method == "comma":
        columns_list = columns.split(",")
        if len(columns_list) > 1:
            gs.fatal(
                message_option_value_excludes_option_value(
                    option_name="format",
                    option_value=output_format,
                    excluded_option_name="columns",
                    excluded_option_value=columns,
                    reason=_("Only one column is allowed (not {num_columns})").format(
                        num_columns=len(columns_list)
                    ),
                )
            )
    if method == "gran" and where:
        gs.fatal(
            message_option_value_excludes_option(
                option_name="method",
                option_value=method,
                excluded_option_name="where",
                reason=_("All maps are always listed"),
            )
        )

    # Make sure the temporal database exists
    tgis.init()

    tgis.list_maps_of_stds(
        "strds",
        input,
        columns,
        order,
        where,
        separator,
        method,
        header,
        granule,
        outpath=output,
        output_format=output_format,
    )
Example #31
def main():

    global insert_sql
    insert_sql = None
    global temporary_vect
    temporary_vect = None
    global stats_temp_file
    stats_temp_file = None

    segment_map = options['map']
    csvfile = options['csvfile'] if options['csvfile'] else []
    vectormap = options['vectormap'] if options['vectormap'] else []
    global rasters
    rasters = options['rasters'].split(',') if options['rasters'] else []
    area_measures = (options['area_measures'].split(',')
                     if options['area_measures'] and not flags['s'] else [])
    if area_measures:
        if not gscript.find_program('r.object.geometry', '--help'):
            message = _("You need to install the addon r.object.geometry "
                        "to be able to calculate area measures.\n")
            message += _("You can install the addon with "
                         "'g.extension r.object.geometry'")
            gscript.fatal(message)
    neighborhood = bool(flags['n'])
    if neighborhood:
        if not gscript.find_program('r.neighborhoodmatrix', '--help'):
            message = _("You need to install the addon r.neighborhoodmatrix "
                        "to be able to calculate neighborhood statistics.\n")
            message += _("You can install the addon with "
                         "'g.extension r.neighborhoodmatrix'")
            gscript.fatal(message)

    raster_statistics = (options['raster_statistics'].split(',')
                         if options['raster_statistics'] else [])
    separator = gscript.separator(options['separator'])
    processes = int(options['processes'])

    output_header = ['cat']
    output_dict = collections.defaultdict(list)

    raster_stat_dict = {
        'zone': 0,
        'min': 4,
        'third_quart': 16,
        'max': 5,
        'sum': 12,
        'null_cells': 3,
        'median': 15,
        'label': 1,
        'first_quart': 14,
        'range': 6,
        'mean_of_abs': 8,
        'stddev': 9,
        'non_null_cells': 2,
        'coeff_var': 11,
        'variance': 10,
        'sum_abs': 13,
        'perc_90': 17,
        'mean': 7
    }
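    # These indices appear to map each statistic name to its column in the
    # pipe-separated table output of 'r.univar -t -e' (extended statistics),
    # which the 'worker' function (defined elsewhere) presumably produces.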

    geometry_stat_dict = {
        'cat': 0,
        'area': 1,
        'perimeter': 2,
        'compact_square': 3,
        'compact_circle': 4,
        'fd': 5,
        'xcoords': 6,
        'ycoords': 7
    }
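    # These indices appear to correspond to the columns of the
    # pipe-separated output of 'r.object.geometry':
    #   cat|area|perimeter|compact_square|compact_circle|fd|xcoords|ycoords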

    if flags['r']:
        gscript.use_temp_region()
        gscript.run_command('g.region', raster=segment_map)

    stats_temp_file = gscript.tempfile()
    if area_measures:
        gscript.message(_("Calculating geometry statistics..."))
        output_header += area_measures
        stat_indices = [geometry_stat_dict[x] for x in area_measures]
        gscript.run_command('r.object.geometry',
                            input_=segment_map,
                            output=stats_temp_file,
                            overwrite=True,
                            quiet=True)

        firstline = True
        with open(stats_temp_file, 'r') as fin:
            for line in fin:
                if firstline:
                    firstline = False
                    continue
                values = line.rstrip().split('|')
                output_dict[values[0]] = [values[x] for x in stat_indices]

    if rasters:
        if not flags['c']:
            gscript.message(_("Checking usability of raster maps..."))
            rasters_to_remove = []
            for raster in rasters:
                null_values_found = False
                if not gscript.find_file(raster, element='cell')['name']:
                    gscript.message(_("Cannot find raster '%s'" % raster))
                    gscript.message(_("Removing this raster from list."))
                    rasters_to_remove.append(raster)
                    continue
                current_mapset = gscript.gisenv()['MAPSET']
                if gscript.find_file('MASK',
                                     element='cell',
                                     mapset=current_mapset)['name']:

                    null_test = gscript.read_command('r.stats',
                                                     flags='N',
                                                     input_=['MASK', raster],
                                                     quiet=True).splitlines()
                    if '1 *' in null_test:
                        null_values_found = True

                else:
                    raster_info = gscript.parse_command('r.univar',
                                                        flags='g',
                                                        map_=raster,
                                                        quiet=True)
                    if len(raster_info) == 0 or int(
                            raster_info['null_cells']) > 0:
                        null_values_found = True

                if null_values_found:
                    message = 'Raster <%s> contains null values.\n' % raster
                    message += 'This can lead to errors in the calculations.\n'
                    message += 'Check region settings and raster extent.\n'
                    message += 'Possibly fill null values of raster.\n'
                    message += 'Removing this raster from list.'
                    gscript.warning(message)
                    rasters_to_remove.append(raster)

            for raster in rasters_to_remove:
                rasters.remove(raster)

        if len(rasters) > 0:
            gscript.message(
                _("Calculating statistics for the following raster maps:"))
            gscript.message(','.join(rasters))
            if len(rasters) < processes:
                processes = len(rasters)
                gscript.message(
                    _("Only one process per raster. Reduced number of "
                      "processes to %i.") % processes)

            stat_indices = [raster_stat_dict[x] for x in raster_statistics]
            pool = Pool(processes)
            func = partial(worker, segment_map, stats_temp_file)
            pool.map(func, rasters)
            pool.close()
            pool.join()
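            # 'worker' is defined elsewhere in the script; it presumably runs
            # something like 'r.univar -t -e map=<raster> zones=<segment_map>'
            # and writes its table to stats_temp_file + '.' + <raster name>,
            # the per-raster file read back below.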

            for raster in rasters:
                rastername = raster.split('@')[0]
                rastername = rastername.replace('.', '_')
                temp_file = stats_temp_file + '.' + rastername
                output_header += [
                    rastername + "_" + x for x in raster_statistics
                ]
                firstline = True
                with open(temp_file, 'r') as fin:
                    for line in fin:
                        if firstline:
                            firstline = False
                            continue
                        values = line.rstrip().split('|')
                        output_dict[values[0]] = output_dict[values[0]] + [
                            values[x] for x in stat_indices
                        ]

    # Calculating neighborhood statistics if requested
    if neighborhood:

        gscript.message(_("Calculating neighborhood statistics..."))

        # Add neighborhood statistics to headers
        original_nb_values = len(output_header) - 1
        new_headers = ['neighbors_count']
        for i in range(1, len(output_header)):
            new_headers.append('%s_nbrmean' % output_header[i])
            new_headers.append('%s_nbrstddev' % output_header[i])

        output_header += new_headers

        # Get sorted neighborhood matrix
        nbr_matrix = sorted([
            x.split('|')
            for x in gscript.read_command('r.neighborhoodmatrix',
                                          input_=segment_map,
                                          flags='d',
                                          quiet=True).splitlines()
        ])

        # Calculate mean and stddev of neighbor values for each variable in the
        # output_dict
        for key, group in groupby(nbr_matrix, lambda x: x[0]):
            d = {}
            for i in range(original_nb_values):
                d[i] = (0, 0, 0)
            nbrlist = [str(x[1]) for x in group]
            if len(nbrlist) > 1:
                for nbr in nbrlist:
                    for i in range(original_nb_values):
                        d[i] = update(d[i], float(output_dict[nbr][i]))
                output_dict[key] = output_dict[key] + [str(len(nbrlist))]
                output_dict[key] = output_dict[key] + [
                    str(i) for sub in [finalize(x) for x in d.values()]
                    for i in sub
                ]
            else:
                newvalues = ['1']
                nbr = nbrlist[0]
                for i in range(original_nb_values):
                    newvalues.append(output_dict[nbr][i])
                    newvalues.append('0')
                output_dict[key] = output_dict[key] + newvalues
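    # Note on 'update' and 'finalize' (defined elsewhere): each d[i] holds a
    # running aggregate, presumably the (count, mean, M2) triple of Welford's
    # online algorithm, updated roughly like this (sketch):
    #   def update(agg, value):
    #       count, mean, m2 = agg
    #       count += 1
    #       delta = value - mean
    #       mean += delta / count
    #       m2 += delta * (value - mean)
    #       return (count, mean, m2)
    # with finalize() returning the (mean, stddev) pair appended per variable.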

    message = _("Some values could not be calculated for the objects below. ")
    message += _("These objects are thus not included in the results. ")
    message += _("HINT: Check some of the raster maps for null values ")
    message += _("and possibly fill these values with r.fillnulls.")
    error_objects = []

    if csvfile:
        with open(csvfile, 'w') as f:
            f.write(separator.join(output_header) + "\n")
            for key in output_dict:
                if len(output_dict[key]) + 1 == len(output_header):
                    f.write(key + separator +
                            separator.join(output_dict[key]) + "\n")
                else:
                    error_objects.append(key)

    if vectormap:
        gscript.message(_("Creating output vector map..."))
        temporary_vect = 'segmstat_tmp_vect_%d' % os.getpid()
        gscript.run_command('r.to.vect',
                            input_=segment_map,
                            output=temporary_vect,
                            type_='area',
                            flags='vt',
                            overwrite=True,
                            quiet=True)

        insert_sql = gscript.tempfile()
        fsql = open(insert_sql, 'w')
        fsql.write('BEGIN TRANSACTION;\n')
        if gscript.db_table_exist(temporary_vect):
            if gscript.overwrite():
                fsql.write('DROP TABLE %s;' % temporary_vect)
            else:
                gscript.fatal(
                    _("Table %s already exists. Use --o to overwrite") %
                    temporary_vect)
        create_statement = 'CREATE TABLE ' + temporary_vect + ' (cat int PRIMARY KEY);\n'
        fsql.write(create_statement)
        for header in output_header[1:]:
            addcol_statement = 'ALTER TABLE %s ADD COLUMN %s double precision;\n' % (
                temporary_vect, header)
            fsql.write(addcol_statement)
        for key in output_dict:
            if len(output_dict[key]) + 1 == len(output_header):
                sql = "INSERT INTO %s VALUES (%s, %s);\n" % (
                    temporary_vect, key, ",".join(output_dict[key]))
                sql = sql.replace('inf', 'NULL')
                sql = sql.replace('nan', 'NULL')
                fsql.write(sql)
            else:
                if not csvfile:
                    error_objects.append(key)

        fsql.write('END TRANSACTION;')
        fsql.close()

        gscript.run_command('db.execute', input=insert_sql, quiet=True)
        gscript.run_command('v.db.connect',
                            map_=temporary_vect,
                            table=temporary_vect,
                            quiet=True)
        gscript.run_command('g.copy',
                            vector="%s,%s" % (temporary_vect, vectormap),
                            quiet=True)

    if error_objects:
        object_string = ', '.join(error_objects[:100])
        message += _(
            "\n\nObjects with errors (only the first 100 are shown):\n%s"
        ) % object_string
        gscript.message(message)