Esempio n. 1
0
def blockTraining(rawData, theta, xBlockRange, yBlockRange):
    """
    Fit a random forest on the samples that fall inside one block.

    rawData is read through its "lon", "lat" and "classif" entries; position
    i of each entry describes the same sample.
    theta is accepted but not used by this function.
    xBlockRange and yBlockRange hold the lower bound at position 0 and the
    upper bound at position 1; both bounds are exclusive.

    Returns the RandomForestClassifier after it has been fitted.
    """
    lonMin, lonMax = xBlockRange[0], xBlockRange[1]
    latMin, latMax = yBlockRange[0], yBlockRange[1]

    points = []
    classes = []
    lons = rawData["lon"]
    for idx in range(len(lons)):
        lon = lons[idx]
        lat = rawData["lat"][idx]
        # Skip every sample that is not strictly inside the block.
        if not (lonMin < lon < lonMax and latMin < lat < latMax):
            continue
        points.append([lon, lat])
        # Labels go through mapping.downLabels before they are used.
        classes.append(mapping.downLabels(rawData["classif"][idx]))

    # NOTE(review): cleanDoubles presumably drops duplicated points - confirm.
    points, classes = mapping.cleanDoubles(points, classes)

    forest = RandomForestClassifier()
    forest.fit(points, classes)
    return forest
Esempio n. 2
0
def running(dx, withData=True, compare=True, proj="M", filename="fullData.txt"):
    """
    Train the classifier, predict on a regular grid and produce the map.

    dx gives the resolution, dx=1 for 1x1 grid
    withData is a boolean, withData=True plots the data points on top of
    the map
    compare is a boolean, compare=True additionally calls compareStats on
    the raw data
    proj is the projection, it has the same options as gmt, Mercator
    projection is the default
    filename is the name of the file that stores the data; names ending in
    "txt" are read with readData.readTxt, names ending in "xlsx" with
    readData.readCols

    Returns None. For any other file name a message is printed and nothing
    else is done.
    """
    # Reading data. Done first so an unsupported file is rejected before
    # any other work happens.
    if filename.endswith("txt"):
        rawData = readData.readTxt(filename)
    elif filename.endswith("xlsx"):
        rawData = readData.readCols(filename)
    else:
        # Single-argument parenthesised print works on Python 2 and 3.
        print("I don't think I can handle this format")
        return

    # Regular grid the prediction is evaluated on.
    xpoints = np.arange(-180, 180, dx)
    ypoints = np.arange(-90 + dx, 90, dx)

    # Classifier being trained
    rfc = mapping.training(rawData)

    # Cross-validation of the classifier on the cleaned data
    data = [(rawData["lon"][i], rawData["lat"][i]) for i in range(len(rawData["lon"]))]
    labels = [mapping.downLabels(i) for i in rawData["classif"]]
    data, labels = mapping.cleanDoubles(data, labels)
    scores = cross_val_score(rfc, data, labels, cv=5)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # Map being made with the classifier
    prediction = mapping.mapping(xpoints, ypoints, rfc)

    # Map being written to a file
    writeResults.writePredictions(
        xpoints,
        ypoints,
        prediction,
        header="> Predictions made with training.py",
        newFilename="seabed_lithology_regular_grid.txt",
    )

    # GMT file written
    writeResults.makeMap(dx, withData, proj)

    # Postscript called to call gmt
    writeResults.gmtMap()

    # Formatted explicitly so the output is "N M" on Python 2 and 3 alike.
    print("%d %d" % (len(rawData["lon"]), len(rawData["classif"])))

    # Statistics being compared. Only this step depends on theta, so it is
    # the only part that loops over the candidate values.
    if compare:
        theta0 = [600000]
        for theta in theta0:
            compareStats(rawData, theta)
Esempio n. 3
0
def compareStats(rawData, theta):
    """
    Fit a GaussianProcess on the raw data and print its cross-validation score.

    rawData is read through its "lon", "lat" and "classif" entries; the
    "classif" values are handed to mapping.cleanDoubles without any prior
    conversion.
    theta is passed to GaussianProcess as theta0.

    Nothing is returned; the mean of the 5-fold scores and twice their
    standard deviation are printed.
    """
    lons = rawData["lon"]
    points = []
    for idx in range(len(lons)):
        points.append([lons[idx], rawData["lat"][idx]])
    targets = rawData["classif"]

    # NOTE(review): cleanDoubles presumably drops duplicated points - confirm.
    points, targets = mapping.cleanDoubles(points, targets)

    model = GaussianProcess(regr="linear", theta0=theta)
    model.fit(points, targets)
    cvScores = cross_val_score(model, points, targets, cv=5)

    message = "Accuracy: %0.2f (+/- %0.2f)" % (cvScores.mean(), cvScores.std() * 2)
    print(message)