コード例 #1
ファイル: blocks.py プロジェクト: hjonasson/DataMining
def blockTraining(rawData, theta, xBlockRange, yBlockRange):
    """Train a random forest on the samples that fall inside one block.

    Takes the data produced by readData, keeps the points whose
    longitude/latitude lie strictly inside the given ranges, puts them in
    the ``[[lon, lat], ...]`` / ``[label, ...]`` layout the ML package
    wants, de-duplicates them with ``mapping.cleanDoubles`` and fits a
    ``RandomForestClassifier``.

    Args:
        rawData: mapping with parallel "lon", "lat" and "classif" sequences.
        theta: not used by this function; kept so existing callers still work.
        xBlockRange: sequence whose first two items are the exclusive
            lower/upper longitude bounds of the block.
        yBlockRange: sequence whose first two items are the exclusive
            lower/upper latitude bounds of the block.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    x0 = xBlockRange[0]
    x1 = xBlockRange[1]
    y0 = yBlockRange[0]
    y1 = yBlockRange[1]

    data = []
    labels = []
    # NOTE(review): labels are taken unchanged from rawData["classif"];
    # confirm whether they should first go through mapping.downLabels.
    for xi, yi, label in zip(rawData["lon"], rawData["lat"], rawData["classif"]):
        if x0 < xi < x1 and y0 < yi < y1:
            data.append([xi, yi])
            # Keep labels in step with data: the classifier needs exactly
            # one label per retained sample.
            labels.append(label)

    data, labels = mapping.cleanDoubles(data, labels)
    rfc = RandomForestClassifier()
    rfc.fit(data, labels)

    return rfc
コード例 #2
ファイル: running.py プロジェクト: hjonasson/DataMining
def running(dx, withData=True, compare=True, proj="M", filename="fullData.txt"):
    """Train the classifier, print its cross-validation score and draw the map.

    Args:
        dx: gives the resolution, dx=1 for a 1x1 grid.
        withData: boolean; withData=True plots the data points on top of
            the map.
        compare: when true, ``compareStats`` is also run on the same data.
        proj: projection, same options as gmt; Mercator ("M") is the default.
        filename: name of the file that stores the data. Names ending in
            "txt" or "xlsx" are read; for anything else a message is
            printed and the function returns without doing any work.
    """
    theta0 = [600000]
    for theta in theta0:
        xpoints = np.arange(-180, 180, dx)
        ypoints = np.arange(-90 + dx, 90, dx)

        # Reading data
        if filename.endswith("txt"):
            rawData = readData.readTxt(filename)
        elif filename.endswith("xlsx"):
            rawData = readData.readCols(filename)
        else:
            print("I don't think I can handle this format")
            # Nothing was read, so stop here instead of failing later on an
            # unbound rawData.
            return

        # Classifier being trained
        rfc = mapping.training(rawData)

        # Map being made with the classifier
        data = [(lon, lat) for lon, lat in zip(rawData["lon"], rawData["lat"])]
        labels = [mapping.downLabels(label) for label in rawData["classif"]]
        data, labels = mapping.cleanDoubles(data, labels)
        scores = cross_val_score(rfc, data, labels, cv=5)
        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
        prediction = mapping.mapping(xpoints, ypoints, rfc)

        # Map being written to a file
        # FIXME(review): the call that followed this comment is truncated in
        # this copy of the source; only one keyword argument survived:
        #     header="> Predictions made with training.py",
        # Restore the full call from the upstream project; until then
        # ``prediction`` is computed but not used.

        # GMT file written
        writeResults.makeMap(dx, withData, proj)

        # Postscript called to call gmt
        # NOTE(review): no such call is visible in this copy of the source.
        print("%d %d" % (len(rawData["lon"]), len(rawData["classif"])))

        # Statistics being compared
        if compare:
            compareStats(rawData, theta)
コード例 #3
ファイル: running.py プロジェクト: hjonasson/DataMining
def compareStats(rawData, theta):
    """Fit a Gaussian process on the lon/lat samples and print its CV score.

    Builds ``[lon, lat]`` pairs from ``rawData["lon"]`` and
    ``rawData["lat"]``, uses ``rawData["classif"]`` as the targets, passes
    both through ``mapping.cleanDoubles``, fits a
    ``GaussianProcess(regr="linear", theta0=theta)`` and prints the mean of
    the 5-fold ``cross_val_score`` results together with twice their
    standard deviation (labelled "Accuracy" in the output).

    Args:
        rawData: mapping with "lon", "lat" and "classif" sequences.
        theta: value forwarded as ``theta0`` to ``GaussianProcess``.
    """
    points = []
    for idx in range(len(rawData["lon"])):
        lon = rawData["lon"][idx]
        lat = rawData["lat"][idx]
        points.append([lon, lat])
    classes = rawData["classif"]

    points, classes = mapping.cleanDoubles(points, classes)

    model = GaussianProcess(regr="linear", theta0=theta)
    model.fit(points, classes)

    cv_scores = cross_val_score(model, points, classes, cv=5)
    mean_score = cv_scores.mean()
    spread = cv_scores.std() * 2
    print("Accuracy: %0.2f (+/- %0.2f)" % (mean_score, spread))