예제 #1
0
def main(date, delete, keywords=None, byLeaf=True, saveProportion=0.5):
    """
    Generates ML training and testing data from extracted CSV files

    :param date: (string) Data collection date YYYY_MMDD; used as the key into
                          DATA_DIRECTORIES (and date + "_ML" for the ML output
                          directory when deleting)
    :param delete: (boolean) Determines whether or not to delete the existing
                             training/testing/sample-count data files
    :param keywords: (list of strings) Data filename keywords. Defaults to an
                                       empty list when omitted or None.
    :param byLeaf: (boolean) Should we separate the train/test data
                             by leaf, or should we randomly separate
                             the data according to a set proportion?
                             Forwarded to DataManipulation.separateTrainTest.
    :param saveProportion: (float) Amount of each CSV file to save as training
                                   and testing data. Forwarded to
                                   DataManipulation.separateTrainTest.

    :return: (None)
    """

    # Use a fresh list per call: a literal [] default would be one shared
    # object that any callee could mutate for all later calls.
    if keywords is None:
        keywords = []

    # Get the data files we will be looking at
    dataPath = DATA_DIRECTORIES[date]
    dataFilenames = FileIO.getDatafileNames(dataPath, keywords)

    # If desired, remove the old training data and start fresh
    if delete:
        mlDataPath = DATA_DIRECTORIES[date + "_ML"]
        staleFiles = (TRAINING_DATA_PATH, TESTING_DATA_PATH, SAMPLE_COUNTS_PATH)

        for relativePath in staleFiles:
            stalePath = os.path.join(mlDataPath, relativePath)
            # A missing file is fine; only remove what is actually there
            if os.path.exists(stalePath):
                os.remove(stalePath)

    # Consolidate the CSV files into training and testing data
    (train_X, train_y, test_X, test_y) = DataManipulation.separateTrainTest(
        dataPath,
        dataFilenames,
        byLeaf=byLeaf,
        saveProportion=saveProportion)

    # Save the training and testing data in the proper spot
    FileIO.saveTrainingData(date, train_X, train_y)
    FileIO.saveTestingData(date, test_X, test_y)
예제 #2
0
def main(date, wavelengths, keywords=None, allSpectra=False):
    """
    Plot three wavelengths against each other from a specified set of data.

    :param date: (string) Data collection date YYYY_MMDD
    :param wavelengths: (3-tuple) Wavelengths to plot against another
    :param keywords: (list of strings) Strings which should be included in the
                                       filenames of files being plotted.
                                       Defaults to an empty list when omitted
                                       or None.
    :param allSpectra: (boolean) Determines whether there is one point for
                                 every spectrum collected, or one point for
                                 every leaf file (the per-column mean)

    :return: (None)
    :raises Exception: if the second '_'-separated token of a usable data
                       file's name is not SUSCEPTIBLE, DR_RESISTANT or
                       GR_RESISTANT
    """

    # Use a fresh list per call instead of one shared mutable default
    if keywords is None:
        keywords = []

    # Convert the wavelengths to indices for accessing the data.
    # A list (not a lazy map object) so it can be indexed on Python 2 and 3.
    wavelengthIndices = [wavelengthToIndex(w) for w in wavelengths]
    columnIndices = [wavelengthIndices[0],
                     wavelengthIndices[1],
                     wavelengthIndices[2]]

    # Get the data files we will be looking at
    dataPath = DATA_DIRECTORIES[date]
    filesToPlot = FileIO.getDatafileNames(dataPath, keywords)

    # Per-class accumulators. Pieces are collected in plain lists and stacked
    # once at the end; growing an array with np.append inside the loop copies
    # everything each time, and testing an array with "== []" is an
    # unbroadcastable comparison that newer NumPy releases reject.
    pointChunks = {SUSCEPTIBLE: [], DR_RESISTANT: [], GR_RESISTANT: []}

    for name in filesToPlot:

        # Drop the last four characters of the name before tokenizing
        # (presumably the file extension -- TODO confirm).
        # NOTE(review): tokens are compared case-sensitively. An earlier
        # lowercasing attempt discarded its result and never took effect;
        # confirm whether case-insensitive matching is actually wanted.
        tokens = name[0:-4].split('_')
        resistance = tokens[1]

        filePath = os.path.join(dataPath, name)
        data = FileIO.loadCSV(filePath)

        # Skip anything that is not a 2-D table with at least two columns
        try:
            _, columns = data.shape
        except (AttributeError, TypeError, ValueError):
            continue
        if columns < 2:
            continue

        if resistance not in pointChunks:
            raise Exception("Unknown resistance type: " + resistance)

        if allSpectra:
            # One (x, y, z) point per row of the file
            points = data[:, columnIndices].astype(float)
        else:
            # One (x, y, z) point per file: the column-wise mean
            mean = np.mean(data, axis=0)
            points = [mean[i] for i in columnIndices]

        pointChunks[resistance].append(points)

    # Plot the wavelengths
    pointsSUS = _stackPoints(pointChunks[SUSCEPTIBLE])
    pointsDR = _stackPoints(pointChunks[DR_RESISTANT])
    pointsGR = _stackPoints(pointChunks[GR_RESISTANT])

    traceSUS = plotPoints(pointsSUS, RESISTANCE_STRINGS[SUSCEPTIBLE], 'rgba(255, 0, 0, 0)')
    traceDR = plotPoints(pointsDR, RESISTANCE_STRINGS[DR_RESISTANT], 'rgba(0, 255, 0, 0)')
    traceGR = plotPoints(pointsGR, RESISTANCE_STRINGS[GR_RESISTANT], 'rgba(0, 0, 255, 0)')

    layout = go.Layout(
        title='3D Wavelength Plot',
        scene=go.Scene(
            xaxis=go.XAxis(title='Reflectance @ ' + str(wavelengths[0]) + ' nm'),
            yaxis=go.YAxis(title='Reflectance @ ' + str(wavelengths[1]) + ' nm'),
            zaxis=go.ZAxis(title='Reflectance @ ' + str(wavelengths[2]) + ' nm')
        )
    )

    traces = [traceSUS, traceDR, traceGR]
    fig = go.Figure(data=traces, layout=layout)
    py.iplot(fig, filename='3D Wavelength Plot')


def _stackPoints(chunks):
    """
    Stack accumulated point rows/blocks into a single (N, 3) array.

    :param chunks: (list) 3-element rows and/or (rows, 3) arrays

    :return: (numpy array) Stacked points, or an empty 1-D array when
                           nothing was collected
    """
    if not chunks:
        return np.array([])
    return np.vstack(chunks)
def main(date, wavelengths, plotLeaves, binning, keywords=[]):
    """
    Plot the histogram of a specified list of wavelengths.

    :param date: (string) Data collection date YYYY_MMDD
    :param wavelengths: (list) Wavelengths to plot histograms
    :param plotLeaves: (boolean) Plot only a single point per 
                                 leaf vs. all spectra in a leaf
    :param binning: (float) Wavelength binning width (in nm)
    :param keywords: (list of strings) Strings which should be included in the
                                       filenames of files being plotted

    :return: (None)
    """

    numHistograms = len(wavelengths)

    # Get the data files we will be looking at
    dataPath = DATA_DIRECTORIES[date]
    filesToPlot = FileIO.getDatafileNames(dataPath, keywords)

    pointsDR = np.zeros((1, numHistograms))
    pointsGR = np.zeros((1, numHistograms))
    pointsSUS = np.zeros((1, numHistograms))

    for name in filesToPlot:

        tokens = name[0:-4].split('_')
        map(lambda x: x.lower(), tokens)

        plant = tokens[0]
        resistance = tokens[1]
        imageType = tokens[2]
        index = int(tokens[4])

        filePath = os.path.join(dataPath, name)
        data = FileIO.loadCSV(filePath)

        # Extract the relevant data from the spectra in the data file
        try:
            if not binning:
                wavelengthIndices = map(wavelengthToIndex, wavelengths)
                histogramData = data[:, wavelengthIndices]
            else:
                
                indexRegions = map(lambda x: wavelengthRegionToIndices(x, binning), wavelengths)
                rows, columns = data.shape
                histogramData = np.zeros((rows, numHistograms))

                for i in xrange(numHistograms):

                    histogramData[:, i] = map(lambda j: np.mean(data[j,indexRegions[i]]), xrange(rows))


        except Exception, e:
            print "Error with file:", name
            continue

        if plotLeaves:

            meanLeaf = map(lambda i: np.mean(histogramData[:,i]), xrange(numHistograms))

            if resistance == SUSCEPTIBLE:
                pointsSUS = np.append(pointsSUS, [meanLeaf], axis=0)
            elif resistance == DR_RESISTANT:
                pointsDR = np.append(pointsDR, [meanLeaf], axis=0)
            elif resistance == GR_RESISTANT:
                pointsGR = np.append(pointsGR, [meanLeaf], axis=0)
            else:
                raise Exception("Unknown resistance type: " + resistance)

        else:

            if resistance == SUSCEPTIBLE:
                pointsSUS = np.append(pointsSUS, histogramData, axis=0)
            elif resistance == DR_RESISTANT:
                pointsDR = np.append(pointsDR, histogramData, axis=0)
            elif resistance == GR_RESISTANT:
                pointsGR = np.append(pointsGR, histogramData, axis=0)
            else:
                raise Exception("Unknown resistance type: " + resistance)