def main(date, delete, keywords=None, byLeaf=True, saveProportion=0.5):
    """
    Generates ML training and testing data from extracted CSV files

    :param date: (string) Data collection date YYYY_MMDD. Used as a key into
                 DATA_DIRECTORIES (and, with an '_ML' suffix, to locate the
                 ML output directory when deleting old files)
    :param delete: (boolean) Determines whether or not to delete the existing
                   training data, testing data and sample counts files before
                   generating new data
    :param keywords: (list of strings) Data filename keywords. None (the
                     default) is treated as an empty list
    :param byLeaf: (boolean) Should we separate the train/test data by leaf,
                   or should we randomly separate the data according to a set
                   proportion? Passed through to
                   DataManipulation.separateTrainTest
    :param saveProportion: (float) Amount of each CSV file to save as training
                           and testing data. Passed through to
                           DataManipulation.separateTrainTest

    :return: (None)
    """

    # Use a fresh list per call; a `[]` default would be one shared object
    # reused (and possibly mutated) across every invocation.
    if keywords is None:
        keywords = []

    # Get the data files we will be looking at
    dataPath = DATA_DIRECTORIES[date]
    dataFilenames = FileIO.getDatafileNames(dataPath, keywords)

    # If desired, remove the old training data and start fresh
    if delete:
        mlDataPath = DATA_DIRECTORIES[date + "_ML"]
        staleFiles = (TRAINING_DATA_PATH, TESTING_DATA_PATH, SAMPLE_COUNTS_PATH)

        for relativePath in staleFiles:
            stalePath = os.path.join(mlDataPath, relativePath)
            # A missing file simply means there is nothing to clean up
            if os.path.exists(stalePath):
                os.remove(stalePath)

    # Consolidate the CSV files into training and testing data
    (train_X, train_y, test_X, test_y) = DataManipulation.separateTrainTest(
        dataPath,
        dataFilenames,
        byLeaf=byLeaf,
        saveProportion=saveProportion)

    # Save the training and testing data in the proper spot
    FileIO.saveTrainingData(date, train_X, train_y)
    FileIO.saveTestingData(date, test_X, test_y)
def _stackPointChunks(chunks):
    """
    Combine a list of (n_i, 3) point arrays into a single array.

    :param chunks: (list of np.array) Point arrays collected for one class

    :return: (np.array) All rows stacked along axis 0, or an empty array when
             no points were collected
    """

    if not chunks:
        return np.array([])
    return np.concatenate(chunks, axis=0)


def main(date, wavelengths, keywords=None, allSpectra=False):
    """
    Plot three wavelengths against each other from a specified set of data.

    :param date: (string) Data collection date YYYY_MMDD
    :param wavelengths: (3-tuple) Wavelengths to plot against another
    :param keywords: (list of strings) Strings which should be included in the
                     filenames of files being plotted. None (the default) is
                     treated as an empty list
    :param allSpectra: (boolean) Determines whether there is one point for
                       every spectrum (row) collected, or one point for every
                       leaf file (the column-wise mean of that file)

    :return: (None)
    :raises Exception: if the second '_'-separated token of a filename is not
                       one of SUSCEPTIBLE, DR_RESISTANT or GR_RESISTANT
    """

    # Use a fresh list per call instead of a shared mutable default
    if keywords is None:
        keywords = []

    # Convert the wavelengths to indices for accessing the data. A real list
    # is built because a bare map() cannot be indexed on Python 3.
    wavelengthIndices = [wavelengthToIndex(w) for w in wavelengths]
    selectedColumns = [wavelengthIndices[0],
                       wavelengthIndices[1],
                       wavelengthIndices[2]]

    # Get the data files we will be looking at
    dataPath = DATA_DIRECTORIES[date]
    filesToPlot = FileIO.getDatafileNames(dataPath, keywords)

    # Collect per-file chunks and stack them once at the end; this avoids both
    # repeated np.append copies and comparing an ndarray against [].
    chunksDR = []
    chunksGR = []
    chunksSUS = []

    for name in filesToPlot:

        # Strip the 4-character extension and split the name into its fields.
        # The resistance token is compared case-sensitively below.
        tokens = name[0:-4].split('_')
        resistance = tokens[1]

        filePath = os.path.join(dataPath, name)
        data = FileIO.loadCSV(filePath)

        # Skip anything that is not a 2-D table with at least two columns
        try:
            _, columns = data.shape
        except (AttributeError, TypeError, ValueError):
            continue
        if columns < 2:
            continue

        if allSpectra:
            # One point per spectrum: the three selected columns of every row
            points = np.asarray(data[:, selectedColumns], dtype=float)
        else:
            # One point per file: the mean spectrum at the three wavelengths
            mean = np.mean(data, axis=0)
            points = np.array([[mean[selectedColumns[0]],
                                mean[selectedColumns[1]],
                                mean[selectedColumns[2]]]])

        if resistance == SUSCEPTIBLE:
            chunksSUS.append(points)
        elif resistance == DR_RESISTANT:
            chunksDR.append(points)
        elif resistance == GR_RESISTANT:
            chunksGR.append(points)
        else:
            raise Exception("Unknown resistance type: " + resistance)

    # Plot the wavelengths
    pointsDR = _stackPointChunks(chunksDR)
    pointsGR = _stackPointChunks(chunksGR)
    pointsSUS = _stackPointChunks(chunksSUS)

    traceSUS = plotPoints(pointsSUS, RESISTANCE_STRINGS[SUSCEPTIBLE], 'rgba(255, 0, 0, 0)')
    traceDR = plotPoints(pointsDR, RESISTANCE_STRINGS[DR_RESISTANT], 'rgba(0, 255, 0, 0)')
    traceGR = plotPoints(pointsGR, RESISTANCE_STRINGS[GR_RESISTANT], 'rgba(0, 0, 255, 0)')

    layout = go.Layout(
        title='3D Wavelength Plot',
        scene=go.Scene(
            xaxis=go.XAxis(title='Reflectance @ ' + str(wavelengths[0]) + ' nm'),
            yaxis=go.YAxis(title='Reflectance @ ' + str(wavelengths[1]) + ' nm'),
            zaxis=go.ZAxis(title='Reflectance @ ' + str(wavelengths[2]) + ' nm')
        )
    )

    data = [traceSUS, traceDR, traceGR]
    fig = go.Figure(data=data, layout=layout)
    py.iplot(fig, filename='3D Wavelength Plot')
def main(date, wavelengths, plotLeaves, binning, keywords=[]): """ Plot the histogram of a specified list of wavelengths. :param date: (string) Data collection date YYYY_MMDD :param wavelengths: (list) Wavelengths to plot histograms :param plotLeaves: (boolean) Plot only a single point per leaf vs. all spectra in a leaf :param binning: (float) Wavelength binning width (in nm) :param keywords: (list of strings) Strings which should be included in the filenames of files being plotted :return: (None) """ numHistograms = len(wavelengths) # Get the data files we will be looking at dataPath = DATA_DIRECTORIES[date] filesToPlot = FileIO.getDatafileNames(dataPath, keywords) pointsDR = np.zeros((1, numHistograms)) pointsGR = np.zeros((1, numHistograms)) pointsSUS = np.zeros((1, numHistograms)) for name in filesToPlot: tokens = name[0:-4].split('_') map(lambda x: x.lower(), tokens) plant = tokens[0] resistance = tokens[1] imageType = tokens[2] index = int(tokens[4]) filePath = os.path.join(dataPath, name) data = FileIO.loadCSV(filePath) # Extract the relevant data from the spectra in the data file try: if not binning: wavelengthIndices = map(wavelengthToIndex, wavelengths) histogramData = data[:, wavelengthIndices] else: indexRegions = map(lambda x: wavelengthRegionToIndices(x, binning), wavelengths) rows, columns = data.shape histogramData = np.zeros((rows, numHistograms)) for i in xrange(numHistograms): histogramData[:, i] = map(lambda j: np.mean(data[j,indexRegions[i]]), xrange(rows)) except Exception, e: print "Error with file:", name continue if plotLeaves: meanLeaf = map(lambda i: np.mean(histogramData[:,i]), xrange(numHistograms)) if resistance == SUSCEPTIBLE: pointsSUS = np.append(pointsSUS, [meanLeaf], axis=0) elif resistance == DR_RESISTANT: pointsDR = np.append(pointsDR, [meanLeaf], axis=0) elif resistance == GR_RESISTANT: pointsGR = np.append(pointsGR, [meanLeaf], axis=0) else: raise Exception("Unknown resistance type: " + resistance) else: if resistance == 
SUSCEPTIBLE: pointsSUS = np.append(pointsSUS, histogramData, axis=0) elif resistance == DR_RESISTANT: pointsDR = np.append(pointsDR, histogramData, axis=0) elif resistance == GR_RESISTANT: pointsGR = np.append(pointsGR, histogramData, axis=0) else: raise Exception("Unknown resistance type: " + resistance)