def create_real_data():
    """Load the China-selected train/test CSVs and normalise them into
    28x28x1 float32 image tensors scaled to [0, 1].

    Results are published through the module-level globals
    ``x_train``, ``y_train``, ``x_test``, ``y_test`` (no return value).
    """
    global x_train, y_train, x_test, y_test

    # NOTE: renamed from `csv` — the original local shadowed the stdlib
    # `csv` module used by sibling functions in this project.
    csv_handler = CSVHandler()
    x_train, y_train = csv_handler.create_data_from_csv('train_selected_china.csv')
    x_test, y_test = csv_handler.create_data_from_csv('test_selected_china.csv')

    # Add the trailing channel axis expected by conv layers: (N, 28, 28, 1).
    x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
    x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

    # Scale raw pixel values (presumably 0-255) to floats in [0, 1].
    # The unused local `input_shape` from the original was removed.
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
def processLidarData(data_folder, dataset, limit):
    """Assemble per-sample LIDAR obstacle grids for dataset s008 and save
    train/validation splits as .npz files.

    Parameters
    ----------
    data_folder : str
        Output root; results go to ``<data_folder>/lidar_input/``.
    dataset : str
        Root containing ``lidar_data_s008/`` episode files and the
        coordinates CSV.
    limit : int
        Split point: samples ``[:limit]`` become train, ``[limit:]``
        become validation.
    """
    csvHand = CSVHandler()
    print("Generating LIDAR ...")
    lidarDataDir = dataset + "/" + "lidar_data_s008/"
    inputDataDir = data_folder + "/lidar_input/"
    coordFileName = "CoordVehiclesRxPerScene_s008"
    coordURL = dataset + "/" + coordFileName + ".csv"
    # Create the output directory on first use.
    if not (os.path.exists(inputDataDir)):
        os.mkdir(inputDataDir)
        print("Directory '% s' created" % inputDataDir)
    # nSamples counts the valid receiver records in the CSV — it sizes the
    # output tensor below.
    nSamples, lastEpisode, epi_scen = csvHand.getEpScenValbyRec(coordURL)
    # One 20x200x10 int8 obstacle grid per valid sample.
    obstacles_matrix_array_lidar = np.ones((nSamples, 20, 200, 10), np.int8)
    lidar_inputs_train = []
    lidar_inputs_test = []
    with open(coordURL) as csvfile:
        reader = csv.DictReader(csvfile)
        id_count = 0
        alreadyInMemoryEpisode = -1  # sentinel: no episode loaded yet
        for row in reader:
            episodeNum = int(row["EpisodeID"])
            # if (episodeNum < numEpisodeStart) | (episodeNum > numEpisodeEnd):
            #     continue  # skip episodes out of the interval
            isValid = row["Val"]  # "V" (valid) or "I" (invalid) per record
            if isValid == "I":
                continue  # skip invalid entries
            # Episode files are large, so only (re)load when the episode
            # number changes; rows for one episode are assumed contiguous.
            if episodeNum != alreadyInMemoryEpisode:
                print("Reading Episode " + str(episodeNum) + " ...")
                currentEpisodesInputs = np.load(
                    os.path.join(
                        lidarDataDir, "obstacles_e_" + str(episodeNum) + ".npz"
                    )
                )
                obstacles_matrix_array = currentEpisodesInputs["obstacles_matrix_array"]
                alreadyInMemoryEpisode = episodeNum  # update for later iterations
            r = int(row["VehicleArrayID"])  # receiver index within the episode
            obstacles_matrix_array_lidar[id_count] = obstacles_matrix_array[r]
            id_count = id_count + 1
    # Split and persist: first `limit` samples train, remainder validation.
    lidar_inputs_test = obstacles_matrix_array_lidar[limit:]
    lidar_inputs_train = obstacles_matrix_array_lidar[:limit]
    # train
    np.savez(inputDataDir + "lidar_train.npz", input=lidar_inputs_train)
    # test
    np.savez(inputDataDir + "lidar_validation.npz", input=lidar_inputs_test)
def getStatistics(code=None):
    """
    Takes: carrier code (str)
    Query variables: month (str), content-type (str), airport-code (str)
    Returns: flights-uri, minutes-uri, amount-uri
    """
    ## Load args ##
    month = request.args.get("month")
    contentType = request.args.get("content-type")
    airportCode = request.args.get("airport-code")
    if contentType == "None" or contentType is None:
        contentType = "application/json"
    print(str(month))
    queryString = "?airport-code=" + str(airportCode) + "&content-type=" + str(
        contentType) + "&month=" + str(month)
    ###############

    ## Logic ##
    # Guard clauses: flask.abort raises, so execution never continues past them.
    if code is None:
        flask.abort(400, "400(invalid paramater): carrier code invalid(None)")
    carrier = Carrier.query.filter_by(code=code).first()
    if carrier is None:
        flask.abort(400, "400(invalid paramater): carrier code invalid")

    # The original if/else on airportCode built byte-identical dicts in both
    # branches (the airport code is already baked into queryString), so one
    # payload suffices.  `payload` replaces the builtin-shadowing name `dict`.
    payload = {
        "flights-uri":
            "/carriers/" + code + "/statistics/flights" + queryString,
        "minutes-uri":
            "/carriers/" + code + "/statistics/delays/minutes" + queryString,
        "amount-uri":
            "/carriers/" + code + "/statistics/delays/amount" + queryString,
    }

    if contentType == "text/csv":
        return CSVHandler.getStatisticsCSV(dictionary=payload)
    return json.dumps(payload)
def processCoordinates(data_folder, dataset, rsu_coord, area_shp):
    """Read vehicle coordinates for dataset s008, normalise them into an
    RSU-centred frame, and persist train/validation splits as .npz files.

    Parameters
    ----------
    data_folder : str  — output directory prefix for the .npz files.
    dataset : str      — prefix of the coordinates CSV location.
    rsu_coord : sequence — RSU reference position used as the origin.
    area_shp : sequence of 4 — area bounding box; translated in place below.

    Returns
    -------
    int — the number of training coordinate samples.
    """
    print("Generating Beams ...")
    csvHand = CSVHandler()
    inputDataDir = data_folder
    coordFileName = "CoordVehiclesRxPerScene_s008"
    coordURL = dataset + coordFileName + ".csv"
    coordinates_train, context_train, coordinates_test, context_test = csvHand.getCoord(
        coordURL, 1564)

    # Translate the bounding box into the RSU-centred frame; the last two
    # entries then act as the normalising extents below.
    area_shp = [
        area_shp[0] - rsu_coord[0],
        area_shp[1] - rsu_coord[1],
        area_shp[2] - rsu_coord[0],
        area_shp[3] - rsu_coord[1],
    ]

    def _normalise(points):
        # Shift each coordinate by the RSU position, then scale by the
        # translated area extent: (a - b) / c per axis.
        return [[(float(a) - float(b)) / c
                 for a, b, c in zip(p, rsu_coord, area_shp[2:])]
                for p in points]

    # BUG FIX: the test set previously computed `a - (b / c)` due to a
    # misplaced parenthesis, while the train set computed `(a - b) / c`.
    # Both splits now use the identical (a - b) / c normalisation.
    coordinates_train = _normalise(coordinates_train)  # coordinates_train - rsu_coord
    coordinates_test = _normalise(coordinates_test)    # coordinates_test - rsu_coord

    train_channels = len(coordinates_train)
    # train
    np.savez(inputDataDir + "coord_train" + ".npz", coordinates=coordinates_train)
    np.savez(inputDataDir + "context_train" + ".npz", context=context_train)
    # test
    np.savez(inputDataDir + "coord_validation" + ".npz", coordinates=coordinates_test)
    np.savez(inputDataDir + "context_test" + ".npz", context=context_test)
    print("Coord npz files saved!")
    return train_channels
def processLidarData(num, csv_path):
    """Assemble per-sample LIDAR obstacle grids and save them as one
    compressed .npz file.

    Parameters
    ----------
    num : str
        Split tag — selects the episode directory ``./obstacles_<num>/``
        and names the output ``lidar_<num>.npz``.
    csv_path : str
        Path to the coordinates CSV listing episodes and receivers.
    """
    csvHand = CSVHandler()
    print('Generating LIDAR ...')
    lidarDataDir = './obstacles_' + num + '/'
    inputDataDir = './'
    coordFileName = csv_path
    coordURL = coordFileName
    # NOTE(review): inputDataDir is './', which always exists, so this
    # mkdir branch never fires in practice — left untouched.
    if not (os.path.exists(inputDataDir)):
        os.mkdir(inputDataDir)
        print("Directory '% s' created" % inputDataDir)
    # nSamples counts the valid receiver records — it sizes the tensor below.
    nSamples, lastEpisode, epi_scen = csvHand.getEpScenValbyRec(coordURL)
    # One 180x330x10 int8 obstacle grid per valid sample.
    obstacles_matrix_array_lidar = np.ones((nSamples, 180, 330, 10), np.int8)
    with open(coordURL) as csvfile:
        reader = csv.DictReader(csvfile)
        id_count = 0
        alreadyInMemoryEpisode = -1  # sentinel: no episode loaded yet
        for row in reader:
            episodeNum = int(row['EpisodeID'])
            # if (episodeNum < numEpisodeStart) | (episodeNum > numEpisodeEnd):
            #     continue  # skip episodes out of the interval
            isValid = row['Val']  # "V" (valid) or "I" (invalid) per record
            if isValid == 'I':
                continue  # skip invalid entries
            # Episode files are large, so only (re)load when the episode
            # number changes; rows for one episode are assumed contiguous.
            if episodeNum != alreadyInMemoryEpisode:
                # Progress report every 10th episode only.
                if (episodeNum % 10 == 0):
                    print('Reading Episode ' + str(episodeNum) + ' ...')
                currentEpisodesInputs = np.load(
                    os.path.join(lidarDataDir,
                                 'obstacles_e_' + str(episodeNum) + '.npz'))
                obstacles_matrix_array = currentEpisodesInputs[
                    'obstacles_matrix_array']
                alreadyInMemoryEpisode = episodeNum  # update for later iterations
            r = int(row['VehicleArrayID'])  # receiver index within the episode
            obstacles_matrix_array_lidar[id_count] = obstacles_matrix_array[r]
            id_count = id_count + 1
    # Cast (already int8) and persist the full tensor, compressed.
    lidar_inputs_train = np.int8(obstacles_matrix_array_lidar)
    # train
    np.savez_compressed(inputDataDir + 'lidar_' + num + '.npz',
                        input=lidar_inputs_train)
def processCoordinates(data_folder, dataset):
    """Read the s008 coordinates CSV and persist the train/validation
    coordinate splits as .npz files.

    Returns the number of training samples.
    """
    print('Generating Beams ...')
    handler = CSVHandler()
    out_dir = data_folder + '/coord_input/'
    csv_url = dataset + '/' + 'CoordVehiclesRxPerScene_s008' + '.csv'

    # getCoord splits at sample 1564: train first, validation second.
    train_coords, val_coords = handler.getCoord(csv_url, 1564)

    # train
    np.savez(out_dir + 'coord_train' + '.npz', coordinates=train_coords)
    # test
    np.savez(out_dir + 'coord_validation' + '.npz', coordinates=val_coords)
    print('Coord npz files saved!')

    return len(train_coords)
def processCoordinates(data_folder, dataset):
    """Load vehicle coordinates from the s008 CSV and write the
    train/validation .npz coordinate files.

    Returns the training-sample count.
    """
    print("Generating Beams ...")
    reader = CSVHandler()
    target_dir = data_folder + "/coord_input/"
    source_csv = dataset + "/" + "CoordVehiclesRxPerScene_s008" + ".csv"
    coordinates_train, coordinates_validation = reader.getCoord(source_csv, 1564)
    sample_count = len(coordinates_train)
    # Persist both splits next to each other in the coord_input folder.
    np.savez(target_dir + "coord_train" + ".npz",
             coordinates=coordinates_train)
    np.savez(target_dir + "coord_validation" + ".npz",
             coordinates=coordinates_validation)
    print("Coord npz files saved!")
    return sample_count
def getAmount(code=None):
    """
    Takes: carrier code (str)
    Query variables: month (str), content-type (str), delay (str)
    Returns: Dictionary of amount + month + carrier uri
    """
    ## Load args ##
    month = request.args.get("month")
    contentType = request.args.get("content-type")
    airportCode = request.args.get("airport-code")
    if contentType == "None" or contentType is None:
        contentType = "application/json"
    # queryString is built before airportCode is normalised below, on purpose:
    # it must echo the literal "None" the client sent (original behavior).
    queryString = "?airport-code=" + str(airportCode) + "&content-type=" + str(
        contentType) + "&month=" + str(month)
    # Extra args functionality
    delayType = request.args.get("delay")  # parsed but not used downstream yet
    if airportCode == "None":
        airportCode = None
    ###############

    ## Logic ##
    # Guard clause: flask.abort raises, so no else nesting is needed.
    if code is None:
        flask.abort(404, "404(Carrier not found)")

    # The carrier lookup was duplicated in both branches — hoisted here.
    carrier = Carrier.query.filter_by(code=code).first()
    if airportCode is None:
        if carrier is None:
            flask.abort(404, "404(Carrier not found)")
        dictionary = Utility.getAmountByMonth(realCarrier=carrier, month=month)
    else:
        airport = Airport.query.filter_by(code=airportCode).first()
        if carrier is None or airport is None:
            flask.abort(404, "404(Carrier or Airport not found)")
        dictionary = Utility.getAmountByMonth(realCarrier=carrier,
                                              airport=airport,
                                              month=month)
    dictionary["carrier-uri"] = "/carriers/" + code + queryString

    if contentType == "text/csv":
        return CSVHandler.getAmountCSV(dictionary)
    return json.dumps(dictionary)
# NOTE(review): this `pass` appears to close a definition whose header lies
# outside this chunk (original indentation was lost) — left untouched.
pass

def preprocess(self, train, test, except_num=False):
    """Jointly encode train/test frames for modelling.

    Concatenates train (minus the 'SalePrice' target) with test so that
    one-hot encoding produces identical columns for both, mean-imputes
    missing values, then splits the frames back apart.

    Parameters:
        train: DataFrame containing a 'SalePrice' column.
        test: DataFrame with the same feature columns as train.
        except_num: if True, drop float64/int64 columns before encoding.

    Returns:
        (train, test) — the transformed feature frames.
    """
    all_data = pd.concat((train.drop('SalePrice', axis=1), test), axis=0)
    all_data = all_data.fillna(all_data.mean())  # fill NA and NaN with column means

    # Optionally delete columns holding numeric dtypes.
    # (Iterating the captured column Index while rebinding all_data via
    # drop() is safe: drop() returns a new frame each time.)
    if except_num:
        for column in all_data.columns:
            dtype = all_data[column].dtype
            if dtype in ['float64', 'int64']:
                all_data = all_data.drop(column, axis=1)
            else:
                pass

    # One-hot encode categoricals, then re-impute any NaNs introduced.
    all_data = pd.get_dummies(all_data)
    all_data = all_data.fillna(all_data.mean())

    # Split back: RHS `train.shape[0]` is evaluated before rebinding, so
    # both slices use the original train length.
    train = all_data[:train.shape[0]]
    test = all_data[train.shape[0]:]
    return train, test

if __name__ == '__main__':
    # Smoke-test the preprocessor against the local house-price CSVs.
    # NOTE(review): `Preprocessor` is presumably the enclosing class of the
    # method above — its definition is outside this chunk.
    from CSVHandler import CSVHandler
    csv_handler = CSVHandler('../../data')
    preprocessor = Preprocessor()
    train = csv_handler.load_csv('train.csv')
    test = csv_handler.load_csv('test.csv')
    train, test = preprocessor.preprocess(train, test)
    print(train, test)
def resizeAndConcatenate(self, coordFileName, limit, use_high_pass_filter='False'):
    """Build the image input tensor: for each sample, resize the three
    camera views and concatenate them side by side, then save
    train/validation splits as .npz files.

    Parameters
    ----------
    coordFileName : str
        Basename (no extension) of the coordinates CSV under self.dataDir.
    limit : int
        Split point: samples [:limit] train, [limit:] validation.
    use_high_pass_filter : bool or str, default 'False'
        When True (or the string 'True') and self.nCh == 1, apply a 3x3
        high-pass Laplacian-style kernel to each resized grayscale view.

    Returns
    -------
    np.ndarray — the full (unsplit) input tensor, dtype uint8.
    """
    csvHand = CSVHandler()
    coordURL = self.dataDir + coordFileName + '.csv'
    nSamples, lastEpisode, epi_scen_list = csvHand.getEpScenValbyRec(coordURL)
    dimResize = self.dimResize

    # Three camera views are laid out side by side along the width axis.
    if self.nCh > 1:
        inputs = np.zeros([nSamples, dimResize[0], dimResize[1] * 3, self.nCh],
                          dtype=np.uint8)
    else:
        inputs = np.zeros([nSamples, dimResize[0], dimResize[1] * 3, 1],
                          dtype=np.uint8)

    # BUG FIX: the flag defaults to the *string* 'False' but was compared
    # with `== True`, so the filter could never be enabled — even when the
    # caller passed the string 'True'.  Accept both the boolean True and
    # the string 'True' for backward compatibility.
    apply_filter = use_high_pass_filter in (True, 'True')
    # Loop-invariant kernel hoisted out of the per-sample loops.
    highPassKernel = np.array([[0, -1 / 4, 0],
                               [-1 / 4, 2, -1 / 4],
                               [0, -1 / 4, 0]])

    for samp in range(0, nSamples):
        for cam in range(1, 4):
            epi_scen = epi_scen_list[samp]
            imgURL = self.dataDir + self.imgDataDir + 'camera' + str(
                cam) + '/' + '{:0>1}'.format(epi_scen[0]) + '.png'
            imgTmp = self.color_space_cvt(imgURL)  # convert to the configured color space
            imgRes = cv2.resize(imgTmp, (dimResize[1], dimResize[0]),
                                interpolation=cv2.INTER_AREA)
            # Column span for this camera within the concatenated image.
            cols = slice(dimResize[1] * (cam - 1), dimResize[1] * cam)
            if self.nCh == 1:
                if apply_filter:
                    imgResFilt = convolve2d(imgRes, highPassKernel, mode='same')
                    inputs[samp, :, cols, 0] = imgResFilt
                else:
                    inputs[samp, :, cols, 0] = imgRes
            elif self.nCh == 3:
                inputs[samp, :, cols, :] = imgRes
        # Periodic progress report.
        if (np.mod(samp + 1, 21) == 0):
            print("Generated samples: " + str(samp))

    input_validation = inputs[limit:]
    input_train = inputs[:limit]
    np.savez(self.inputDataDir + 'img_input_train_' + str(self.resizeFac) + '.npz',
             inputs=input_train)
    np.savez(self.inputDataDir + 'img_input_validation_' + str(self.resizeFac) + '.npz',
             inputs=input_validation)
    return inputs
# @author Salvador Orozco Villalever - A07104218
# @version 01/28/2019
# Python script for the sentiment analysis extraction

from CSVHandler import CSVHandler
from MiscellaneousFeaturesExtractor import MiscellaneousFeaturesExtractor

# Input data set location.
dataset_dir = 'datasets/with_extra_features'
dataset_name = 'SentimentAnalysis_Emotion_MiscellaneousFeatures_test.csv'
dataset_path = dataset_dir + '/' + dataset_name

# Throttle between consecutive API requests (seconds).
request_interval_seconds = 0.0001

# Run the miscellaneous-features extraction over the data set.
extractor = MiscellaneousFeaturesExtractor(
    dataset_path,
    request_interval_seconds,
    apiKeyName="namesAPI_APIKey",
    pathToApiKeyFile=".env.json")
extractor.extractFeatures()

# Write the augmented tweets to a new CSV file.
results_path = dataset_dir + '/' + 'PLUS_NEW-FEATURES_' + dataset_name
csv_writer = CSVHandler(results_path, extractor.tweetList)
csv_writer.writeTweetsToFile()
# @author Salvador Orozco Villalever - A07104218
# @version 01/28/2019
# Python script for the sentiment analysis extraction

from CSVHandler import CSVHandler
from SentimentAnalysisExtractor import SentimentAnalysisExtractor

# Input data set location.
dataset_dir = 'datasets/with_extra_features'
dataset_name = 'Emotion_test.csv'
dataset_path = dataset_dir + '/' + dataset_name

# Throttle between consecutive API requests (seconds).
request_interval_seconds = 0.5

# Run the sentiment-analysis extraction over the data set.
extractor = SentimentAnalysisExtractor(
    dataset_path,
    request_interval_seconds,
    "meaningCloud_APIKey",
    ".env.json")
extractor.extractFeatures()

# Write the augmented tweets to a new CSV file.
results_path = dataset_dir + '/' + 'RESULTS_SENTIMENT-ANALYSIS_' + dataset_name
csv_writer = CSVHandler(results_path, extractor.tweetList)
csv_writer.writeTweetsToFile()
def getCarrier(code=None):
    """
    Takes: carrier code (str)
    Query variables: airport-code (str), content-type (str)
    Returns: (carrierName (str), statisticsURI (str))
             or list(carrierName (str), carrierURI (str))
    """
    ## Load args ##
    month = request.args.get("month")
    contentType = request.args.get("content-type")
    airportCode = request.args.get("airport-code")
    if contentType == "None" or contentType is None:
        contentType = "application/json"
    queryString = "?airport-code=" + str(airportCode) + "&content-type=" + str(
        contentType) + "&month=" + str(month)
    ###############

    ## Logic ##
    if code is None:
        # No code given: return all carrier URIs + carrier names.
        dataList = []
        if airportCode is None:
            for c in Carrier.query.all():
                # `entry` replaces the original builtin-shadowing name `dict`.
                entry = {
                    "carrier-name": c.getName(),
                    "uri": "/carriers/" + c.getCode() + "?content-type=" +
                           str(contentType),
                    "carrier-code": c.getCode()
                }
                dataList.append(entry)
        else:
            airport = Airport.query.filter_by(code=airportCode).first()
            if airport is None:
                flask.abort(400, "400(invalid parameter): airport code invalid")
            # Only carriers related to the requested airport.
            relations = Relation_table.query.filter_by(
                airportID=airport.id).all()
            for r in relations:
                carrier = Carrier.query.filter_by(id=r.getCarrierID()).first()
                entry = {
                    "carrier-name": carrier.getName(),
                    "uri": "/carriers/" + carrier.getCode() + queryString,
                    "carrier-code": carrier.getCode()
                }
                if entry not in dataList:  # de-duplicate
                    dataList.append(entry)
        if contentType == "text/csv":
            return CSVHandler.getCarrierCSV(dataList=dataList)
        return json.dumps(dataList)

    # Specific code given: return statistics URI + carrier name.
    carrier = Carrier.query.filter_by(code=code).first()
    if carrier is None:
        flask.abort(400, "400(invalid paramater): carrier code invalid")
    relations = Relation_table.query.filter_by(carrierID=carrier.id).all()
    airportURIs = []
    queryString2 = "?content-type=" + str(contentType) + "&month=" + str(month)
    for r in relations:
        airport = Airport.query.filter_by(id=r.getAirportID()).first()
        uri = "/airports/" + str(airport.getCode()) + queryString2
        if uri not in airportURIs:  # de-duplicate
            airportURIs.append(uri)
    payload = {
        "carrier-name": carrier.getName(),
        "statistics-uri": "/carriers/" + carrier.getCode() + "/statistics" +
                          queryString,
        "airport-uris": airportURIs
    }
    if contentType == "text/csv":
        return CSVHandler.getCarrierCSV(dictionary=payload)
    return json.dumps(payload)
def getAirport(code=None):
    """
    Takes: airport code (str)
    Query variables: content-type (str)
    Returns: (airportName (str), airportURI (str))
             or list(airportName (str), airportURI (str))
    """
    ## Load args ##
    month = request.args.get("month")
    contentType = request.args.get("content-type")
    airportCode = request.args.get("airport-code")  # parsed for parity; unused below
    if contentType == "None" or contentType is None:
        contentType = "application/json"
    queryString = "?content-type=" + str(contentType) + "&month=" + str(month)
    ###############

    ## Logic ##
    if code is None:
        # No code given: return all airport URIs + airport names.
        dataList = []
        for a in Airport.query.all():
            # `entry` replaces the original builtin-shadowing name `dict`.
            entry = {
                "name": a.getName(),
                "uri": "/airports/" + a.getCode() + queryString
            }
            dataList.append(entry)
        if contentType == "text/csv":
            return CSVHandler.getAirportCSV(dataList=dataList)
        return json.dumps(dataList)

    # Specific code given: return all carrier URIs at this airport + name.
    airport = Airport.query.filter_by(code=code).first()
    if airport is None:
        flask.abort(400, "400(invalid paramater): airport code invalid")
    # Rebuild queryString so carrier URIs carry this airport's code.
    queryString = "?airport-code=" + str(airport.getCode(
    )) + "&content-type=" + str(contentType) + "&month=" + str(month)
    relations = Relation_table.query.filter_by(airportID=airport.id)
    dataList = []
    for r in relations:
        for c in Carrier.query.filter_by(id=r.getCarrierID()):
            entry = {
                "uri": "/carriers/" + c.getCode() + queryString,
                "carrier-name": c.getName(),
                "carrier-code": c.getCode()
            }
            if entry not in dataList:  # de-duplicate
                dataList.append(entry)
    payload = {"name": airport.getName(), "uri-list": dataList}
    if contentType == "text/csv":
        return CSVHandler.getAirportCSV(dictionary=payload)
    return json.dumps(payload)
# @author Salvador Orozco Villalever - A07104218
# @version 02/02/2019
# Script for extracting emotions using Indico's emotion extraction API

from EmotionExtractor import EmotionExtractor
from CSVHandler import CSVHandler

# Input data set location.
dataset_dir = 'datasets'
dataset_name = 'test_copy.csv'
dataset_path = dataset_dir + '/' + dataset_name

# Throttle between consecutive API requests (seconds).
request_interval_seconds = 0.1

# Run the emotion extraction over the data set.
extractor = EmotionExtractor(dataset_path, request_interval_seconds,
                             "indico_APIKey", ".env.json")
extractor.setAPIKey()
extractor.extractFeatures()

# Write the augmented tweets to a new CSV file.
results_path = dataset_dir + '/' + 'test_copy_EmotionFeatures.csv'
csv_writer = CSVHandler(results_path, extractor.tweetList)
csv_writer.writeTweetsToFile()