Example #1
0
def create_real_data():
    """Load the China-selected train/test CSVs into module-level globals.

    Populates ``x_train``, ``y_train``, ``x_test`` and ``y_test``; the image
    arrays are reshaped to (N, 28, 28, 1) and scaled to float32 in [0, 1].
    """
    global x_train, y_train, x_test, y_test
    # Named `handler` rather than `csv` so the stdlib csv module used
    # elsewhere in this file is not shadowed.
    handler = CSVHandler()
    x_train, y_train = handler.create_data_from_csv('train_selected_china.csv')
    x_test, y_test = handler.create_data_from_csv('test_selected_china.csv')
    # Add a trailing channel axis so the data matches a (28, 28, 1) CNN input.
    x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
    x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
    # Scale raw 0-255 pixel values into [0, 1] floats.
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
Example #2
0
def processLidarData(data_folder, dataset, limit):
    """Collect per-receiver LIDAR obstacle grids and save train/validation npz.

    Reads the s008 coordinates CSV, loads each episode's obstacle matrices
    once, gathers the 20x200x10 grid of every valid receiver, then saves
    samples ``[:limit]`` as training input and ``[limit:]`` as validation.

    Parameters:
        data_folder: output root; files go to ``<data_folder>/lidar_input/``.
        dataset: root containing ``lidar_data_s008/`` and the coordinates CSV.
        limit: split index between training and validation samples.
    """
    csvHand = CSVHandler()
    print("Generating LIDAR ...")
    lidarDataDir = dataset + "/" + "lidar_data_s008/"
    inputDataDir = data_folder + "/lidar_input/"
    coordFileName = "CoordVehiclesRxPerScene_s008"
    coordURL = dataset + "/" + coordFileName + ".csv"

    if not os.path.exists(inputDataDir):
        os.mkdir(inputDataDir)
        print("Directory '% s' created" % inputDataDir)

    nSamples, lastEpisode, epi_scen = csvHand.getEpScenValbyRec(coordURL)
    # One 20x200x10 occupancy grid per valid sample.
    obstacles_matrix_array_lidar = np.ones((nSamples, 20, 200, 10), np.int8)
    with open(coordURL) as csvfile:
        reader = csv.DictReader(csvfile)
        id_count = 0
        alreadyInMemoryEpisode = -1
        for row in reader:
            episodeNum = int(row["EpisodeID"])
            isValid = row["Val"]  # "V" = valid, "I" = invalid sample
            if isValid == "I":
                continue  # skip invalid entries
            if episodeNum != alreadyInMemoryEpisode:  # load each episode once
                print("Reading Episode " + str(episodeNum) + " ...")
                currentEpisodesInputs = np.load(
                    os.path.join(
                        lidarDataDir, "obstacles_e_" + str(episodeNum) + ".npz"
                    )
                )
                obstacles_matrix_array = currentEpisodesInputs["obstacles_matrix_array"]
                alreadyInMemoryEpisode = episodeNum
            r = int(row["VehicleArrayID"])  # receiver index within the episode
            obstacles_matrix_array_lidar[id_count] = obstacles_matrix_array[r]
            id_count += 1

    # Split into train/validation; the dead `= []` initializations the
    # original carried before this point were removed.
    lidar_inputs_test = obstacles_matrix_array_lidar[limit:]
    lidar_inputs_train = obstacles_matrix_array_lidar[:limit]

    # train
    np.savez(inputDataDir + "lidar_train.npz", input=lidar_inputs_train)
    # test
    np.savez(inputDataDir + "lidar_validation.npz", input=lidar_inputs_test)
Example #3
0
def getStatistics(code=None):
    """
        Takes: carrier code (str)
        Query variables: month (str), content-type (str), airportCode (str)
        Returns:  flights-uri, minutes-uri, amount-uri
        Aborts:  400 when the carrier code is missing or unknown
    """

    ## Load args ##
    month = request.args.get("month")
    contentType = request.args.get("content-type")
    airportCode = request.args.get("airport-code")
    if contentType == "None" or contentType is None:
        contentType = "application/json"

    print(str(month))
    queryString = "?airport-code=" + str(airportCode) + "&content-type=" + str(
        contentType) + "&month=" + str(month)
    ###############

    ## Logic     ##
    # Guard clauses: flask.abort raises, so execution stops here on bad input.
    if code is None:
        flask.abort(400, "400(invalid paramater): carrier code invalid(None)")
    carrier = Carrier.query.filter_by(code=code).first()
    if carrier is None:
        flask.abort(400, "400(invalid paramater): carrier code invalid")

    # The original built the exact same dictionary in both the airportCode
    # and no-airportCode branches; the airport filter already travels inside
    # queryString, so one construction suffices. Renamed from `dict` to
    # avoid shadowing the builtin.
    stats = {
        "flights-uri":
        "/carriers/" + code + "/statistics/flights" + queryString,
        "minutes-uri":
        "/carriers/" + code + "/statistics/delays/minutes" + queryString,
        "amount-uri":
        "/carriers/" + code + "/statistics/delays/amount" + queryString
    }

    if contentType == "text/csv":
        return CSVHandler.getStatisticsCSV(dictionary=stats)
    return json.dumps(stats)
Example #4
0
def processCoordinates(data_folder, dataset, rsu_coord, area_shp):
    """Normalize train/test vehicle coordinates relative to the RSU and save.

    Coordinates are shifted by ``rsu_coord`` and scaled by the RSU-relative
    area extents, then written (with their context arrays) as npz files
    under ``data_folder``.

    Parameters:
        data_folder: output directory prefix for the npz files.
        dataset: directory prefix holding the s008 coordinates CSV.
        rsu_coord: (x, y) position of the road-side unit.
        area_shp: bounding box [x0, y0, x1, y1] in absolute coordinates.

    Returns:
        Number of training samples.
    """
    print("Generating Beams ...")
    csvHand = CSVHandler()

    inputDataDir = data_folder
    coordFileName = "CoordVehiclesRxPerScene_s008"
    coordURL = dataset + coordFileName + ".csv"

    coordinates_train, context_train, coordinates_test, context_test = csvHand.getCoord(
        coordURL, 1564)

    # Express the bounding box relative to the RSU position.
    area_shp = [
        area_shp[0] - rsu_coord[0],
        area_shp[1] - rsu_coord[1],
        area_shp[2] - rsu_coord[0],
        area_shp[3] - rsu_coord[1],
    ]

    # (coordinate - rsu_coord) / extent for each axis.
    coordinates_train = [[(float(a) - float(b)) / c
                          for a, b, c in zip(x, rsu_coord, area_shp[2:])]
                         for x in coordinates_train]

    # BUG FIX: the original computed (a - b / c) here — dividing only the
    # RSU offset — so test coordinates were normalized differently from the
    # training set. Parenthesized to match: (a - b) / c.
    coordinates_test = [[(float(a) - float(b)) / c
                         for a, b, c in zip(x, rsu_coord, area_shp[2:])]
                        for x in coordinates_test]

    train_channels = len(coordinates_train)

    # train
    np.savez(inputDataDir + "coord_train" + ".npz",
             coordinates=coordinates_train)
    np.savez(inputDataDir + "context_train" + ".npz", context=context_train)
    # test
    np.savez(inputDataDir + "coord_validation" + ".npz",
             coordinates=coordinates_test)
    np.savez(inputDataDir + "context_test" + ".npz", context=context_test)

    print("Coord npz files saved!")

    return train_channels
Example #5
0
def processLidarData(num, csv_path):
    """Build and save the compressed LIDAR input tensor for set ``num``.

    Loads each episode's obstacle matrices from ``./obstacles_<num>/``,
    gathers the 180x330x10 grid of every valid receiver listed in
    ``csv_path`` and writes them all to ``./lidar_<num>.npz``.

    Parameters:
        num: set identifier used in the input and output file names.
        csv_path: path to the coordinates CSV listing the valid samples.
    """
    csvHand = CSVHandler()
    print('Generating LIDAR ...')
    lidarDataDir = './obstacles_' + num + '/'
    # Output goes next to the script; the cwd always exists, so the
    # original's exists()/mkdir guard for './' was dead code and is gone.
    inputDataDir = './'
    coordURL = csv_path
    nSamples, lastEpisode, epi_scen = csvHand.getEpScenValbyRec(coordURL)
    # One 180x330x10 occupancy grid per valid sample.
    obstacles_matrix_array_lidar = np.ones((nSamples, 180, 330, 10), np.int8)
    with open(coordURL) as csvfile:
        reader = csv.DictReader(csvfile)
        id_count = 0
        alreadyInMemoryEpisode = -1
        for row in reader:
            episodeNum = int(row['EpisodeID'])
            isValid = row['Val']  # 'V' = valid, 'I' = invalid sample
            if isValid == 'I':
                continue  # skip invalid entries
            if episodeNum != alreadyInMemoryEpisode:  # load each episode once
                if episodeNum % 10 == 0:
                    print('Reading Episode ' + str(episodeNum) + ' ...')
                currentEpisodesInputs = np.load(
                    os.path.join(lidarDataDir,
                                 'obstacles_e_' + str(episodeNum) + '.npz'))
                obstacles_matrix_array = currentEpisodesInputs[
                    'obstacles_matrix_array']
                alreadyInMemoryEpisode = episodeNum
            r = int(row['VehicleArrayID'])  # receiver index within the episode
            obstacles_matrix_array_lidar[id_count] = obstacles_matrix_array[r]
            id_count += 1
    lidar_inputs_train = np.int8(obstacles_matrix_array_lidar)
    # train
    np.savez_compressed(inputDataDir + 'lidar_' + num + '.npz',
                        input=lidar_inputs_train)
def processCoordinates(data_folder, dataset):
    """Read the s008 train/validation coordinates and save them as npz files.

    Files are written under ``<data_folder>/coord_input/``.

    Returns:
        Number of training samples.
    """
    print('Generating Beams ...')
    handler = CSVHandler()

    out_dir = data_folder + '/coord_input/'
    coord_csv = dataset + '/' + 'CoordVehiclesRxPerScene_s008' + '.csv'

    # getCoord splits the CSV at sample 1564 into train / validation sets.
    coords_train, coords_val = handler.getCoord(coord_csv, 1564)

    # train
    np.savez(out_dir + 'coord_train' + '.npz', coordinates=coords_train)
    # test
    np.savez(out_dir + 'coord_validation' + '.npz', coordinates=coords_val)

    print('Coord npz files saved!')

    return len(coords_train)
def processCoordinates(data_folder, dataset):
    # NOTE(review): byte-for-byte duplicate of the processCoordinates defined
    # just above (quote style aside); at import time this later definition
    # shadows the earlier one.
    """Read the s008 train/validation coordinates from the CSV and save them
    as npz files under ``<data_folder>/coord_input/``.

    Returns the number of training samples.
    """
    print("Generating Beams ...")
    csvHand = CSVHandler()

    inputDataDir = data_folder + "/coord_input/"
    coordFileName = "CoordVehiclesRxPerScene_s008"
    coordURL = dataset + "/" + coordFileName + ".csv"

    # getCoord splits the CSV at sample 1564 into train / validation sets.
    coordinates_train, coordinates_test = csvHand.getCoord(coordURL, 1564)

    train_channels = len(coordinates_train)

    # train
    np.savez(inputDataDir + "coord_train" + ".npz",
             coordinates=coordinates_train)
    # test
    np.savez(inputDataDir + "coord_validation" + ".npz",
             coordinates=coordinates_test)

    print("Coord npz files saved!")

    return train_channels
Example #8
0
def getAmount(code=None):
    """
        Takes: carrier code (str)
        Query variables: month (str), content-type (str), airport-code (str)
        Returns:  Dictionary of amount + month + carrier uri
        Aborts:  404 when the carrier (or airport) is missing or unknown
    """

    ## Load args ##
    month = request.args.get("month")
    contentType = request.args.get("content-type")
    airportCode = request.args.get("airport-code")
    if contentType == "None" or contentType is None:
        contentType = "application/json"
    queryString = "?airport-code=" + str(airportCode) + "&content-type=" + str(
        contentType) + "&month=" + str(month)

    # NOTE(review): the original also read an unused "delay" query arg
    # (delayType); dropped because nothing consumed it.
    if airportCode == "None":
        airportCode = None
    ###############

    ## Logic     ##
    # flask.abort raises, so guard clauses keep the original abort messages.
    if code is None:
        flask.abort(404, "404(Carrier not found)")

    if airportCode is None:
        carrier = Carrier.query.filter_by(code=code).first()
        if carrier is None:
            flask.abort(404, "404(Carrier not found)")
        dictionary = Utility.getAmountByMonth(realCarrier=carrier,
                                              month=month)
    else:
        carrier = Carrier.query.filter_by(code=code).first()
        airport = Airport.query.filter_by(code=airportCode).first()
        if carrier is None or airport is None:
            flask.abort(404, "404(Carrier or Airport not found)")
        dictionary = Utility.getAmountByMonth(realCarrier=carrier,
                                              airport=airport,
                                              month=month)
    # Both branches finish the same way; hoisted out of the duplication.
    dictionary["carrier-uri"] = "/carriers/" + code + queryString

    if contentType == "text/csv":
        return CSVHandler.getAmountCSV(dictionary)
    return json.dumps(dictionary)
Example #9
0
        pass

    def preprocess(self, train, test, except_num=False):
        """One-hot encode and impute the combined train/test feature frames.

        Parameters
        ----------
        train : pandas.DataFrame
            Training frame; must contain a 'SalePrice' target column, which
            is dropped before preprocessing.
        test : pandas.DataFrame
            Test frame with the same feature columns (no 'SalePrice').
        except_num : bool
            If True, drop every numeric (float64/int64) column so only
            categorical features remain.

        Returns
        -------
        tuple of pandas.DataFrame
            (train, test) preprocessed and re-split at the original boundary.
        """
        all_data = pd.concat((train.drop('SalePrice', axis=1), test), axis=0)
        # numeric_only: DataFrame.mean() on mixed dtypes raises on modern
        # pandas; object columns were never imputed here anyway.
        all_data = all_data.fillna(all_data.mean(numeric_only=True))

        if except_num:
            # Collect numeric columns first, then drop them in one pass
            # (the original re-dropped column-by-column inside the loop).
            numeric_cols = [
                column for column in all_data.columns
                if all_data[column].dtype in ('float64', 'int64')
            ]
            all_data = all_data.drop(numeric_cols, axis=1)

        all_data = pd.get_dummies(all_data)  # one-hot encode categoricals
        all_data = all_data.fillna(all_data.mean(numeric_only=True))
        train = all_data[:train.shape[0]]
        test = all_data[train.shape[0]:]
        return train, test


if __name__ == '__main__':
    from CSVHandler import CSVHandler

    # Load both splits, run them through the preprocessing pipeline and
    # print the result for a quick visual sanity check.
    handler = CSVHandler('../../data')
    pre = Preprocessor()
    train_df = handler.load_csv('train.csv')
    test_df = handler.load_csv('test.csv')
    train_df, test_df = pre.preprocess(train_df, test_df)
    print(train_df, test_df)
    def resizeAndConcatenate(self,
                             coordFileName,
                             limit,
                             use_high_pass_filter='False'):
        """Resize each sample's three camera images, concatenate them side
        by side and save train/validation npz files.

        Parameters
        ----------
        coordFileName : str
            CSV basename (without '.csv') listing the valid samples.
        limit : int
            Split index: inputs[:limit] -> train, inputs[limit:] -> validation.
        use_high_pass_filter : bool or str
            Apply a 3x3 high-pass kernel to grayscale images when True or
            the string 'True'.

        Returns
        -------
        numpy.ndarray
            The full (nSamples, H, 3*W, nCh) uint8 input tensor.
        """

        csvHand = CSVHandler()
        coordURL = self.dataDir + coordFileName + '.csv'
        nSamples, lastEpisode, epi_scen_list = csvHand.getEpScenValbyRec(
            coordURL)
        dimResize = self.dimResize
        # Channel count: self.nCh when multi-channel, otherwise 1 (the
        # original's two np.zeros branches built exactly these shapes).
        nCh = self.nCh if self.nCh > 1 else 1
        inputs = np.zeros([nSamples, dimResize[0], dimResize[1] * 3, nCh],
                          dtype=np.uint8)

        # BUG FIX: the parameter defaults to the *string* 'False', but the
        # original tested `use_high_pass_filter == True`, so passing 'True'
        # (a string, matching the default's type) never enabled the filter.
        # Accept both the boolean and the string spelling.
        apply_filter = (use_high_pass_filter == True
                        or use_high_pass_filter == 'True')

        for samp in range(0, nSamples):
            for cam in range(1, 4):
                epi_scen = epi_scen_list[samp]
                imgURL = self.dataDir + self.imgDataDir + 'camera' + str(
                    cam) + '/' + '{:0>1}'.format(epi_scen[0]) + '.png'
                imgTmp = self.color_space_cvt(
                    imgURL)  # convert to the configured color space
                imgRes = cv2.resize(imgTmp, (dimResize[1], dimResize[0]),
                                    interpolation=cv2.INTER_AREA)

                if self.nCh == 1:
                    if apply_filter:
                        # 3x3 Laplacian-style high-pass kernel.
                        highPassKernel = np.array([[0, -1 / 4, 0],
                                                   [-1 / 4, 2, -1 / 4],
                                                   [0, -1 / 4, 0]])
                        imgResFilt = convolve2d(imgRes,
                                                highPassKernel,
                                                mode='same')
                        inputs[samp, :,
                               dimResize[1] * (cam - 1):dimResize[1] * cam,
                               0] = imgResFilt
                    else:
                        inputs[samp, :,
                               dimResize[1] * (cam - 1):dimResize[1] * cam,
                               0] = imgRes
                elif self.nCh == 3:
                    inputs[samp, :, dimResize[1] * (cam - 1):dimResize[1] *
                           cam, :] = imgRes

            if np.mod(samp + 1, 21) == 0:
                print("Generated samples: " + str(samp))
        input_validation = inputs[limit:]
        input_train = inputs[:limit]

        np.savez(self.inputDataDir + 'img_input_train_' + str(self.resizeFac) +
                 '.npz',
                 inputs=input_train)
        np.savez(self.inputDataDir + 'img_input_validation_' +
                 str(self.resizeFac) + '.npz',
                 inputs=input_validation)
        return inputs
# @author Salvador Orozco Villalever - A07104218
# @version 01/28/2019

# Python script for the sentiment analysis extraction

from CSVHandler import CSVHandler
from MiscellaneousFeaturesExtractor import MiscellaneousFeaturesExtractor

# Set the data set path file
# (input CSV already carries sentiment-analysis + emotion features).
dataset_file_pathToFile = 'datasets/with_extra_features'
dataset_file_name = 'SentimentAnalysis_Emotion_MiscellaneousFeatures_test.csv'
dataset_file_path = dataset_file_pathToFile + '/' + dataset_file_name

# Set the request's interval
# (delay in seconds between consecutive extraction requests).
requestIntervalSeconds = 0.0001

# Instantiate a MiscellaneousFeaturesExtractor
# authenticated with the "namesAPI" key stored in the local .env.json file.
miscellaneousFeaturesExtractor = MiscellaneousFeaturesExtractor(
    dataset_file_path,
    requestIntervalSeconds,
    apiKeyName="namesAPI_APIKey",
    pathToApiKeyFile=".env.json")
miscellaneousFeaturesExtractor.extractFeatures()

# Write the results to a new CSV file.
resultsFile = dataset_file_pathToFile + '/' + 'PLUS_NEW-FEATURES_' + dataset_file_name
myCSVWriter = CSVHandler(resultsFile, miscellaneousFeaturesExtractor.tweetList)
myCSVWriter.writeTweetsToFile()
Example #12
0
# @author Salvador Orozco Villalever - A07104218
# @version 01/28/2019

# Python script for the sentiment analysis extraction

from CSVHandler import CSVHandler
from SentimentAnalysisExtractor import SentimentAnalysisExtractor

# Set the data set path file
dataset_file_pathToFile = 'datasets/with_extra_features'
dataset_file_name = 'Emotion_test.csv'
dataset_file_path = dataset_file_pathToFile + '/' + dataset_file_name

# Set the request's interval
# (delay in seconds between consecutive MeaningCloud API calls).
requestIntervalSeconds = 0.5

# Instantiate a SentimentAnalysisExtractor
# authenticated with the "meaningCloud" key from the local .env.json file.
sentimentAnalysisExtractor = SentimentAnalysisExtractor(
    dataset_file_path, requestIntervalSeconds, "meaningCloud_APIKey",
    ".env.json")
sentimentAnalysisExtractor.extractFeatures()

# Write the results to a new CSV file.
resultsFile = dataset_file_pathToFile + '/' + 'RESULTS_SENTIMENT-ANALYSIS_' + dataset_file_name
myCSVWriter = CSVHandler(resultsFile, sentimentAnalysisExtractor.tweetList)
myCSVWriter.writeTweetsToFile()
Example #13
0
def getCarrier(code=None):
    """
        Takes: carrier code (str)
        Query variables: airport-code (str), content-type (str)
        Returns:  (carrierName (str), statisticsURI (str)) or list(carrierName (str), carrierURI (str))
        Aborts:  400 when the airport or carrier code is unknown
    """

    ## Load args ##
    month = request.args.get("month")
    contentType = request.args.get("content-type")
    airportCode = request.args.get("airport-code")
    if contentType == "None" or contentType is None:
        contentType = "application/json"
    queryString = "?airport-code=" + str(airportCode) + "&content-type=" + str(
        contentType) + "&month=" + str(month)
    ###############

    ## Logic     ##
    # Entries were named `dict` in the original, shadowing the builtin;
    # renamed to `entry`/`result` throughout.
    if code is None:
        # No code: list every carrier (optionally restricted to one airport).
        dataList = []
        if airportCode is None:
            for c in Carrier.query.all():
                entry = {
                    "carrier-name":
                    c.getName(),
                    "uri":
                    "/carriers/" + c.getCode() + "?content-type=" +
                    str(contentType),
                    "carrier-code":
                    c.getCode()
                }
                dataList.append(entry)
        else:
            airport = Airport.query.filter_by(code=airportCode).first()
            if airport is None:
                flask.abort(400,
                            "400(invalid parameter): airport code invalid")
            relations = Relation_table.query.filter_by(
                airportID=airport.id).all()
            for r in relations:
                carrier = Carrier.query.filter_by(
                    id=r.getCarrierID()).first()
                entry = {
                    "carrier-name": carrier.getName(),
                    "uri": "/carriers/" + carrier.getCode() + queryString,
                    "carrier-code": carrier.getCode()
                }
                if entry not in dataList:  # dedupe repeated relations
                    dataList.append(entry)

    else:
        # Specific carrier: return its statistics URI plus its airports.
        carrier = Carrier.query.filter_by(code=code).first()
        if carrier is None:
            flask.abort(400, "400(invalid paramater): carrier code invalid")
        relations = Relation_table.query.filter_by(
            carrierID=carrier.id).all()
        airportURIs = []
        for r in relations:
            airport = Airport.query.filter_by(id=r.getAirportID()).first()
            queryString2 = "?content-type=" + str(
                contentType) + "&month=" + str(month)
            uri = "/airports/" + str(airport.getCode()) + queryString2
            if uri not in airportURIs:
                airportURIs.append(uri)

        result = {
            "carrier-name":
            carrier.getName(),
            "statistics-uri":
            "/carriers/" + carrier.getCode() + "/statistics" + queryString,
            "airport-uris":
            airportURIs
        }

    if contentType == "text/csv":
        if code is None:
            return CSVHandler.getCarrierCSV(dataList=dataList)
        return CSVHandler.getCarrierCSV(dictionary=result)
    if code is None:
        return json.dumps(dataList)
    return json.dumps(result)
Example #14
0
def getAirport(code=None):
    """
        Takes: airport code (str)
        Query variables: content-type (str)
        Returns:  (airportName (str), airportURI (str)) or list(airportName (str), airportURI (str))
        Aborts:  400 when the airport code is unknown
    """

    ## Load args ##
    month = request.args.get("month")
    contentType = request.args.get("content-type")
    # NOTE(review): the original also read an unused "airport-code" query
    # arg here; dropped because nothing in this view consumed it.
    if contentType == "None" or contentType is None:
        contentType = "application/json"
    queryString = "?content-type=" + str(contentType) + "&month=" + str(month)
    ###############

    ## Logic     ##
    # Entries were named `dict` in the original, shadowing the builtin;
    # renamed to `entry`/`result` throughout.
    if code is None:
        # No code: list every airport's name and URI.
        dataList = []
        for a in Airport.query.all():
            dataList.append({
                "name": a.getName(),
                "uri": "/airports/" + a.getCode() + queryString
            })

    else:
        # Specific airport: list the carriers serving it.
        airport = Airport.query.filter_by(code=code).first()
        if airport is None:
            flask.abort(400, "400(invalid paramater): airport code invalid")
        # Rebuild the query string so carrier URIs carry this airport's code.
        queryString = "?airport-code=" + str(airport.getCode(
        )) + "&content-type=" + str(contentType) + "&month=" + str(month)
        relations = Relation_table.query.filter_by(airportID=airport.id)
        dataList = []

        for r in relations:
            for c in Carrier.query.filter_by(id=r.getCarrierID()):
                entry = {
                    "uri": "/carriers/" + c.getCode() + queryString,
                    "carrier-name": c.getName(),
                    "carrier-code": c.getCode()
                }
                if entry not in dataList:  # dedupe repeated relations
                    dataList.append(entry)

        result = {"name": airport.getName(), "uri-list": dataList}

    if contentType == "text/csv":
        if code is None:
            return CSVHandler.getAirportCSV(dataList=dataList)
        return CSVHandler.getAirportCSV(dictionary=result)
    if code is None:
        return json.dumps(dataList)
    return json.dumps(result)
# @author Salvador Orozco Villalever - A07104218
# @version 02/02/2019

# Script for extracting emotions using Indico's emotion extraction API
from EmotionExtractor import EmotionExtractor
from CSVHandler import CSVHandler

# Set the data set path file
dataset_file_pathToFile = 'datasets'
dataset_file_name = 'test_copy.csv'
dataset_file_path = dataset_file_pathToFile + '/' + dataset_file_name

# Set the request's interval
# (delay in seconds between consecutive Indico API calls).
requestIntervalSeconds = 0.1

# Instantiate an EmotionExtractor
# authenticated with the "indico" key from the local .env.json file.
emotionExtractor = EmotionExtractor(dataset_file_path, requestIntervalSeconds,
                                    "indico_APIKey", ".env.json")
emotionExtractor.setAPIKey()
emotionExtractor.extractFeatures()

# Write the results to a new CSV file.
resultsFile = dataset_file_pathToFile + '/' + 'test_copy_EmotionFeatures.csv'
myCSVWriter = CSVHandler(resultsFile, emotionExtractor.tweetList)
myCSVWriter.writeTweetsToFile()