Exemple #1
0
def get_json_files_data(path, min=1):
    """
    Load every .json file of a directory into a single dictionary.

    The files are located with find_files(path, "json", min).  Each file is
    parsed with json.load and stored under its base name without extension
    (ex. 'file.json' -> 'file'); in this case, the names are the trip ids.

    Progress is reported through printrp when found_CmdPrinter is true,
    otherwise the 0-based index of the current file is printed.

    :param path: directory where the .json files are
    :param min: minimum number of files expected; forwarded unchanged to
        find_files (presumably it reports an error when fewer are found -
        confirm against find_files)
    :return: dict mapping each file's base name to its parsed JSON content
    """
    json_files = find_files(path, "json", min)
    json_data = dict()

    print("===========================================")
    print("= Converting JSON data into Python object =")
    print("===========================================")

    # progress is displayed 0-based, so the last index is the "total"
    last_index = len(json_files) - 1
    for index, json_path in enumerate(json_files):
        base = os.path.basename(json_path)  # name with extension
        trip_id = os.path.splitext(base)[0]  # name without extension

        # 'with' guarantees the handle is closed even if the JSON is invalid;
        # the previous json.load(open(...)) left one open handle per file
        with open(json_path) as json_file:
            json_data[trip_id] = json.load(json_file)

        if found_CmdPrinter:
            printrp('( ' + str(index) + ' / ' + str(last_index) + ' )')
        else:
            print(index)

    # final progress line (printrp output is presumably overwritten in place)
    print('( ' + str(last_index) + ' / ' + str(last_index) + ' )')
    return json_data
Exemple #2
0
def write_csv_file(json_datas, attributes):
    """
    Export the requested attributes of every trip to 'results.csv'.

    The header row contains the last component of each attribute path.  Each
    trip then produces one row:
      - if the trip data has an 'error' key: [trip id, 'error', error id, error message]
      - otherwise: one value per attribute, as returned by find_attribute

    :param json_datas: dict with tripIDs as keys and json data (as python dict) as values
    :param attributes: list of attribute paths to retrieve, each of format
        "path/to/attribute" (ex. ["plan/itineraries/0/duration", "requestParameters/date"])
    :return: None
    """
    csv_output_file = 'results.csv'

    # 'plan/itineraries/0/duration' -> ['plan', 'itineraries', '0', 'duration']
    splited_attributes = [attribute.split('/') for attribute in attributes]

    with open(csv_output_file, 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',')

        # the header uses the last path component as the attribute name
        # TODO : Make the header more significant
        csvwriter.writerow([parts[-1] for parts in splited_attributes])

        for progress, (trip_id, data) in enumerate(json_datas.items(), start=1):
            # progress counter (1-based)
            if found_CmdPrinter:
                printrp(str(progress) + '...')
            else:
                print(progress)

            if 'error' in data:
                # no itinerary was found: write the error id and its message
                error_row = [trip_id, 'error']
                error_row.append(str(data['error']['id']))
                error_row.append(str(data['error']['message']))
                csvwriter.writerow(error_row)
                continue

            # NOTE(review): the adjusted paths replace the shared list, so they
            # are carried over to the following trips - confirm this is intended
            splited_attributes = adjust_path(splited_attributes, data)

            csvwriter.writerow([
                find_attribute(attribute_path, data)
                for attribute_path in splited_attributes
            ])
Exemple #3
0
def download_json_files(ori, des, date, hredep, args):
    """
    Wrapper method to clean the code.
    Calls all the methods needed to download the JSON files from OTP API.

    Nothing is downloaded when the module-level flag download_json is false;
    "Already downloaded" is printed instead.  Otherwise, for every trip id of
    ori, a url is built with build_url and handed to extract_json together
    with the directory returned by make_dir(json_output).

    :param ori: dict keyed by trip id, each value holding 'orilon' and 'orilat'
    :param des: dict keyed by trip id, each value holding 'deslon' and 'deslat'
    :param date: dict keyed by trip id, each value holding 'year', 'month' and 'day'
    :param hredep: dict keyed by trip id, each value holding 'hour' and 'minute'
    :param args: dict keyed by trip id; args[id] is forwarded as-is to build_url
    :return: None
    :raises SystemExit: (status 1) when extract_json raises an OSError
    """
    # local import so the block does not depend on the file's import section
    import sys

    print("======================================")
    print("= Extracting JSON files from OTP API =")
    print("======================================")

    if not download_json:
        # don't retrieve the data from OTP API if the user specifies it
        print("Already downloaded")
        return

    total = len(ori)
    # ori is only used to get all the ids
    # (could have been des or hredep, they share the same keys)
    for index, trip_id in enumerate(ori):
        url = build_url(ori[trip_id]['orilon'], ori[trip_id]['orilat'],
                        des[trip_id]['deslon'], des[trip_id]['deslat'],
                        date[trip_id]['year'], date[trip_id]['month'],
                        date[trip_id]['day'], hredep[trip_id]['hour'],
                        hredep[trip_id]['minute'], args[trip_id])
        try:
            extract_json(url, trip_id, make_dir(json_output))
        except OSError:
            print("ERROR : OTP is not currently running on the given port")
            # sys.exit instead of the site helper exit(), which is meant for
            # the interactive shell; non-zero status so callers see the failure
            sys.exit(1)

        if found_CmdPrinter:
            printrp('( ' + str(index) + ' / ' + str(total) + ' )')
        else:
            print(index)

    print('( ' + str(total) + ' / ' + str(total) + ' )')
Exemple #4
0
def create_od_single_file(od_survey):
    """
    Read an Origin/Destination survey csv file and split it by kind of data.

    Rows are numbered from 0 in reading order and that number is the key of
    every returned dict.  The columns 'year', 'month', 'day', 'hour' and
    'minute' are optional: when the column is absent the matching DEFAULT_*
    value is used (with a warning if verbose is set).  Every column that is
    not one of the nine known ones is stored in args.

    :param od_survey: path of the csv file (headers expected: 'orilon',
        'orilat', 'deslon', 'deslat' and optionally the date/time columns)
    :return: tuple (ori, des, date, hredep, args) of dicts keyed by row number
    """
    known_columns = frozenset((
        'orilon', 'orilat', 'deslon', 'deslat',
        'year', 'month', 'day', 'hour', 'minute',
    ))

    def column_or_default(row, column, default, line_nb):
        # value of an optional column, or its default when the column is absent
        try:
            return row[column]
        except KeyError:
            if verbose:
                print("WARN - Line ", line_nb,
                      " : No column '" + column + "' found.  Used default ",
                      default)
            return default

    ori, des, date, hredep = {}, {}, {}, {}
    args = {}  # to store non-classic parameters (such as age, sex, salary, etc.)

    print("======================================================")
    print("= Extracting Origin/Destionation data from csv files =")
    print("======================================================")

    with open(od_survey, 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile)

        # count the data lines (header excluded) for the progress display,
        # then rewind so the reader starts from the header
        numline = sum(1 for _ in csvfile) - 1
        csvfile.seek(0)

        i = 0
        for row in reader:
            try:
                ori[i] = {'orilon': row['orilon'], 'orilat': row['orilat']}
                des[i] = {'deslon': row['deslon'], 'deslat': row['deslat']}
            except KeyError:
                # NOTE(review): the row is still processed, but without an
                # ori/des entry - confirm callers can cope with that
                print(
                    "ERROR - Missing Origin/Destination coordinates.  "
                    "Make sure the headers for OD coordinates are 'orilon', 'orilat', 'deslon', 'deslat'"
                )

            year = column_or_default(row, 'year', DEFAULT_YEAR, i)
            month = column_or_default(row, 'month', DEFAULT_MONTH, i)
            day = column_or_default(row, 'day', DEFAULT_DAY, i)
            date[i] = {'year': year, 'month': month, 'day': day}

            hour = column_or_default(row, 'hour', DEFAULT_HOUR, i)
            minute = column_or_default(row, 'minute', DEFAULT_MINUTE, i)
            hredep[i] = {'hour': hour, 'minute': minute}

            # everything that is not a coordinate or a date/time column
            args[i] = {
                column: row[column]
                for column in row.keys()
                if column not in known_columns
            }

            i += 1
            if found_CmdPrinter:
                printrp('( ' + str(i) + ' / ' + str(numline) + ' )')
            else:
                print(i)

        print('( ' + str(i) + ' / ' + str(numline) + ' )')

    return ori, des, date, hredep, args
Exemple #5
0
def write_csv_file(csv_output_file, full_data):
    """
    Write one csv row per itinerary found in the json data.

    A trip whose data has an 'error' key produces a single row instead:
    [trip id, 'error', error id, error message, error msg], where msg is the
    short message (eg. PATH_NOT_FOUND) and message is the long description.

    :param csv_output_file: name of the write-to file (will be created if it doesn't already exist and overwritten if it does)(name must end with .csv); it is passed to make_dir, whose result is the path actually opened
    :param full_data: data of the json files as python object, full_data[id] = dict() containing one json file information
    :return: None
    :raises SystemExit: (status 1) when the file cannot be written because of a PermissionError
    """
    # local import so the block does not depend on the file's import section
    import sys

    j = 0  # number of trips processed so far
    total = len(full_data)
    csv_file_path = make_dir(csv_output_file)

    try:
        with open(csv_file_path, 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=',')
            csvwriter.writerow([
                'tripId', 'agency_tripId', 'itinerary_nb', 'modes',
                'actual_time', 'perceived_time', 'start_time', 'end_time',
                'walk_time', 'walk_distance', 'transit_time', 'waiting_time',
                'boardings', 'bus_lines_numbers', 'boarding_stop_ids',
                'debarquer_stop_ids'
            ])
            print("======================================")
            print("= Creating CSV file from JSON files  =")
            print("======================================")
            for trip_id, data in full_data.items():
                j += 1

                # j is 1-based, so it is compared to the number of trips
                # (the previous "len - 1" total made the display overshoot)
                if found_CmdPrinter:
                    printrp('( ' + str(j) + ' / ' + str(total) + ' )')
                else:
                    print(j)

                if 'error' in data:
                    # no itineraries were found (ie. there was an error)
                    error = data['error']
                    csvwriter.writerow([
                        trip_id, 'error',
                        str(error['id']),
                        str(error['message']),
                        str(error['msg'])
                    ])
                    continue

                itineraries = data['plan']['itineraries']
                for itinerary_nb, itinerary in enumerate(itineraries, start=1):
                    legs = _summarize_legs(itinerary['legs'])

                    start_time = _format_epoch_ms(itinerary['startTime'])
                    end_time = _format_epoch_ms(itinerary['endTime'])

                    walk_time = itinerary['walkTime']
                    transit_time = itinerary['transitTime']
                    waiting_time = itinerary['waitingTime']

                    # Write all the information inside the csv file
                    csvwriter.writerow([
                        trip_id,
                        legs['agency_trip_ids'],
                        str(itinerary_nb),
                        legs['modes'],
                        str(itinerary['duration']),
                        str(get_perceived_time(walk_time, transit_time,
                                               waiting_time)),
                        start_time,
                        end_time,
                        str(walk_time),
                        str(itinerary['walkDistance']),
                        str(transit_time),
                        str(waiting_time),
                        str(legs['boardings']),
                        legs['bus_lines'],
                        legs['boarding_stop_ids'],
                        legs['debarquer_stop_ids']
                    ])
    except PermissionError:
        print(
            'ERROR - Cannot write to CSV file.  The file might be used by another app.'
        )
        # sys.exit instead of the site helper exit(), which is meant for the
        # interactive shell; non-zero status so callers see the failure
        sys.exit(1)
    except OSError:
        print("ERROR - Couldn't open file " + csv_file_path +
              ". Please verify the file's permissions.")
    print('( ' + str(j) + ' / ' + str(total) + ' )')


def _format_epoch_ms(epoch_ms):
    """Format an Epoch time in milliseconds (as given by OTP) as local 'YYYY-MM-DD HH:MM:SS'."""
    return time.strftime('%Y-%m-%d %H:%M:%S',
                         time.localtime(epoch_ms / 1000))


def _summarize_legs(legs):
    """
    Gather, for one itinerary, the leg information written in the csv row.

    :param legs: list of leg dicts of the itinerary
    :return: dict with the ';'-joined 'modes' of all legs and, for the BUS
        legs only, the ';'-joined 'bus_lines', 'boarding_stop_ids',
        'debarquer_stop_ids' and 'agency_trip_ids', plus their count 'boardings'
    """
    modes = []
    bus_lines = []
    boarding_stop_ids = []
    debarquer_stop_ids = []
    agency_trip_ids = []

    for leg in legs:
        modes.append(leg['mode'])
        if leg['mode'] != 'BUS':
            continue
        # every BUS leg counts as one boarding
        bus_lines.append(leg['route'])
        boarding_stop_ids.append(str(leg['from']['stopCode']))
        debarquer_stop_ids.append(str(leg['to']['stopCode']))
        # tripId is given as agencyId:tripId, only the second part is kept
        agency_trip_ids.append(str(leg['tripId'].split(':')[1]))

    return {
        'modes': ';'.join(modes),
        'boardings': len(bus_lines),
        'bus_lines': ';'.join(bus_lines),
        'boarding_stop_ids': ';'.join(boarding_stop_ids),
        'debarquer_stop_ids': ';'.join(debarquer_stop_ids),
        'agency_trip_ids': ';'.join(agency_trip_ids),
    }