Ejemplo n.º 1
0
def uncompress(file_path):
    """
    Uncompress a bz2 file, writing the result alongside the original.

    The uncompressed data is written to file_path with its last four
    characters (presumably the '.bz2' extension) removed.
    The compressed file is not deleted.

    Parameters
    ----------
    file_path: a string
        The path of the file to be uncompressed.

    Returns
    -------
    A tuple of (success, text) where text is:
        the uncompressed file path, if the file was uncompressed,
        file_path itself, if it does not have a bz2 extension,
        an error message (with success False), if the file does not exist.

    """
    if not Path(file_path).exists():
        return (False, "File does not exist: " + file_path)

    if not has_bz2_extension(file_path):
        return (True, file_path)

    chunk_size = 64 * 1024
    uncompressed_path = file_path[:-4]
    uncompressor = BZ2Decompressor()
    with open(file_path, 'rb') as compressed, \
            open(uncompressed_path, 'wb') as uncompressed:
        # Compressed data is binary, so read fixed-size chunks rather than
        # "lines": newline bytes occur at arbitrary positions, or not at all.
        for chunk in iter(lambda: compressed.read(chunk_size), b''):
            uncompressed.write(uncompressor.decompress(chunk))

    return (True, uncompressed_path)
Ejemplo n.º 2
0
def compress(file_path):
    """
    Compress a file with bz2, writing the result alongside the original.

    The compressed data is written to file_path + BZ2_FILE_EXTENSION.
    The uncompressed file is not deleted.

    Parameters
    ----------
    file_path: a string
        The path of the file to be compressed.

    Returns
    -------
    A tuple of (success, text) where text is:
        the compressed file path, if the file was compressed,
        file_path itself, if it already has a bz2 extension,
        an error message (with success False), if the file does not exist.

    """
    if not Path(file_path).exists():
        return (False, "File does not exist: " + file_path)

    if has_bz2_extension(file_path):
        return (True, file_path)

    chunk_size = 64 * 1024
    compressed_path = file_path + BZ2_FILE_EXTENSION
    compressor = BZ2Compressor()
    with open(file_path, 'rb') as uncompressed, \
            open(compressed_path, 'wb') as compressed:
        # Read fixed-size chunks rather than "lines": the input may be binary
        # or contain very long lines, fixed chunks keep memory use bounded.
        for chunk in iter(lambda: uncompressed.read(chunk_size), b''):
            compressed.write(compressor.compress(chunk))

        # Write out the data still buffered inside the compressor
        compressed.write(compressor.flush())

    return (True, compressed_path)
Ejemplo n.º 3
0
def extract_next_day_items(filename, ids_df, date_fields=None):
    """
    Read filename into a pandas DataFrame and extract the items in ids_df.

    It reads items (flights, positions or events) from filename and writes a
    copy of the items WITHOUT matching ids into a file called
    'new_' + filename (with BZ2_LENGTH characters removed from the end if
    filename has a bz2 extension).

    Parameters
    ----------
    filename: a string
        The name of the csv file to read, it must have a FLIGHT_ID column.

    ids_df: a pandas DataFrame
        The flight ids to extract, its index is matched against FLIGHT_ID.

    date_fields: a list of strings
        The names of the columns to parse as dates, default None.

    Returns
    -------
    A pandas DataFrame of ids_df merged with the items with matching ids,
    after it has been passed to replace_old_flight_ids.
    An empty DataFrame if filename could not be read or the output file
    could not be written.

    """
    # Only ask pandas to parse dates when date columns were requested
    read_options = {'converters': {'FLIGHT_ID': UUID},
                    'memory_map': True}
    if date_fields:
        read_options['parse_dates'] = date_fields

    try:
        next_df = pd.read_csv(filename, **read_options)
        log.info('%s read ok', filename)
    except EnvironmentError:
        log.error('could not read file: %s', filename)
        return pd.DataFrame()

    # Create a new dataframe WITHOUT any items that are in ids_df
    new_next_df = next_df[~next_df['FLIGHT_ID'].isin(ids_df.index)]

    # Output the new next items, pandas writes the file uncompressed
    # so the bz2 extension is removed from the output filename
    new_next_filename = 'new_' + filename
    if has_bz2_extension(filename):
        new_next_filename = new_next_filename[:-BZ2_LENGTH]

    try:
        new_next_df.to_csv(new_next_filename,
                           index=False,
                           date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', new_next_filename)
    except EnvironmentError:
        log.error('could not write file: %s', new_next_filename)
        return pd.DataFrame()

    # get the new items from the next DataFrame
    new_items_df = pd.merge(ids_df,
                            next_df,
                            left_index=True,
                            right_on='FLIGHT_ID')
    replace_old_flight_ids(new_items_df)

    return new_items_df
def merge_next_day_items(prev_filename, next_filename, ids_df, log):
    """
    Merge the next day's continuation items into the previous day's items.

    Gets the next day's items (positions or events) that are continuations
    of the previous day's items from get_next_day_items and appends them to
    the items read from prev_filename, sorted by FLIGHT_ID and TIME.

    The merged items are written to 'new_' + prev_filename (with BZ2_LENGTH
    characters removed from the end if prev_filename has a bz2 extension).

    Parameters
    ----------
    prev_filename: a string
        The name of the previous day's items csv file.

    next_filename: a string
        The name of the next day's items csv file.

    ids_df: a pandas DataFrame
        The flight ids, passed on to get_next_day_items.

    log: a logger
        The logger to write info and error messages to.

    Returns
    -------
    True if successful, False if prev_filename could not be read or the
    output file could not be written.

    """
    continued_items = get_next_day_items(next_filename,
                                         ids_df,
                                         log,
                                         date_fields=['TIME'])

    # free memory used by get_next_day_items
    gc.collect()

    try:
        previous_items = pd.read_csv(
            prev_filename,
            parse_dates=['TIME'],
            converters={'FLIGHT_ID': lambda x: UUID(x)},
            memory_map=True)
        log.info('%s read ok', prev_filename)
    except EnvironmentError:
        log.error('could not read file: %s', prev_filename)
        return False

    # append the continued items to the previous items and
    # put each flight's items into time order
    merged_items = pd.concat([previous_items, continued_items],
                             ignore_index=True)
    merged_items = merged_items.sort_values(by=['FLIGHT_ID', 'TIME'])

    # Output the new previous items
    output_filename = 'new_' + prev_filename
    try:
        if has_bz2_extension(prev_filename):
            output_filename = output_filename[:-BZ2_LENGTH]

        merged_items.to_csv(output_filename,
                            index=False,
                            date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', output_filename)
    except EnvironmentError:
        log.error('could not write file: %s', output_filename)
        return False

    return True
def merge_flights(prev_flights_filename, next_flights_filename, ids_df, log):
    """
    Merge the next day's continuation flights into the previous day's flights.

    Gets the next day's flights that are continuations of the previous day's
    flights from get_next_day_items and passes them, with the flights read
    from prev_flights_filename, to update_flight_data.
    NOTE(review): update_flight_data presumably updates the previous flights
    in place, since its return value is not used - confirm.

    The previous flights are then written to 'new_' + prev_flights_filename
    (with BZ2_LENGTH characters removed from the end if prev_flights_filename
    has a bz2 extension).

    Parameters
    ----------
    prev_flights_filename: a string
        The name of the previous day's flights csv file.

    next_flights_filename: a string
        The name of the next day's flights csv file.

    ids_df: a pandas DataFrame
        The flight ids, passed on to get_next_day_items.

    log: a logger
        The logger to write info and error messages to.

    Returns
    -------
    True if successful, False if prev_flights_filename could not be read or
    the output file could not be written.

    """
    continued_flights = get_next_day_items(next_flights_filename, ids_df, log)

    # free memory used by get_next_day_items
    gc.collect()

    try:
        previous_flights = pd.read_csv(
            prev_flights_filename,
            index_col='FLIGHT_ID',
            converters={'FLIGHT_ID': lambda x: UUID(x)},
            memory_map=True)
        log.info('%s read ok', prev_flights_filename)
    except EnvironmentError:
        log.error('could not read file: %s', prev_flights_filename)
        return False

    # merge next days flight data with the previous days flight data
    update_flight_data(previous_flights, continued_flights)

    # Output the new previous flights
    output_filename = 'new_' + prev_flights_filename
    try:
        if has_bz2_extension(prev_flights_filename):
            output_filename = output_filename[:-BZ2_LENGTH]

        # FLIGHT_ID is the index, so the index must be written too
        previous_flights.to_csv(output_filename,
                                index=True,
                                date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', output_filename)
    except EnvironmentError:
        log.error('could not write file: %s', output_filename)
        return False

    return True
Ejemplo n.º 6
0
def generate_positions(filename):
    """
    Generate trajectory positions from a csv file.

    A python generator function to read a csv file containing positions.
    It reads positions one flight at a time to minimise memory use.

    The first line of the file is skipped as a header row. Consecutive lines
    with the same first comma separated field (the flight id) are grouped
    together, so the file must be ordered by flight id for all of the
    positions of a flight to be in the same group.

    Parameters
    ----------
    filename: a string
        The name of the positions file. It is opened with bz2 if it has a
        bz2 extension.

    Yields
    ------
    A list of the lines (strings, including line endings) of each flight.
    Nothing is yielded if the file is empty or only contains a header row.

    """
    is_bz2 = has_bz2_extension(filename)
    with bz2.open(filename, 'rt', newline="") if is_bz2 else \
            open(filename, 'r') as file:
        # Skip the header row, the default stops an empty file from raising
        next(file, None)

        flight_id = None
        line_buffer = []
        for line in file:
            # Determine whether the flight_id has changed
            line_flight_id = line.split(',')[0]
            if line_buffer and line_flight_id != flight_id:
                yield line_buffer
                line_buffer = []

            flight_id = line_flight_id
            line_buffer.append(line)

        # return the positions of the last flight, if there were any
        if line_buffer:
            yield line_buffer
def find_sector_intersections(filename,
                              logging_msg_count=DEFAULT_LOGGING_COUNT):
    """
    Find intersections between trajectories and airspace sectors.

    The intersections are written to a csv file in the current directory.
    The output filename is the base name of filename, without any bz2
    extension, with TRAJECTORIES replaced by SECTOR_INTERSECTIONS and
    JSON_FILE_EXTENSION replaced by CSV_FILE_EXTENSION.

    Parameters
    ----------
    filename: a string
        The name of a trajectories file.

    logging_msg_count: int
        The number of trajectories between logging count messages.
        default DEFAULT_LOGGING_COUNT.

    Returns
    -------
    errno.EACCES if an EnvironmentError occurred, zero otherwise.

    """
    log.info(f'trajectories file: {filename}')

    # The output is named after the input base name, less any .bz2 extension
    base_name = os.path.basename(filename)
    if has_bz2_extension(filename):
        base_name = base_name[:-len(BZ2_FILE_EXTENSION)]

    # Write the sector intersections into a csv file with output_filename
    output_filename = base_name \
        .replace(TRAJECTORIES, SECTOR_INTERSECTIONS) \
        .replace(JSON_FILE_EXTENSION, CSV_FILE_EXTENSION)
    try:
        with open(output_filename, 'w') as file:
            file.write(AIRSPACE_INTERSECTION_FIELDS)

            flights_count = 0
            zeros_count = 0
            for smooth_traj in generate_SmoothedTrajectories(filename):
                try:
                    flight_id = smooth_traj.flight_id
                    sect_ints = \
                        find_trajectory_sector_intersections(smooth_traj)
                    if sect_ints.empty:
                        zeros_count += 1
                    else:
                        sect_ints.to_csv(
                            file,
                            index=False,
                            header=False,
                            mode='a',
                            date_format=ISO8601_DATETIME_US_FORMAT)

                    # Trajectories without intersections are counted as
                    # processed as well
                    flights_count += 1
                    if flights_count % logging_msg_count == 0:
                        log.info(f'{flights_count} trajectories processed')

                except ValueError:
                    # Log the failure and carry on with the next trajectory
                    log.exception(
                        f'find_trajectory_sector_intersections id: {flight_id}'
                    )

                except StopIteration:
                    pass

            log.info(
                f'find_sector_intersections finished for {flights_count} trajectories'
            )
            log.info(f'{zeros_count} trajectories had no intersections')

    except EnvironmentError:
        log.error(f'could not write file: {output_filename}')
        return errno.EACCES

    return 0
def interpolate_trajectories(filename,
                             straight_interval=DEFAULT_STRAIGHT_INTERVAL,
                             turn_interval=DEFAULT_TURN_INTERVAL,
                             logging_msg_count=DEFAULT_LOGGING_COUNT):
    """
    Interpolate trajectory positions in a trajectories file.

    Outputs a positions file (in CSV format) in the current directory.
    The output filename is the base name of filename, without any bz2
    extension, with TRAJECTORIES replaced by SYNTH_POSITIONS and
    JSON_FILE_EXTENSION replaced by CSV_FILE_EXTENSION.

    Parameters
    ----------
    filename: a string
        The name of the trajectories JSON file.

    straight_interval: float
        The time between positions along a straight leg [Seconds],
        default DEFAULT_STRAIGHT_INTERVAL.

    turn_interval: float
        The time between positions while turning [Seconds],
        default DEFAULT_TURN_INTERVAL.

    logging_msg_count: int
        The number of trajectories between logging count messages.
        default DEFAULT_LOGGING_COUNT.

    Returns
    -------
    errno.EINVAL if filename is not a JSON file,
    errno.EACCES if an EnvironmentError occurred, zero otherwise.

    """
    # The output is named after the input base name, less any .bz2 extension
    trajectories_filename = os.path.basename(filename)
    if has_bz2_extension(filename):
        trajectories_filename = \
            trajectories_filename[:-len(BZ2_FILE_EXTENSION)]

    if not trajectories_filename.endswith(JSON_FILE_EXTENSION):
        log.error(f'Invalid file type: {trajectories_filename}, must be a JSON file.')
        return errno.EINVAL

    log.info(f'trajectories file: {filename}')
    log.info(f'Straight interval: {straight_interval} seconds')
    log.info(f'Turn interval: {turn_interval} seconds')

    output_filename = trajectories_filename \
        .replace(TRAJECTORIES, SYNTH_POSITIONS) \
        .replace(JSON_FILE_EXTENSION, CSV_FILE_EXTENSION)
    try:
        with open(output_filename, 'w') as file:
            file.write(POSITION_FIELDS)

            # Interpolate each smoothed trajectory and append its positions
            flights_count = 0
            for smooth_traj in generate_SmoothedTrajectories(filename):
                try:
                    flight_id = smooth_traj.flight_id
                    ref_traj = interpolate_trajectory_positions(
                        smooth_traj, straight_interval, turn_interval)
                    ref_traj.to_csv(file,
                                    index=False,
                                    header=False,
                                    mode='a',
                                    date_format=ISO8601_DATETIME_US_FORMAT)

                    flights_count += 1
                    if flights_count % logging_msg_count == 0:
                        log.info(f'{flights_count} trajectories interpolated')

                except ValueError:
                    # Log the failure and carry on with the next trajectory
                    log.exception(f'interpolate_trajectory flight id: {flight_id}')

                except StopIteration:
                    pass

            log.info(f'Finished interpolating {flights_count} trajectories.')

    except EnvironmentError:
        log.error(f'could not write file: {output_filename}')
        return errno.EACCES

    return 0
Ejemplo n.º 9
0
def clean_position_data(filename, max_speed=DEFAULT_MAX_SPEED,
                        distance_accuracy=DEFAULT_DISTANCE_ACCURACY):
    """
    Identify and remove invalid positions in a positions file.

    Outputs a positions file and an error metrics file in the current
    directory. The positions filename is the base name of filename, without
    any bz2 extension and with its first len(RAW) + 1 characters (the "raw_"
    prefix) removed. The error metrics filename is the positions filename
    with POSITIONS replaced by ERROR_METRICS.

    Parameters
    ----------
    filename: a string
        The name of the raw positions file.

    max_speed: float
        The maximum speed between valid positions [Knots].
        Default: DEFAULT_MAX_SPEED.

    distance_accuracy: float
        The accuracy of the positions [Nautical Miles].
        Default: DEFAULT_DISTANCE_ACCURACY.

    Returns
    -------
    errno.EINVAL if filename is not a CSV file,
    errno.ENOENT if an EnvironmentError occurred while processing the
    positions, zero otherwise.

    """
    positions_filename = os.path.basename(filename)
    if has_bz2_extension(positions_filename):
        # remove the .bz2 from the end of the filename
        positions_filename = positions_filename[:-len(BZ2_FILE_EXTENSION)]

    if not positions_filename.endswith(CSV_FILE_EXTENSION):
        log.error(f'Invalid file type: {positions_filename}, must be a CSV file.')
        return errno.EINVAL

    log.info(f'positions file: {positions_filename}')
    log.info(f'Max speed: {max_speed} Knots')
    log.info(f'Distance accuracy: {distance_accuracy} NM')

    ##########################################################################
    # Create output filenames

    # strip raw_ from the start of the positions_filename
    output_filename = positions_filename[len(RAW) + 1:]

    error_metrics_filename = output_filename.replace(POSITIONS, ERROR_METRICS)

    ##########################################################################
    # Process the positions

    flights_count = 0
    with open(output_filename, 'w') as output_file, \
            open(error_metrics_filename, 'w') as error_file:
        output_file.write(POSITION_FIELDS)

        error_file.write(POSITION_ERROR_FIELDS)
        error_writer = csv.writer(error_file, lineterminator='\n')

        try:
            # Read from the path supplied by the caller: positions_filename
            # has had its directory and bz2 extension removed, so it only
            # names the input when that is an uncompressed file in the
            # current directory.
            for position_lines in generate_positions(filename):
                flight_id = position_lines[0].split(',')[0]
                try:
                    positions = pd.read_csv(StringIO(''.join(position_lines)),
                                            header=None,
                                            names=POSITION_FIELD_NAMES,
                                            parse_dates=['TIME'])

                    invalid_positions, error_metrics = \
                        find_invalid_positions(positions,
                                               max_speed=max_speed,
                                               distance_accuracy=distance_accuracy)

                    # Only the valid positions are written to the output
                    valid_positions = positions[~invalid_positions]
                    valid_positions.to_csv(output_file, index=False,
                                           header=False, mode='a',
                                           date_format=ISO8601_DATETIME_FORMAT)

                    # Each error metrics row starts with the flight id
                    error_metrics.insert(0, flight_id)
                    error_writer.writerow(error_metrics)

                    flights_count += 1

                except (ValueError, TypeError):
                    # Log the failure and carry on with the next flight
                    log.exception(f'find_invalid_positions flight id: {flight_id}')

                except StopIteration:
                    pass

            log.info(f'written file: {output_filename}')
            log.info(f'written file: {error_metrics_filename}')

        except EnvironmentError:
            log.error(f'could not read file: {filename}')
            return errno.ENOENT

    log.info(f'positions cleaned for {flights_count} flights')
    return 0
Ejemplo n.º 10
0
def analyse_position_data(
        filename,
        across_track_tolerance=DEFAULT_ACROSS_TRACK_TOLERANCE,
        time_method=MOVING_AVERAGE_SPEED,
        N=DEFAULT_MOVING_MEDIAN_SAMPLES,
        M=DEFAULT_MOVING_AVERAGE_SAMPLES,
        max_duration=DEFAULT_SPEED_MAX_DURATION,
        logging_msg_count=DEFAULT_LOGGING_COUNT):
    """
    Analyse trajectory positions in a positions file.

    Outputs a trajectory file (in JSON format) in the current directory, with
    POSITIONS replaced by TRAJECTORIES in the filename and with the
    time_method and across_track_tolerance (without its decimal point) at the
    start of the filename.

    It also outputs a trajectories metrics file in csv format, with
    POSITIONS replaced by TRAJ_METRICS in the filename.

    Parameters
    ----------
    filename: a string
        The name of the positions file.

    across_track_tolerance: float
        The maximum across track distance [Nautical Miles],
        default DEFAULT_ACROSS_TRACK_TOLERANCE.

    time_method: string
        The smoothing method to use: 'mas', 'lm', 'trf' 'dogbox',
        default MOVING_AVERAGE_SPEED.

    N : integer
        The number of samples to consider for the speed moving median filter,
        default DEFAULT_MOVING_MEDIAN_SAMPLES.

    M : integer
        The number of samples to consider for the speed moving average filter,
        default DEFAULT_MOVING_AVERAGE_SAMPLES.

    max_duration: float
        The maximum time between points to smooth when calculating speed [Seconds],
        default DEFAULT_SPEED_MAX_DURATION.

    logging_msg_count: int
        The number of trajectories between logging count messages.
        default DEFAULT_LOGGING_COUNT.

    Returns
    -------
    errno.EINVAL if filename is not a CSV file,
    errno.ENOENT if an EnvironmentError occurred while processing the
    positions, zero otherwise.

    """
    positions_filename = os.path.basename(filename)
    if has_bz2_extension(positions_filename):
        # remove the .bz2 from the end of the filename
        positions_filename = positions_filename[:-len(BZ2_FILE_EXTENSION)]

    if not positions_filename.endswith(CSV_FILE_EXTENSION):
        log.error(
            f'Invalid file type: {positions_filename}, must be a CSV file.')
        return errno.EINVAL

    log.info(f'positions file: {filename}')
    log.info(f'across track tolerance: {across_track_tolerance} NM')
    log.info(f'time analysis method: {time_method}')
    if time_method == MOVING_AVERAGE_SPEED:
        # These parameters are only reported for the moving average method
        log.info(f'moving median samples: {N}')
        log.info(f'moving average samples: {M}')
        log.info(f'speed filter maximum duration: {max_duration}')

    ##########################################################################
    # Create output filenames

    # add the time_method and tolerance to the front of the filename
    tolerance_string = str(across_track_tolerance).replace('.', '')
    prefixed_filename = '_'.join(
        [time_method, tolerance_string, positions_filename])

    trajectory_filename = prefixed_filename \
        .replace(POSITIONS, TRAJECTORIES) \
        .replace(CSV_FILE_EXTENSION, JSON_FILE_EXTENSION)

    traj_metrics_filename = prefixed_filename.replace(POSITIONS, TRAJ_METRICS)

    ##########################################################################
    # Process the positions

    flights_count = 0
    with open(trajectory_filename, 'w') as output_file, \
            open(traj_metrics_filename, 'w') as metrics_file:
        output_file.write(
            write_SmoothedTrajectories_json_header(time_method,
                                                   across_track_tolerance, N,
                                                   M, max_duration))
        metrics_file.write(POSITION_METRICS_FIELDS)
        metrics_writer = csv.writer(metrics_file, lineterminator='\n')

        try:
            for position_lines in generate_positions(filename):

                # Ignore single point trajectories
                if len(position_lines) < 2:
                    continue

                flight_id = position_lines[0].split(',')[0]
                try:
                    positions = pd.read_csv(StringIO(''.join(position_lines)),
                                            header=None,
                                            names=POSITION_FIELD_NAMES,
                                            parse_dates=['TIME'])

                    smoothed_traj, quality_metrics = \
                        analyse_trajectory(flight_id, positions,
                                           across_track_tolerance,
                                           time_method, N, M,
                                           max_duration)

                    # delimit with a comma between flights,
                    # i.e. before every flight except the first one written
                    delimiter = ', ' if flights_count else ''
                    output_file.write(
                        delimiter + ''.join(smoothed_traj.dumps()))

                    metrics_writer.writerow(quality_metrics)

                    flights_count += 1
                    if flights_count % logging_msg_count == 0:
                        log.info(f'{flights_count} flights analysed')

                except (ValueError, IndexError, TypeError):
                    # Log the failure and carry on with the next flight
                    log.exception(f'analyse_trajectory flight id: {flight_id}')

                except StopIteration:
                    pass

            output_file.write(SMOOTHED_TRAJECTORY_JSON_FOOTER)
            log.info(f'written file: {trajectory_filename}')
            log.info(f'written file: {traj_metrics_filename}')

        except EnvironmentError:
            log.error(f'could not read file: {filename}')
            return errno.ENOENT

    log.info(f'analyse_trajectory finished for {flights_count} flights')
    return 0
def _write_airport_intersection(file, smooth_traj, traj_path, airport_row,
                                airport_field, is_destination, radius,
                                distance_tolerance):
    """
    Find the intersection of a trajectory with an airport and write it.

    Nothing is written if the airport name in airport_row[airport_field] is
    not AIRPORT_NAME_LENGTH characters long, or if no intersection is found.

    is_destination is passed on to find_airport_intersection: it is False
    for a departure airport and True for a destination airport.
    """
    airport = airport_row[airport_field]
    if len(airport) != AIRPORT_NAME_LENGTH:
        return

    ref_point = global_Point3d(airport_row['LATITUDE'],
                               airport_row['LONGITUDE'])
    intersection = find_airport_intersection(
        smooth_traj, traj_path, airport, ref_point, radius, is_destination,
        distance_tolerance)
    if not intersection.empty:
        intersection.to_csv(file,
                            index=False,
                            header=False,
                            mode='a',
                            date_format=ISO8601_DATETIME_US_FORMAT)


def find_airport_intersections(
        flights_filename,
        trajectories_filename,
        radius=DEFAULT_RADIUS,
        airports_filename=DEFAULT_MOVEMENTS_AIRPORTS_FILENAME,
        distance_tolerance=DEFAULT_DISTANCE_TOLERANCE):
    """
    Find intersections between trajectories and airport cylinders.

    The intersections are written to a csv file in the current directory.
    The output filename is the base name of trajectories_filename, without
    any bz2 extension, with TRAJECTORIES replaced by AIRPORT_INTERSECTIONS
    and JSON_FILE_EXTENSION replaced by CSV_FILE_EXTENSION.

    Parameters
    ----------
    flights_filename: a string
        The name of a flights file.

    trajectories_filename: a string
        The name of a trajectories file.

    radius: float
        The radius of the cylinder around each airport [Nautical Miles],
        default DEFAULT_RADIUS.

    airports_filename: a string
        The name of the airports file, default DEFAULT_MOVEMENTS_AIRPORTS_FILENAME.

    distance_tolerance: float
        The tolerance for path and cylinder distances,
        default DEFAULT_DISTANCE_TOLERANCE.

    Returns
    -------
    errno.EINVAL if a filename date is invalid or the dates differ,
    errno.ENOENT if the airports or flights file could not be read,
    errno.EACCES if an EnvironmentError occurred while finding and writing
    the intersections, zero otherwise.

    """
    # Extract the date string from the filename and validate it
    flights_date = read_iso8601_date_string(flights_filename)
    if not is_valid_iso8601_date(flights_date):
        log.error(
            f'flights file: {flights_filename}, invalid date: {flights_date}')
        return errno.EINVAL

    trajectories_date = read_iso8601_date_string(trajectories_filename,
                                                 is_json=True)
    if not is_valid_iso8601_date(trajectories_date):
        log.error(f'trajectories file, invalid date: {trajectories_date}')
        return errno.EINVAL

    if flights_date != trajectories_date:
        log.error(
            f'Files are not for the same date! Flights date: {flights_date}'
            f', trajectories date: {trajectories_date}')
        return errno.EINVAL

    log.info(f'flights file: {flights_filename}')
    log.info(f'trajectories file: {trajectories_filename}')
    log.info(f'radius: {radius} NM')
    log.info(f'distance_tolerance: {distance_tolerance} NM')

    try:
        airports_df = pd.read_csv(airports_filename,
                                  index_col='AIRPORT',
                                  memory_map=True)

        log.info(f'{airports_filename} read ok')
    except EnvironmentError:
        log.error(f'could not read file: {airports_filename}')
        return errno.ENOENT

    try:
        flights_df = pd.read_csv(flights_filename,
                                 usecols=['FLIGHT_ID', 'ADEP', 'ADES'],
                                 index_col='FLIGHT_ID',
                                 memory_map=True)

        log.info(f'{flights_filename} read ok')
    except EnvironmentError:
        log.error(f'could not read file: {flights_filename}')
        return errno.ENOENT

    # Determine the departure and arrival flights: the inner joins only keep
    # the flights whose ADEP / ADES is in the airports file
    departures_df = pd.merge(flights_df,
                             airports_df,
                             left_on='ADEP',
                             right_index=True)
    destinations_df = pd.merge(flights_df,
                               airports_df,
                               left_on='ADES',
                               right_index=True)

    # The output is named after the trajectories base name, less any .bz2
    # extension. trajectories_filename itself is left unchanged so that the
    # trajectories are read from the path supplied by the caller.
    trajectories_basename = os.path.basename(trajectories_filename)
    if has_bz2_extension(trajectories_basename):
        trajectories_basename = \
            trajectories_basename[:-len(BZ2_FILE_EXTENSION)]

    # Write the airport_intersections into a csv file with output_filename
    output_filename = trajectories_basename \
        .replace(TRAJECTORIES, AIRPORT_INTERSECTIONS) \
        .replace(JSON_FILE_EXTENSION, CSV_FILE_EXTENSION)
    try:
        with open(output_filename, 'w') as file:
            file.write(AIRPORT_INTERSECTION_FIELDS)

            flights_count = 0
            smoothed_trajectories = generate_SmoothedTrajectories(
                trajectories_filename)
            for smooth_traj in smoothed_trajectories:
                try:
                    flight_id = smooth_traj.flight_id

                    is_departure = flight_id in departures_df.index
                    is_arrival = flight_id in destinations_df.index

                    if is_departure or is_arrival:

                        # The path is calculated once for both airports
                        traj_path = smooth_traj.path.ecef_path()

                        if is_departure:
                            _write_airport_intersection(
                                file, smooth_traj, traj_path,
                                departures_df.loc[flight_id], 'ADEP', False,
                                radius, distance_tolerance)

                        if is_arrival:
                            _write_airport_intersection(
                                file, smooth_traj, traj_path,
                                destinations_df.loc[flight_id], 'ADES', True,
                                radius, distance_tolerance)

                    flights_count += 1

                except ValueError:
                    # Log the failure and carry on with the next trajectory
                    log.exception(
                        f'find_airport_intersections id: {flight_id}')

                except StopIteration:
                    pass

            log.info(
                f'find_airport_intersections finished for {flights_count} trajectories.'
            )

    except EnvironmentError:
        log.error(f'could not write file: {output_filename}')
        return errno.EACCES

    return 0
Ejemplo n.º 12
0
def convert_fr24_data(filenames):
    """
    Convert a pair of fr24 ADS-B files into a flights and a positions file.

    The date string is read from both filenames; both must be valid and
    equal. Flights are read from the flights file, then every row of the
    points file is appended to the flight it references (rows for unknown
    flight ids are ignored). Each flight is sorted and only flights
    reporting is_valid are written out, in flight id order.

    Input files with a bz2 extension are read through bz2, all others are
    read as plain text. The output filenames come from
    create_convert_fr24_filenames.

    returns 0 on success, otherwise:
    errno.EINVAL if the filenames are the same, a date is invalid or the
    dates differ,
    errno.ENOENT if an input file could not be read,
    errno.EACCES if an output file could not be written.

    Parameters
    ----------
    filenames a sequence holding the flights filename followed by the
    points filename.

    """
    flights_filename = filenames[0]
    points_filename = filenames[1]

    if flights_filename == points_filename:
        log.error(
            'Files are the same! Flights filename: %s, points filename: %s',
            flights_filename, points_filename)
        return errno.EINVAL

    # Extract the date string from the flights filename and validate it
    flights_date = read_iso8601_date_string(flights_filename)
    if not is_valid_iso8601_date(flights_date):
        log.error('fr24 flights file: %s, invalid date: %s', flights_filename,
                  flights_date)
        return errno.EINVAL
    log.info('fr24 flights file: %s', flights_filename)

    # Extract the date string from the points filename and validate it
    points_date = read_iso8601_date_string(points_filename)
    if not is_valid_iso8601_date(points_date):
        log.error('fr24 points file: %s, invalid date: %s', points_filename,
                  points_date)
        return errno.EINVAL
    log.info('fr24 points file: %s', points_filename)

    if flights_date != points_date:
        log.error(
            'Files are not for the same date! Flights date: %s, points date: %s',
            flights_date, points_date)
        return errno.EINVAL

    # A dict to hold the ADS-B flights, keyed by flight id
    flights = {}

    # Read the ADS-B flights file into flights
    try:
        opener = bz2.open if has_bz2_extension(flights_filename) else open
        # newline='' lets the csv module do its own newline handling
        with opener(flights_filename, 'rt', newline='') as file:
            reader = csv.reader(file, delimiter=',')
            next(reader, None)  # skip the headers
            for row in reader:
                flight_id = row[AdsbFlightField.FLIGHT_ID]
                # the first row for a flight id wins; don't build an
                # AdsbFlight just to throw it away for duplicates
                if flight_id not in flights:
                    flights[flight_id] = AdsbFlight(row)

    except EnvironmentError:
        log.error('could not read file: %s', flights_filename)
        return errno.ENOENT

    log.info('fr24 flights read ok')

    # Read the ADS-B points file into flights
    try:
        opener = bz2.open if has_bz2_extension(points_filename) else open
        with opener(points_filename, 'rt', newline='') as file:
            reader = csv.reader(file, delimiter=',')
            next(reader, None)  # skip the headers
            for row in reader:
                flight = flights.get(row[AdsbPointField.FLIGHT_ID])
                if flight is not None:
                    flight.append(row)

    except EnvironmentError:
        log.error('could not read file: %s', points_filename)
        return errno.ENOENT

    log.info('fr24 points read ok')

    # sort positions in date time (of position) order
    for flight in flights.values():
        flight.sort()
    log.info('fr24 points sorted')

    # The valid flights in flight id order, shared by both output files
    valid_flights = [
        flight for _, flight in sorted(flights.items()) if flight.is_valid
    ]

    # Set if any output file fails, so that success is not reported for a
    # partial conversion. The remaining file is still attempted.
    write_failed = False

    # Output the ADS-B flight data for all flights
    output_files = create_convert_fr24_filenames(flights_date)
    flight_file = output_files[0]
    try:
        with open(flight_file, 'w') as file:
            file.write(FLIGHT_FIELDS)
            for flight in valid_flights:
                print(flight, file=file)

        log.info('written file: %s', flight_file)

    except EnvironmentError:
        log.error('could not write file: %s', flight_file)
        write_failed = True

    # Output the ADS-B position data for all flights
    positions_file = output_files[1]
    try:
        with open(positions_file, 'w') as file:
            file.write(POSITION_FIELDS)
            for flight in valid_flights:
                for pos in flight.positions:
                    print(pos, file=file)

        log.info('written file: %s', positions_file)

    except EnvironmentError:
        log.error('could not write file: %s', positions_file)
        write_failed = True

    if write_failed:
        return errno.EACCES

    log.info('fr24 conversion complete for %s flights on %s',
             len(valid_flights), points_date)

    return 0
Ejemplo n.º 13
0
def convert_apds_data(filename, stands_filename):
    """
    Convert an APDS data file into flight, position and event files.

    The start and finish date strings are taken from the base name of
    filename and both must be valid. If stands_filename is given, the
    airport stands are read into a DataFrame indexed by ICAO_ID and
    STAND_ID and passed to every ApdsFlight. Flights are written in
    APDS id order.

    The positions file is only written when airport stand data is
    available; the flights and events files are always written. The
    output filenames come from create_convert_apds_filenames.

    An input file with a bz2 extension is read through bz2, otherwise it
    is read as plain text.

    returns 0 on success, otherwise:
    errno.EINVAL if the start or finish date is invalid,
    errno.ENOENT if an input file could not be read,
    errno.EACCES if an output file could not be written.

    Parameters
    ----------
    filename the path of the APDS data file.

    stands_filename the path of the airport stands file, may be empty.

    """
    # Extract the start and finish date strings from the filename
    start_date, finish_date = split_dual_date(os.path.basename(filename))
    if not is_valid_iso8601_date(start_date):
        log.error('apds data file: %s, invalid start date: %s', filename,
                  start_date)
        return errno.EINVAL

    # validate the finish date string from the filename
    if not is_valid_iso8601_date(finish_date):
        log.error('apds data file: %s, invalid finish date: %s', filename,
                  finish_date)
        return errno.EINVAL

    log.info('apds data file: %s', filename)

    airport_stands_df = pd.DataFrame()
    if stands_filename:
        try:
            # sort_index returns a new DataFrame, so the result must be
            # kept for the index to actually be sorted
            airport_stands_df = pd.read_csv(
                stands_filename,
                index_col=['ICAO_ID', 'STAND_ID'],
                memory_map=True).sort_index()
        except EnvironmentError:
            log.error('could not read file: %s', stands_filename)
            return errno.ENOENT

        log.info('airport stands file: %s', stands_filename)
    else:
        log.info('airport stands not provided')

    # A dict to hold the APDS flights, keyed by APDS id
    flights = {}

    # Read the APDS flights file into flights
    try:
        opener = bz2.open if has_bz2_extension(filename) else open
        # newline='' lets the csv module do its own newline handling
        with opener(filename, 'rt', newline='') as file:
            reader = csv.reader(file, delimiter=',')
            next(reader, None)  # skip the headers
            for row in reader:
                apds_id = row[ApdsField.APDS_ID]
                # the first row for an APDS id wins; don't build an
                # ApdsFlight just to throw it away for duplicates
                if apds_id not in flights:
                    flights[apds_id] = ApdsFlight(row, airport_stands_df)

    except EnvironmentError:
        log.error('could not read file: %s', filename)
        return errno.ENOENT

    log.info('apds flights read ok')

    # The flights in APDS id order, shared by all of the output files
    ordered_flights = [flight for _, flight in sorted(flights.items())]

    # Set if any output file fails, so that success is not reported for a
    # partial conversion. The remaining files are still attempted.
    write_failed = False

    # Output the APDS flight data
    output_files = create_convert_apds_filenames(start_date, finish_date)
    flight_file = output_files[0]
    try:
        with open(flight_file, 'w') as file:
            file.write(FLIGHT_FIELDS)
            for flight in ordered_flights:
                print(flight, file=file)

        log.info('written file: %s', flight_file)

    except EnvironmentError:
        log.error('could not write file: %s', flight_file)
        write_failed = True

    # if airport stand data was provided
    if len(airport_stands_df):
        # Output the APDS position data
        positions_file = output_files[1]
        try:
            with open(positions_file, 'w') as file:
                file.write(POSITION_FIELDS)
                for flight in ordered_flights:
                    for position in sorted(flight.positions):
                        print(position, file=file)

            log.info('written file: %s', positions_file)

        except EnvironmentError:
            log.error('could not write file: %s', positions_file)
            write_failed = True

    # Output the APDS event data
    event_file = output_files[2]
    try:
        with open(event_file, 'w') as file:
            file.write(FLIGHT_EVENT_FIELDS)
            for flight in ordered_flights:
                for event in sorted(flight.events):
                    print(event, file=file)

        log.info('written file: %s', event_file)

    except EnvironmentError:
        log.error('could not write file: %s', event_file)
        write_failed = True

    if write_failed:
        return errno.EACCES

    log.info('apds conversion complete for %s flights on %s',
             len(ordered_flights), start_date)

    return 0