예제 #1
0
def merge_cpr_fr24_data(date, *, max_speed=DEFAULT_MAX_SPEED,
                        distance_accuracy=DEFAULT_DISTANCE_ACCURACY):
    """
    Match, merge and clean refined CPR and FR24 ADS-B data for the given date.

    Parameters
    ----------
    date: string
        The date in ISO8601 format, e.g. 2017-08-16

    max_speed: string
        The maximum ground speed permitted between adjacent positions [Knots],
        default: 750 Knots.

    distance_accuracy: string
        The maximum distance between positions at the same time [Nautical Miles],
        default: 0.25 NM.

    Returns
    -------
        True if successful, False otherwise.

    """
    if is_valid_iso8601_date(date):
        # Run the match / merge / clean tasks from the refined-data directory.
        os.chdir(REFINED_DIR)
        tasks.match_cpr_adsb_trajectories_on(date)
        tasks.merge_cpr_adsb_trajectories_on(date)

        # max_speed and distance_accuracy may arrive as strings, so convert.
        return tasks.clean_raw_positions_data(CPR_FR24, date, float(max_speed),
                                              float(distance_accuracy))
    else:
        log.error("The date is not valid: %s", date)
        return False
예제 #2
0
def convert_airport_ids(flights_filename,
                        airports_filename=DEFAULT_AIRPORTS_FILENAME):
    """
    Convert the IATA airport codes in an iata_ FR24 flights file to ICAO
    codes and write the result to a new FR24 flights file.

    Parameters
    ----------
    flights_filename: string
        The name of an iata_ FR24 flights file with a date in its name.

    airports_filename: string
        The name of a file mapping IATA_AP_CODE to ICAO_AP_CODE,
        default: DEFAULT_AIRPORTS_FILENAME.

    Returns
    -------
    Zero if successful, an errno error code otherwise.

    """
    # The flights filename must start with the iata_ prefix
    if not os.path.basename(flights_filename).startswith(IATA):
        log.error('File is not an iata_ flights file: %s', flights_filename)
        return errno.EINVAL

    # The filename must also carry a valid ISO8601 date
    flights_date = read_iso8601_date_string(flights_filename)
    if not is_valid_iso8601_date(flights_date):
        log.error('iata fr24 flights file: %s, invalid date: %s',
                  flights_filename, flights_date)
        return errno.EINVAL

    log.info('iata fr24 file: %s', flights_filename)
    log.info('airports file: %s', airports_filename)

    # Read the flights into a pandas DataFrame
    try:
        flights_df = pd.read_csv(flights_filename, memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', flights_filename)
        return errno.ENOENT

    log.info('flights file read ok')

    # Read the airports into a pandas DataFrame
    try:
        airports_df = pd.read_csv(airports_filename, memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', airports_filename)
        return errno.ENOENT

    log.info('airports file read ok')

    # A Series indexed by IATA_AP_CODE whose values are ICAO_AP_CODE
    iata_to_icao = airports_df.set_index('IATA_AP_CODE')['ICAO_AP_CODE']

    # Replace IATA codes with ICAO codes in both airport columns
    for column in ('ADEP', 'ADES'):
        flights_df[column] = flights_df[column].replace(iata_to_icao)

    log.info('airport ids converted')

    output_filename = create_flights_filename(FR24, flights_date)
    try:
        flights_df.to_csv(output_filename, index=False)

        log.info('written file: %s', output_filename)
    except EnvironmentError:
        log.error('could not write file: %s', output_filename)
        return errno.EACCES

    log.info('airports conversion complete')

    return 0
예제 #3
0
def refine_cpr_data(date, *, max_speed=DEFAULT_MAX_SPEED,
                    distance_accuracy=DEFAULT_DISTANCE_ACCURACY):
    """
    Refine a CPR file for the given date.

    Parameters
    ----------
    date: string
        The date in ISO8601 format, e.g. 2017-08-16

    max_speed: string
        The maximum ground speed permitted between adjacent positions [Knots],
        default: 750 Knots.

    distance_accuracy: string
        The maximum distance between positions at the same time [Nautical Miles],
        default: 0.25 NM.

    Returns
    -------
        True if successful, False otherwise.

    """
    if is_valid_iso8601_date(date):
        # Run the convert / clean tasks from the refined-data directory.
        os.chdir(REFINED_DIR)
        tasks.convert_cpr_file(date)
        return tasks.clean_raw_positions_data(CPR, date, float(max_speed),
                                              float(distance_accuracy))
    else:
        log.error("The date is not valid: %s", date)
        return False
예제 #4
0
def merge_cpr_fr24_overnight_flights(date):
    """
    Match, merge and clean merged CPR and FR24 ADS-B data for the given date,
    with merged CPR and FR24 ADS-B data for the previous day.

    Parameters
    ----------
    date: string
        The date in ISO8601 format, e.g. 2017-08-16

    Returns
    -------
        True if successful, False otherwise.

    """
    if is_valid_iso8601_date(date):
        # Run the match / merge / clean tasks from the refined-data directory.
        # Each step only runs if the previous one succeeded; any failure
        # falls through to the final `return False`.
        os.chdir(REFINED_DIR)
        if tasks.match_previous_days_flights(date):
            if tasks.merge_previous_days_data(date):
                return tasks.clean_overnight_cpr_fr24_positions(date)
    else:
        log.error("The date is not valid: %s", date)

    return False
def match_overnight_flights_on_day(
        date, max_time_difference=DEFAULT_MAXIMUM_TIME_DELTA):
    """
    Match flights for the given day with flights for the previous day.

    Parameters
    ----------
    date: string
        The date in ISO8601 format, e.g. 2017-08-16

    max_time_difference:
        Passed through to match_overnight_flights,
        default: DEFAULT_MAXIMUM_TIME_DELTA.
        NOTE(review): presumably the maximum time gap allowed between the two
        days' flight segments - confirm against match_overnight_flights.

    Returns
    -------
    Zero if successful, an errno error code otherwise.

    """
    if is_valid_iso8601_date(date):

        # get the CPR data from the Google bucket
        log.info(f'Getting data for date: {date}')

        match_flights_files = create_match_overnight_flights_input_filenames(
            date)
        if not get_processed(REFINED_MERGED_DAILY_CPR_FR24,
                             match_flights_files):
            log.error('Flights file not found in daily_cpr_fr24 bucket')
            return errno.ENOENT

        # Match the two days' flights; non-zero means failure
        error_code = match_overnight_flights(match_flights_files,
                                             max_time_difference)
        if error_code:
            return error_code
        gc.collect()

        # Upload the previous-day matching ids
        prev_ids_filename = create_matching_ids_filename(PREV_DAY, date)
        if not put_processed(REFINED_MERGED_OVERNIGHT_CPR_FR24_IDS,
                             [prev_ids_filename]):
            log.error('Could not write ids to overnight_cpr_fr24/ids bucket')
            return errno.EACCES

        # Fetch the remaining (positions/events) inputs - the first two
        # entries were already fetched above, hence the [2:] slice.
        extract_data_input_files = create_extract_overnight_data_input_filenames(
            date)
        if not get_processed(REFINED_MERGED_DAILY_CPR_FR24,
                             extract_data_input_files[2:]):
            log.error(
                'Positions or events file not found in daily_cpr_fr24 bucket')
            return errno.ENOENT

        error_code = extract_overnight_data(extract_data_input_files)
        if error_code:
            return error_code

        # Upload the extracted overnight data
        extract_data_output_files = create_extract_overnight_data_output_filenames(
            date)
        if not put_processed(REFINED_MERGED_OVERNIGHT_CPR_FR24,
                             extract_data_output_files):
            log.error('Could not write to overnight_cpr_fr24 bucket')
            return errno.EACCES

    else:
        log.error(f'invalid date: {date}')
        return errno.EINVAL

    return 0
예제 #6
0
def extract_fleet_data(flights_filename):
    """
    Extract fleet data (registration, type, address) from an FR24 flights
    file and write it to a fleet data csv file.

    Parameters
    ----------
    flights_filename: string
        The name of an FR24 flights file with a date in its name.

    Returns
    -------
    Zero if successful, an errno error code otherwise.

    """
    # Extract the date string from the filename and validate it
    flights_date = read_iso8601_date_string(flights_filename)
    if not is_valid_iso8601_date(flights_date):
        log.error('fr24 flights file: %s, invalid date: %s', flights_filename,
                  flights_date)
        return errno.EINVAL

    log.info('fr24 flights file: %s', flights_filename)

    # Only the fleet-related columns are needed
    try:
        flights_df = pd.read_csv(flights_filename,
                                 usecols=[
                                     'AIRCRAFT_REG', 'AIRCRAFT_TYPE',
                                     'AIRCRAFT_ADDRESS', 'PERIOD_START'
                                 ],
                                 memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', flights_filename)
        return errno.ENOENT

    log.info('flights file read ok')

    # Keep rows with a registration, order by AIRCRAFT_REG then PERIOD_START
    # so the earliest record of each combination comes first, then drop the
    # later duplicates of AIRCRAFT_REG, AIRCRAFT_TYPE & AIRCRAFT_ADDRESS.
    fleet_df = (flights_df[flights_df['AIRCRAFT_REG'].notnull()]
                .sort_values(by=['AIRCRAFT_REG', 'PERIOD_START'])
                .drop_duplicates(subset=['AIRCRAFT_REG', 'AIRCRAFT_TYPE',
                                         'AIRCRAFT_ADDRESS']))

    # Output the fleet DataFrame to a csv file
    output_filename = create_fleet_data_filename(flights_date)
    try:
        fleet_df.to_csv(output_filename, index=False)
    except EnvironmentError:
        log.error('could not write file: %s', output_filename)
        return errno.EACCES

    log.info('written file: %s', output_filename)
    log.info('fleet data extraction complete')

    return 0
예제 #7
0
def refine_fr24_data(date, *, max_speed=DEFAULT_MAX_SPEED,
                     distance_accuracy=DEFAULT_DISTANCE_ACCURACY,
                     airports_filename=DEFAULT_AIRPORTS_FILENAME):
    """
    Refine FR24 ADS-B data for the given date.

    Parameters
    ----------
    date: string
        The date in ISO8601 format, e.g. 2017-08-16

    max_speed: string
        The maximum ground speed permitted between adjacent positions [Knots],
        default: 750 Knots.

    distance_accuracy: string
        The maximum distance between positions at the same time [Nautical Miles],
        default: 0.25 NM.

    airports_filename: string
        The name of a file containing airport codes for IATA to ICAO conversion,
        default: airports.csv

    Returns
    -------
        True if successful, False otherwise.

    """
    if is_valid_iso8601_date(date):
        # Run the refinement tasks from the refined-data directory.
        os.chdir(REFINED_DIR)
        tasks.convert_fr24_date(date)

        # Note: these 3 tasks could run in parallel
        tasks.clean_raw_positions_data(FR24, date, float(max_speed),
                                       float(distance_accuracy))
        tasks.convert_airport_codes(date, airports_filename)
        # NOTE(review): 'extract_fleet_date' may be a typo for
        # 'extract_fleet_data' - confirm the task name in the tasks module.
        return tasks.extract_fleet_date(date)

    else:
        log.error("The date is not valid: %s", date)
        return False
예제 #8
0
def validate_data_type_and_date(data_type, date=None):
    """
    Validate an unprocessed data item's type and (optional) date.

    Unprocessed data items are identified by type and date.

    Parameters
    ----------
    data_type
        The data type; must be one of VALID_DATA_TYPES.

    date
        Optional, a date to match the data; a falsy date skips the check.

    Returns
    -------
    A tuple with a valid flag and, if in error, an error message reporting
    the result of each check; (True, None) when both checks pass.

    """
    # 'in' is the idiomatic membership test
    # (was VALID_DATA_TYPES.__contains__(data_type))
    valid_type = data_type in VALID_DATA_TYPES
    # An omitted (or empty) date is considered valid
    valid_date = is_valid_iso8601_date(date) if date else True
    if valid_type and valid_date:
        return True, None
    else:
        return False, "Validation failure, data_type: " + str(valid_type) + \
            " date: " + str(valid_date)
def merge_consecutive_day_trajectories(filenames):
    """
    Merge a day's flights, positions and events data with the previous
    day's data, mapping flight ids with the matching-ids file.

    Parameters
    ----------
    filenames: a list of 7 filenames, in order:
        [day ids, prev flights, next flights, prev positions,
         next positions, prev events, next events].

    Returns
    -------
    Zero if successful, an errno error code otherwise.

    """
    day_ids_filename = filenames[0]

    prev_flights_filename = filenames[1]
    next_flights_filename = filenames[2]

    prev_positions_filename = filenames[3]
    next_positions_filename = filenames[4]

    prev_events_filename = filenames[5]
    next_events_filename = filenames[6]

    # Extract date strings from the input filenames and validate them
    # NOTE(review): input_filenames is not defined in this function -
    # presumably a module-level list of 7 descriptive labels matching the
    # order of `filenames`; confirm it exists and has length 7.
    input_date_strings = [''] * len(input_filenames)
    for i in range(len(input_filenames)):
        filename = filenames[i]
        input_date_strings[i] = read_iso8601_date_string(filename)
        if is_valid_iso8601_date(input_date_strings[i]):
            log.info('%s: %s', input_filenames[i], filename)
        else:
            log.error('%s: %s, invalid date: %s', input_filenames[i], filename,
                      input_date_strings[i])
            return errno.EINVAL

    prev_date = input_date_strings[1]
    next_date = input_date_strings[2]

    # The ids file must be for the next day, the prev day must precede the
    # next day, and each prev/next file must carry the matching date.
    if (input_date_strings[0] != next_date) \
            or (next_date <= prev_date) \
            or (prev_date != input_date_strings[3]) \
            or (prev_date != input_date_strings[5]) \
            or (next_date != input_date_strings[4]) \
            or (next_date != input_date_strings[6]):
        log.error("Files are not for the correct dates: %s,%s,%s,%s,%s,%s,%s",
                  input_date_strings[0], input_date_strings[1],
                  input_date_strings[2], input_date_strings[3],
                  input_date_strings[4], input_date_strings[5],
                  input_date_strings[6])
        return errno.EINVAL

    ############################################################################

    # Read the Id file, indexed by FLIGHT_ID with both id columns as UUIDs
    ids_df = pd.DataFrame()
    try:
        ids_df = pd.read_csv(day_ids_filename,
                             index_col='FLIGHT_ID',
                             converters={
                                 'FLIGHT_ID': lambda x: UUID(x),
                                 'NEW_FLIGHT_ID': lambda x: UUID(x)
                             },
                             memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', day_ids_filename)
        return errno.ENOENT

    # Merge flights
    if not merge_flights(prev_flights_filename, next_flights_filename, ids_df,
                         log):
        return errno.ENOENT

    # free memory used by merge_flights
    gc.collect()

    # Merge positions
    if not merge_next_day_items(prev_positions_filename,
                                next_positions_filename, ids_df, log):
        return errno.ENOENT

    # free memory used by merge_next_day_items
    gc.collect()

    # Merge events
    if not merge_next_day_items(prev_events_filename, next_events_filename,
                                ids_df, log):
        return errno.ENOENT

    log.info('merging complete')

    return 0
예제 #10
0
def import_data_on_day(date,
                       max_speed=DEFAULT_MAX_SPEED,
                       distance_accuracy=DEFAULT_DISTANCE_ACCURACY):
    """
    Import, refine and merge the CPR and FR24 data for the given date.

    Parameters
    ----------
    date: string
        The date in ISO8601 format, e.g. 2017-08-16

    max_speed: string
        The maximum ground speed permitted between adjacent positions [Knots],
        default: 750 Knots.

    distance_accuracy: string
        The maximum distance between positions at the same time [Nautical Miles],
        default: 0.25 NM.

    Returns
    -------
    Zero if successful, errno.EINVAL if the date is invalid.

    """
    if is_valid_iso8601_date(date):

        log.info(f'Getting data for date: {date}')

        # convert the FR24 data
        # Note: needs over 13GB memory if run with convert_cpr_data.
        get_unprocessed(FR24, date, '.')
        convert_fr24_data_on_day(date)
        gc.collect()

        # Clean the FR24 data in parallel.
        # procs collects process handles; each is waited on with
        # communicate() further below.
        procs = []
        procs.append(
            clean_raw_positions_data(FR24, date, max_speed, distance_accuracy))

        # convert the CPR data in parallel
        get_unprocessed(CPR, date, '.')
        cpr_proc = convert_cpr_data_on_day(date)

        # write the converted FR24 data to the Google bucket
        put_processed(REFINED_FR24, create_convert_fr24_filenames(date))

        process_fr24_flights(date, DEFAULT_AIRPORTS_FILENAME)
        gc.collect()

        # Wait for CPR conversion to finish
        out, err = cpr_proc.communicate()
        print(out)
        gc.collect()

        # Clean the CPR and FR24 data in parallel
        procs.append(
            clean_raw_positions_data(CPR, date, max_speed, distance_accuracy))

        # write the converted CPR data to the Google bucket
        put_processed(REFINED_CPR, create_convert_cpr_filenames(date))

        # Wait for the CPR and FR24 cleaning tasks
        for proc in procs:
            out, err = proc.communicate()
            print(out)

        # write the CPR and FR24 positions to the Google bucket
        write_clean_positions_data(CPR, date)
        write_clean_positions_data(FR24, date)
        gc.collect()

        merge_cpr_and_fr24_data(date)
        gc.collect()

        # Clean the merged positions
        proc = clean_raw_positions_data(CPR_FR24, date, max_speed,
                                        distance_accuracy)

        # put the merged CPR and FR24 data to the Google bucket
        put_processed(REFINED_MERGED_DAILY_CPR_FR24_IDS,
                      create_match_cpr_adsb_output_filenames(date))
        put_processed(REFINED_MERGED_DAILY_CPR_FR24,
                      create_merge_cpr_adsb_output_filenames(date))

        # Wait for cleaning to finish
        out, err = proc.communicate()
        print(out)

        # put the merged CPR and FR24 positions to the Google bucket
        write_clean_positions_data(CPR_FR24, date)

    else:
        log.error(f'invalid date: {date}')
        return errno.EINVAL

    return 0
def merge_overnight_flight_data(filenames):
    """
    Merge the positions and events data and update flight data with new times.

    Parameters
    ----------
    filenames: a list of 5 filenames, in order:
        [new flights, new positions, overnight positions,
         new events, overnight events]. All must carry the same date.

    Returns
    -------
    Zero if successful, an errno error code otherwise.

    """
    new_flights_filename = filenames[0]

    new_positions_filename = filenames[1]
    overnight_positions_filename = filenames[2]

    new_events_filename = filenames[3]
    overnight_events_filename = filenames[4]

    # Extract date strings from the input filenames and validate them
    # NOTE(review): input_filenames is not defined in this function -
    # presumably a module-level list of 5 descriptive labels matching the
    # order of `filenames`; confirm it exists and has length 5.
    input_date_strings = [''] * len(input_filenames)
    for i in range(len(input_filenames)):
        filename = filenames[i]
        input_date_strings[i] = read_iso8601_date_string(filename)
        if is_valid_iso8601_date(input_date_strings[i]):
            log.info('%s: %s', input_filenames[i], filename)
        else:
            log.error('%s: %s, invalid date: %s',
                      input_filenames[i], filename, input_date_strings[i])
            return errno.EINVAL

    date = input_date_strings[0]

    # All five files must be for the same date
    if (date != input_date_strings[1]) \
            or (date != input_date_strings[2]) \
            or (date != input_date_strings[3]) \
            or (date != input_date_strings[4]):
        log.error("Files are not for the correct dates: %s,%s,%s,%s,%s",
                  input_date_strings[0], input_date_strings[1],
                  input_date_strings[2], input_date_strings[3],
                  input_date_strings[4])
        return errno.EINVAL

    ############################################################################

    flights_df = pd.DataFrame()
    try:
        flights_df = pd.read_csv(new_flights_filename,
                                 converters={'FLIGHT_ID': lambda x: UUID(x)},
                                 memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', new_flights_filename)
        return errno.ENOENT

    overnight_pos_df = pd.DataFrame()
    try:
        overnight_pos_df = pd.read_csv(overnight_positions_filename, parse_dates=['TIME'],
                                       converters={'FLIGHT_ID': lambda x: UUID(x)},
                                       memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', overnight_positions_filename)
        return errno.ENOENT

    # Update the flight data with the new times of the overnight positions
    update_flight_data(flights_df, overnight_pos_df)

    # Strip the first 4 characters of the input name for the output name
    # (presumably a 'new_' prefix - TODO confirm against the caller).
    flights_filename = new_flights_filename[4:]
    try:
        flights_df.to_csv(flights_filename, index=False,
                          date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', flights_filename)
    except EnvironmentError:
        # NOTE(review): write failures here return ENOENT, while other
        # functions in this codebase return EACCES - confirm intent.
        log.error('could not write file: %s', flights_filename)
        return errno.ENOENT

    ############################################################################

    # Now merge the positions; an empty result signals a merge failure
    merged_positions = merge_overnight_items(new_positions_filename, overnight_pos_df)
    if merged_positions.empty:
        log.error('Error merging: %s', new_positions_filename)
        return errno.ENOENT

    # write merged position data
    raw_positions_filename = '_'.join([RAW, new_positions_filename[4:]])
    try:
        merged_positions.to_csv(raw_positions_filename, index=False,
                                date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', raw_positions_filename)
    except EnvironmentError:
        log.error('could not write file: %s', raw_positions_filename)
        return errno.ENOENT

    ############################################################################

    # # Merge the events
    overnight_events_df = pd.DataFrame()
    try:
        overnight_events_df = pd.read_csv(overnight_events_filename,
                                          parse_dates=['TIME'],
                                          converters={'FLIGHT_ID': lambda x: UUID(x)},
                                          memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', overnight_events_filename)
        return errno.ENOENT

    merged_events = merge_overnight_items(new_events_filename, overnight_events_df)
    if merged_events.empty:
        log.error('Error merging: %s', new_events_filename)
        return errno.ENOENT

    events_filename = new_events_filename[4:]
    try:
        merged_events.to_csv(events_filename, index=False,
                             date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', events_filename)
    except EnvironmentError:
        log.error('could not write file: %s', events_filename)
        return errno.ENOENT

    return 0
def find_airport_intersections(
        flights_filename,
        trajectories_filename,
        radius=DEFAULT_RADIUS,
        airports_filename=DEFAULT_MOVEMENTS_AIRPORTS_FILENAME,
        distance_tolerance=DEFAULT_DISTANCE_TOLERANCE):
    """
    Find intersections between trajectories and airport cylinders.

    Parameters
    ----------
    flights_filename: a string
        The name of a flights file.

    trajectories_filename: a string
        The name of a trajectories file.

    radius: float
        The radius of the cylinder around each airport [Nautical Miles],
        default DEFAULT_RADIUS.

    airports_filename: a string
        The name of the airports file, default DEFAULT_MOVEMENTS_AIRPORTS_FILENAME.

    distance_tolerance: float
        The tolerance for path and cylinder distances,
        default DEFAULT_DISTANCE_TOLERANCE.

    Returns
    -------
    An errno error_code if an error occurred, zero otherwise.

    """
    # Extract the date string from the filename and validate it
    flights_date = read_iso8601_date_string(flights_filename)
    if is_valid_iso8601_date(flights_date):
        log.info(f'flights file: {flights_filename}')
    else:
        log.error(
            f'flights file: {flights_filename}, invalid date: {flights_date}')
        return errno.EINVAL

    trajectories_date = read_iso8601_date_string(trajectories_filename,
                                                 is_json=True)
    if is_valid_iso8601_date(trajectories_date):
        log.info(f'trajectories file: {trajectories_filename}')
    else:
        log.error(f'trajectories file, invalid date: {trajectories_date}')
        return errno.EINVAL

    # Both input files must be for the same date
    if flights_date != trajectories_date:
        log.error(
            f'Files are not for the same date! Flights date: {flights_date}'
            f', trajectories date: {trajectories_date}')
        return errno.EINVAL

    log.info(f'flights file: {flights_filename}')
    log.info(f'trajectories file: {trajectories_filename}')
    log.info(f'radius: {radius} NM')
    log.info(f'distance_tolerance: {distance_tolerance} NM')

    # Read the airports, indexed by AIRPORT
    airports_df = pd.DataFrame()
    try:
        airports_df = pd.read_csv(airports_filename,
                                  index_col='AIRPORT',
                                  memory_map=True)

        log.info(f'{airports_filename} read ok')
    except EnvironmentError:
        log.error(f'could not read file: {airports_filename}')
        return errno.ENOENT

    # Read the flights, indexed by FLIGHT_ID with just the airport columns
    flights_df = pd.DataFrame()
    try:
        flights_df = pd.read_csv(flights_filename,
                                 usecols=['FLIGHT_ID', 'ADEP', 'ADES'],
                                 index_col='FLIGHT_ID',
                                 memory_map=True)

        log.info(f'{flights_filename} read ok')
    except EnvironmentError:
        log.error(f'could not read file: {flights_filename}')
        return errno.ENOENT

    # Determine the departure and arrival flights: inner joins of flights
    # against the airports on the ADEP / ADES codes respectively
    departures_df = pd.merge(flights_df,
                             airports_df,
                             left_on='ADEP',
                             right_index=True)
    destinations_df = pd.merge(flights_df,
                               airports_df,
                               left_on='ADES',
                               right_index=True)

    trajectories_filename = os.path.basename(trajectories_filename)
    is_bz2 = has_bz2_extension(trajectories_filename)
    if is_bz2:  # remove the .bz2 from the end of the filename
        trajectories_filename = trajectories_filename[:-len(BZ2_FILE_EXTENSION
                                                            )]

    # Write the airport_intersections into a csv file with output_filename
    output_filename = trajectories_filename.replace(TRAJECTORIES,
                                                    AIRPORT_INTERSECTIONS)
    output_filename = output_filename.replace(JSON_FILE_EXTENSION,
                                              CSV_FILE_EXTENSION)
    try:
        with open(output_filename, 'w') as file:
            # Header row first; intersections are appended as they are found
            file.write(AIRPORT_INTERSECTION_FIELDS)

            flights_count = 0
            smoothed_trajectories = generate_SmoothedTrajectories(
                trajectories_filename)
            for smooth_traj in smoothed_trajectories:
                try:
                    flight_id = smooth_traj.flight_id

                    is_departure = flight_id in departures_df.index
                    is_arrival = flight_id in destinations_df.index

                    if is_departure or is_arrival:

                        traj_path = smooth_traj.path.ecef_path()

                        if is_departure:
                            dep_row = departures_df.loc[flight_id]
                            departure = dep_row['ADEP']
                            # Only consider rows with a full airport name
                            if len(departure) == AIRPORT_NAME_LENGTH:
                                latitude = dep_row['LATITUDE']
                                longitude = dep_row['LONGITUDE']
                                ref_point = global_Point3d(latitude, longitude)
                                dep_intersection = find_airport_intersection(
                                    smooth_traj, traj_path, departure,
                                    ref_point, radius, False,
                                    distance_tolerance)
                                if not dep_intersection.empty:
                                    dep_intersection.to_csv(
                                        file,
                                        index=False,
                                        header=False,
                                        mode='a',
                                        date_format=ISO8601_DATETIME_US_FORMAT)

                        if is_arrival:
                            dest_row = destinations_df.loc[flight_id]
                            destination = dest_row['ADES']
                            # Only consider rows with a full airport name
                            if len(destination) == AIRPORT_NAME_LENGTH:
                                latitude = dest_row['LATITUDE']
                                longitude = dest_row['LONGITUDE']
                                ref_point = global_Point3d(latitude, longitude)
                                dest_intersection = find_airport_intersection(
                                    smooth_traj, traj_path, destination,
                                    ref_point, radius, True,
                                    distance_tolerance)
                                if not dest_intersection.empty:
                                    dest_intersection.to_csv(
                                        file,
                                        index=False,
                                        header=False,
                                        mode='a',
                                        date_format=ISO8601_DATETIME_US_FORMAT)

                    flights_count += 1

                except ValueError:
                    # Log and continue with the next trajectory
                    log.exception(
                        f'find_airport_intersections id: {flight_id}')

                except StopIteration:
                    pass

            log.info(
                f'find_airport_intersections finished for {flights_count} trajectories.'
            )

    except EnvironmentError:
        log.error(f'could not write file: {output_filename}')
        return errno.EACCES

    return 0
예제 #13
0
import errno
import sys

from pru.trajectory_fields import is_valid_iso8601_date
from scripts.tasks import \
    match_apds_trajectories_on_day, merge_apds_trajectories_on_day
from pru.logger import logger

log = logger(__name__)

if __name__ == '__main__':
    # Bug fix: the script used sys.argv / sys.exit without importing sys,
    # which raised NameError at runtime.
    if len(sys.argv) < 4:
        print('Usage: merge_apds_data_on_day.py <from_date> <to_date> <date>')
        sys.exit(errno.EINVAL)

    # Validate each command-line date before running the tasks
    from_date = sys.argv[1]
    if not is_valid_iso8601_date(from_date):
        log.error(f'invalid from_date: {from_date}')
        sys.exit(errno.EINVAL)

    to_date = sys.argv[2]
    if not is_valid_iso8601_date(to_date):
        log.error(f'invalid to_date: {to_date}')
        sys.exit(errno.EINVAL)

    date = sys.argv[3]
    if not is_valid_iso8601_date(date):
        log.error(f'invalid date: {date}')
        sys.exit(errno.EINVAL)

    # NOTE(review): merge_apds_trajectories_on_day is imported but never
    # called - confirm whether a merge step is missing here.
    if not match_apds_trajectories_on_day(from_date, to_date, date):
        sys.exit(errno.EACCES)
예제 #14
0
def match_apds_trajectories(filenames):
    """
    Match APDS flights with daily flights using flight and event data.

    Parameters
    ----------
    filenames: list of four strings
        [0]: the daily flights filename,
        [1]: the APDS flights filename,
        [2]: the daily events filename,
        [3]: the APDS events filename.

    Returns
    -------
        Zero if successful, an errno error code otherwise.

    """
    day_flights_filename = filenames[0]
    apds_flights_filename = filenames[1]

    day_events_filename = filenames[2]
    apds_events_filename = filenames[3]

    # Extract date strings from the input filenames and validate them
    # NOTE(review): input_filenames is not defined in this function — it is
    # presumably a module-level sequence of filename descriptions used for
    # logging; confirm it has the same length/order as 'filenames'.
    input_date_strings = [''] * len(input_filenames)
    for i in range(len(input_filenames)):
        filename = filenames[i]
        input_date_strings[i] = read_iso8601_date_string(filename)
        if is_valid_iso8601_date(input_date_strings[i]):
            log.info('%s: %s', input_filenames[i], filename)
        else:
            log.error('%s: %s, invalid date: %s', input_filenames[i], filename,
                      input_date_strings[i])
            return errno.EINVAL

    # The APDS filenames carry a date range; their embedded date is the
    # finish date, the start date is extracted from the filename below.
    days_date = input_date_strings[0]
    flights_finish_date = input_date_strings[1]
    events_finish_date = input_date_strings[3]

    # Extract the start date string from the filename and validate it
    flights_start_date, _ = split_dual_date(
        os.path.basename(apds_flights_filename))
    if not is_valid_iso8601_date(flights_start_date):
        log.error('apds flights file: %s, invalid start date: %s',
                  apds_flights_filename, flights_start_date)
        return errno.EINVAL

    # Extract the start date string from the filename and validate it
    events_start_date, _ = split_dual_date(
        os.path.basename(apds_events_filename))
    if not is_valid_iso8601_date(events_start_date):
        log.error('apds events file: %s, invalid start date: %s',
                  apds_events_filename, events_start_date)
        return errno.EINVAL

    # Ensure that both daily files are for the same date
    if (input_date_strings[0] != input_date_strings[2]):
        log.error(
            "Daily files are not for the same dates!"
            " Flights date: %s, Events date: %s", input_date_strings[0],
            input_date_strings[2])
        return errno.EINVAL

    # Ensure that both APDS files cover the same date range
    if (flights_start_date != events_start_date) or \
            (flights_finish_date != events_finish_date):
        log.error(
            "APT files are not for the same dates!"
            " Flights start date: %s, Events start date: %s",
            flights_start_date, events_start_date)
        return errno.EINVAL

    # Ensure day is within APT data range (ISO8601 strings compare
    # chronologically)
    if not (flights_start_date <= days_date <= flights_finish_date):
        log.error(
            "Daily files are not in APT data range!"
            " Flights date: %s, APT range: %s to %s", days_date,
            flights_start_date, flights_finish_date)
        return errno.EINVAL

    ############################################################################
    # Read the data

    # Read days flights into a pandas DataFrame
    day_flights_df = pd.DataFrame()
    try:
        day_flights_df = pd.read_csv(
            day_flights_filename,
            converters={'FLIGHT_ID': lambda x: UUID(x)},
            usecols=['FLIGHT_ID', 'CALLSIGN', 'ADEP', 'ADES'],
            memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', day_flights_filename)
        return errno.ENOENT

    log.info('daily flights read ok')

    # Read APT flights into a pandas DataFrame
    apds_flights_df = pd.DataFrame()
    try:
        apds_flights_df = pd.read_csv(
            apds_flights_filename,
            usecols=['FLIGHT_ID', 'CALLSIGN', 'ADEP', 'ADES'],
            memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', apds_flights_filename)
        return errno.ENOENT

    log.info('apds flights read ok')

    # Read days events into a pandas DataFrame
    day_events_df = pd.DataFrame()
    try:
        day_events_df = pd.read_csv(
            day_events_filename,
            converters={'FLIGHT_ID': lambda x: UUID(x)},
            parse_dates=['TIME'],
            memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', day_events_filename)
        return errno.ENOENT

    log.info('daily events read ok')

    # Read APT events into a pandas DataFrame
    apds_events_df = pd.DataFrame()
    try:
        apds_events_df = pd.read_csv(apds_events_filename,
                                     parse_dates=['TIME'],
                                     memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', apds_events_filename)
        return errno.ENOENT

    log.info('apds events read ok')

    ############################################################################
    # Match events

    apds_day_flights = match_events(day_flights_df, apds_flights_df,
                                    day_events_df, apds_events_df)

    # Output the days ids
    apds_ids_file = create_matching_ids_filename(APDS, days_date)
    try:
        apds_day_flights.to_csv(apds_ids_file,
                                index=False,
                                columns=['FLIGHT_ID', 'NEW_FLIGHT_ID'])
    except EnvironmentError:
        log.error('could not write file: %s', apds_ids_file)
        # BUG FIX: was 'return errno.EACCESreturn' — a syntax error.
        return errno.EACCES

    log.info('written file: %s', apds_ids_file)

    log.info('apds matching complete')

    return 0
import errno
import sys

from pru.trajectory_fields import is_valid_iso8601_date
from pru.trajectory_files import CPR_FR24, create_trajectories_filename
from apps.find_user_airspace_intersections import DEFAULT_LOGGING_COUNT
from scripts.tasks import find_trajectory_user_airspace_intersections
from pru.logger import logger

log = logger(__name__)

if __name__ == '__main__':
    # Find user airspace intersections for trajectories on the given date.
    # Usage: find_user_airspace_intersections_on_day.py <date> [logging_msg_count]
    # BUG FIX: 'import sys' and 'import errno' were missing, so sys.argv,
    # sys.exit and errno.* below raised NameError at runtime.
    if len(sys.argv) < 2:
        print('Usage: find_user_airspace_intersections_on_day.py <date>'
              ' [logging_msg_count]')
        sys.exit(errno.EINVAL)

    date = sys.argv[1]
    # NOTE(review): the 'mas_05_' prefix looks environment/run specific —
    # confirm it is intentional rather than leftover debugging.
    trajectory_filename = 'mas_05_'
    if not is_valid_iso8601_date(date):
        log.error(f'invalid date: {date}')
        sys.exit(errno.EINVAL)
    else:
        trajectory_filename += create_trajectories_filename(CPR_FR24, date)

    # Optional second argument: how often progress messages are logged
    logging_msg_count = DEFAULT_LOGGING_COUNT
    if len(sys.argv) >= 3:
        logging_msg_count = int(sys.argv[2])

    if not find_trajectory_user_airspace_intersections(
            trajectory_filename, CPR_FR24, logging_msg_count):
        sys.exit(errno.EACCES)
예제 #16
0
def merge_overnight_data_on_day(date,
                                max_speed=DEFAULT_MAX_SPEED,
                                distance_accuracy=DEFAULT_DISTANCE_ACCURACY):
    """
    Match flights for the given day with flights for the previous day.

    Gets the merge input files from the overnight_cpr_fr24 bucket, merges
    the overnight flight data, cleans the merged positions and uploads the
    results and error metrics back to the relevant buckets.

    Parameters
    ----------
    date: string
        The date in ISO8601 format, e.g. 2017-08-16

    max_speed: string
        The maximum ground speed permitted between adjacent positions [Knots],
        default: 750 Knots.

    distance_accuracy: string
        The maximum distance between positions at the same time [Nautical Miles],
        default: 0.25 NM.

    Returns
    -------
        Zero if successful, an errno error code otherwise.

    """
    if is_valid_iso8601_date(date):

        # get the CPR data from the Google bucket
        log.info(f'Getting data for date: {date}')

        merge_files = create_merge_overnight_flight_data_input_filenames(date)
        if not get_processed(REFINED_MERGED_OVERNIGHT_CPR_FR24, merge_files):
            log.error('Flights file not found in overnight_cpr_fr24 bucket')
            return errno.ENOENT

        error_code = merge_overnight_flight_data(merge_files)
        if error_code:
            return error_code

        output_files = create_merge_overnight_flight_data_output_filenames(
            date)
        if not put_processed(REFINED_MERGED_OVERNIGHT_CPR_FR24, output_files):
            # BUG FIX: message grammar ("Could not merged files")
            log.error('Could not write merged files to overnight_cpr_fr24 bucket')
            return errno.EACCES

        # Clean the raw merged positions; output_files[1] holds the raw
        # positions filename.
        raw_filename = output_files[1]
        error_code = clean_position_data(raw_filename, max_speed,
                                         distance_accuracy)
        if error_code:
            # BUG FIX: the next three log messages were missing the 'f'
            # prefix, so the {placeholders} were logged literally.
            log.error(f'clean_position_data error file: {raw_filename}')
            return error_code

        filenames = create_clean_position_data_filenames(CPR_FR24, date)

        # Upload the cleaned positions, then the error metrics
        source_path = REFINED_MERGED_OVERNIGHT_CPR_FR24
        if not put_processed(source_path, filenames[:1]):
            log.error(f'Could not write file: {filenames[:1]} to bucket')
            return errno.EACCES

        errors_path = PRODUCTS_ERROR_METRICS_CPR_FR24_OVERNIGHT
        if not put_processed(errors_path, filenames[1:]):
            log.error(f'Could not write file: {filenames[1:]} to bucket')
            return errno.EACCES

    else:
        log.error(f'invalid date: {date}')
        return errno.EINVAL

    return 0
예제 #17
0
def match_overnight_flights(filenames,
                            max_time_difference=DEFAULT_MAXIMUM_TIME_DELTA):
    """
    Match overnight flights with the same aircraft address or callsign.

    The end of the previous flight must be within max_time_difference of the
    start of the next positions flight.

    Parameters
    ----------
    filenames: list of two strings
        [0]: the previous day's flights filename,
        [1]: the next day's flights filename.

    max_time_difference: float
        The maximum time difference [seconds] between the end of the previous
        flight and the start of the next flight.

    Returns
    -------
        Zero if successful, an errno error code otherwise.

    """
    prev_flights_filename = filenames[0]
    next_flights_filename = filenames[1]

    # Extract date strings from the input filenames and validate them
    # NOTE(review): input_filenames is not defined in this function — it is
    # presumably a module-level sequence of filename descriptions used for
    # logging; confirm it matches the length/order of 'filenames'.
    input_date_strings = [''] * len(input_filenames)
    for i in range(len(input_filenames)):
        filename = filenames[i]
        input_date_strings[i] = read_iso8601_date_string(filename)
        if is_valid_iso8601_date(input_date_strings[i]):
            log.info('%s: %s', input_filenames[i], filename)
        else:
            log.error('%s: %s, invalid date: %s', input_filenames[i], filename,
                      input_date_strings[i])
            return errno.EINVAL

    # Ensure that the files are for the correct dates: the previous day's
    # date must precede the next day's date (ISO8601 strings compare
    # chronologically)
    if (input_date_strings[0] >= input_date_strings[1]):
        log.error(
            "Files are not for the correct dates prev flights date: %s, "
            "flights date: %s", input_date_strings[0], input_date_strings[1])
        return errno.EINVAL

    next_days_date = input_date_strings[1]

    log.info('Maximum time difference: %f', max_time_difference)

    ############################################################################
    # Read the flight files

    # Read previous flights into a pandas DataFrame
    prev_flights_df = pd.DataFrame()
    try:
        prev_flights_df = pd.read_csv(
            prev_flights_filename,
            parse_dates=['PERIOD_START', 'PERIOD_FINISH'],
            converters={'FLIGHT_ID': lambda x: UUID(x)},
            usecols=[
                'FLIGHT_ID', 'CALLSIGN', 'AIRCRAFT_ADDRESS', 'ADEP', 'ADES',
                'PERIOD_START', 'PERIOD_FINISH'
            ])
    except EnvironmentError:
        log.error('could not read file: %s', prev_flights_filename)
        return errno.ENOENT

    # NOTE(review): message says 'cpr' but this is the previous day's flights
    log.info('cpr flights read ok')

    # Read next flights into a pandas DataFrame
    next_flights_df = pd.DataFrame()
    try:
        next_flights_df = pd.read_csv(
            next_flights_filename,
            parse_dates=['PERIOD_START', 'PERIOD_FINISH'],
            converters={'FLIGHT_ID': lambda x: UUID(x)},
            usecols=[
                'FLIGHT_ID', 'CALLSIGN', 'AIRCRAFT_ADDRESS', 'ADEP', 'ADES',
                'PERIOD_START', 'PERIOD_FINISH'
            ])
    except EnvironmentError:
        log.error('could not read file: %s', next_flights_filename)
        return errno.ENOENT

    # NOTE(review): message says 'adsb' but this is the next day's flights
    log.info('adsb flights read ok')

    # Dict to hold the flight ids
    flight_ids = {}

    # Get the prev and next flights that have aircraft addresses
    prev_flights_aa = prev_flights_df.loc[
        prev_flights_df['AIRCRAFT_ADDRESS'].notnull()]
    next_flights_aa = next_flights_df.loc[
        next_flights_df['AIRCRAFT_ADDRESS'].notnull()]

    ############################################################################
    # Match the flights

    # match previous and next flights on aircraft address and times within
    # max_time_difference
    merge_aa = pd.merge(prev_flights_aa,
                        next_flights_aa,
                        on='AIRCRAFT_ADDRESS')
    # keep pairs where the next flight starts within max_time_difference
    # seconds of the previous flight finishing
    merge_aa_time = merge_aa.loc[(
        (merge_aa.PERIOD_START_y - merge_aa.PERIOD_FINISH_x) /
        np.timedelta64(1, 's')) < max_time_difference]

    # add aircraft address matches
    aa_matches = add_matches(merge_aa_time, flight_ids)
    log.info('aircraft address matches: %d, flight_ids: %d', aa_matches,
             len(flight_ids))

    # match previous and next flights on callsign and times within
    # max_time_difference
    merge_cs = pd.merge(prev_flights_df, next_flights_df, on='CALLSIGN')
    merge_cs_time = merge_cs.loc[(
        (merge_cs.PERIOD_START_y - merge_cs.PERIOD_FINISH_x) /
        np.timedelta64(1, 's')) < max_time_difference]

    # add callsign matches
    cs_matches = add_matches(merge_cs_time, flight_ids)
    log.info('callsign matches: %d, total matches:%d, flight_ids: %d',
             cs_matches, aa_matches + cs_matches, len(flight_ids))

    ############################################################################
    # Output the previous day ids
    prev_ids_filename = create_matching_ids_filename(PREV_DAY, next_days_date)
    try:
        with open(prev_ids_filename, 'w') as file:
            file.write(NEW_ID_FIELDS)
            for key, value in flight_ids.items():
                print(key, value, sep=',', file=file)

        log.info('written file: %s', prev_ids_filename)
    except EnvironmentError:
        log.error('could not write file: %s', prev_ids_filename)
        return errno.EACCES

    log.info('overnight flight matching complete')

    return 0
예제 #18
0
def convert_apds_data(filename, stands_filename):
    """
    Convert an APDS data file into flight, position and event files.

    Parameters
    ----------
    filename: string
        The APDS data filename; its basename must carry a start and finish
        date, e.g. ..._2017-08-01_2017-08-31.csv (optionally bz2 compressed).

    stands_filename: string
        The airport stands filename, may be empty; positions are only output
        when stand data is provided.

    Returns
    -------
        Zero if successful, an errno error code otherwise.

    """
    # Extract the start and finish date strings from the filename
    start_date, finish_date = split_dual_date(os.path.basename(filename))
    if not is_valid_iso8601_date(start_date):
        log.error('apds data file: %s, invalid start date: %s', filename,
                  start_date)
        return errno.EINVAL

    # validate the finish date string from the filename
    if not is_valid_iso8601_date(finish_date):
        log.error('apds data file: %s, invalid finish date: %s', filename,
                  finish_date)
        return errno.EINVAL

    log.info('apds data file: %s', filename)

    airport_stands_df = pd.DataFrame()
    if stands_filename:
        try:
            airport_stands_df = pd.read_csv(stands_filename,
                                            index_col=['ICAO_ID', 'STAND_ID'],
                                            memory_map=True)
            # BUG FIX: sort_index returns a new DataFrame; the result was
            # previously discarded, leaving the index unsorted.
            airport_stands_df = airport_stands_df.sort_index()
        except EnvironmentError:
            log.error('could not read file: %s', stands_filename)
            return errno.ENOENT

        log.info('airport stands file: %s', stands_filename)
    else:
        log.info('airport stands not provided')

    # A dict to hold the APDS flights, keyed by APDS id
    flights = {}

    # Read the APDS flights file into flights
    try:
        is_bz2 = has_bz2_extension(filename)
        with bz2.open(filename, 'rt',  newline="") if (is_bz2) else \
                open(filename, 'r') as file:
            reader = csv.reader(file, delimiter=',')
            next(reader, None)  # skip the headers
            for row in reader:
                # setdefault keeps the first flight seen for a given APDS id
                flights.setdefault(row[ApdsField.APDS_ID],
                                   ApdsFlight(row, airport_stands_df))

    except EnvironmentError:
        log.error('could not read file: %s', filename)
        return errno.ENOENT

    log.info('apds flights read ok')

    valid_flights = 0

    # Output the APDS flight data
    output_files = create_convert_apds_filenames(start_date, finish_date)
    flight_file = output_files[0]
    try:
        with open(flight_file, 'w') as file:
            file.write(FLIGHT_FIELDS)
            for key, value in sorted(flights.items()):
                print(value, file=file)
                valid_flights += 1

        log.info('written file: %s', flight_file)

    except EnvironmentError:
        log.error('could not write file: %s', flight_file)
        # NOTE(review): no return here (unlike the events file below) —
        # conversion deliberately(?) continues after a flight-file write
        # failure; confirm whether an early errno.EACCES return is intended.

    # if airport stand data was provided
    if len(airport_stands_df):
        # Output the APDS position data
        positions_file = output_files[1]
        try:
            with open(positions_file, 'w') as file:
                file.write(POSITION_FIELDS)
                for key, value in sorted(flights.items()):
                    for event in sorted(value.positions):
                        print(event, file=file)

            log.info('written file: %s', positions_file)

        except EnvironmentError:
            log.error('could not write file: %s', positions_file)
            # NOTE(review): also continues on failure — confirm intent.

    # Output the APDS event data
    event_file = output_files[2]
    try:
        with open(event_file, 'w') as file:
            file.write(FLIGHT_EVENT_FIELDS)
            for key, value in sorted(flights.items()):
                for event in sorted(value.events):
                    print(event, file=file)

        log.info('written file: %s', event_file)

    except EnvironmentError:
        log.error('could not write file: %s', event_file)
        return errno.EACCES

    log.info('apds conversion complete for %s flights on %s', valid_flights,
             start_date)

    return 0
예제 #19
0
def merge_apds_trajectories(filenames):
    """
    Merge matched APDS positions and events into the daily files.

    Parameters
    ----------
    filenames: list of five strings
        [0]: the APDS matching ids filename,
        [1]: the daily positions filename,
        [2]: the APDS positions filename,
        [3]: the daily events filename,
        [4]: the APDS events filename.

    Returns
    -------
        Zero if successful, an errno error code otherwise.

    """
    apds_ids_filename = filenames[0]

    day_points_filename = filenames[1]
    apds_points_filename = filenames[2]

    day_events_filename = filenames[3]
    apds_events_filename = filenames[4]

    # Extract date strings from the input filenames and validate them
    # NOTE(review): input_filenames is not defined in this function — it is
    # presumably a module-level sequence of filename descriptions used for
    # logging; confirm it matches the length/order of 'filenames'.
    input_date_strings = [''] * len(input_filenames)
    for i in range(len(input_filenames)):
        filename = filenames[i]
        input_date_strings[i] = read_iso8601_date_string(filename)
        if is_valid_iso8601_date(input_date_strings[i]):
            log.info('%s: %s', input_filenames[i], filename)
        else:
            log.error('%s: %s, invalid date: %s',
                      input_filenames[i], filename, input_date_strings[i])
            return errno.EINVAL

    # The APDS filenames carry a date range; their embedded date is the
    # finish date, the start dates are extracted from the filenames below.
    days_date = input_date_strings[0]
    points_finish_date = input_date_strings[2]

    # Extract the start date string from the filename and validate it
    points_start_date, _ = split_dual_date(os.path.basename(apds_points_filename))
    if not is_valid_iso8601_date(points_start_date):
        log.error('apds points file: %s, invalid start date: %s',
                  apds_points_filename, points_start_date)
        return errno.EINVAL

    # Extract the start date string from the filename and validate it
    events_start_date, _ = split_dual_date(os.path.basename(apds_events_filename))
    if not is_valid_iso8601_date(events_start_date):
        log.error('apds events file: %s, invalid start date: %s',
                  apds_events_filename, events_start_date)
        return errno.EINVAL

    # Ensure that all daily files are for the same date
    if (days_date != input_date_strings[1]) or \
            (days_date != input_date_strings[3]):
        log.error('Files are not for the same dates!'
                  ' Ids date: %s, Day Positions date: %s, Day Events date: %s',
                  days_date, input_date_strings[1], input_date_strings[3])
        return errno.EINVAL

    # Ensure day is within APDS date range (ISO8601 strings compare
    # chronologically)
    if not (points_start_date <= days_date <= points_finish_date):
        log.error("Daily files are not in APDS data range!"
                  " Daily date: %s, APDS range: %s to %s",
                  days_date, points_start_date, points_finish_date)
        return errno.EINVAL

    ############################################################################
    # Merge positions and events

    # Read apds ids into a pandas DataFrame
    apds_ids_df = pd.DataFrame()
    try:
        apds_ids_df = pd.read_csv(apds_ids_filename, index_col='FLIGHT_ID',
                                  converters={'NEW_FLIGHT_ID': lambda x: UUID(x)},
                                  memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', apds_ids_filename)
        return errno.ENOENT

    # Merge the APDS positions into the daily positions using the ids
    points_df = merge_items(day_points_filename, apds_points_filename,
                            apds_ids_df, log)
    if not len(points_df):
        log.error('Error merging points')
        return errno.ENOENT

    # Merge the APDS events into the daily events using the ids
    events_df = merge_items(day_events_filename, apds_events_filename,
                            apds_ids_df, log)
    if not len(events_df):
        log.error('Error merging events')
        return errno.ENOENT

    ############################################################################
    # Output Data

    # Output the merged positions
    output_files = create_merge_apds_output_filenames(days_date)
    points_file = output_files[0]
    try:
        points_df.to_csv(points_file, index=False,
                         date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', points_file)
    except EnvironmentError:
        log.error('could not write file: %s', points_file)
        return errno.EACCES

    # Output the merged events
    events_file = output_files[1]
    try:
        events_df.to_csv(events_file, index=False,
                         date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', events_file)
    except EnvironmentError:
        log.error('could not write file: %s', events_file)
        return errno.EACCES

    log.info('apds merging complete')

    return 0
예제 #20
0
def match_consecutive_day_trajectories(
        filenames,
        max_time_difference=DEFAULT_MAXIMUM_TIME_DELTA,
        max_speed=DEFAULT_MAXIMUM_SPEED):
    """
    Match flights on consecutive days by aircraft address, callsign and
    airports, verifying each candidate match against the position data.

    Parameters
    ----------
    filenames: list of four strings
        [0]: the previous day's flights filename,
        [1]: the next day's flights filename,
        [2]: the previous day's (clean) positions filename,
        [3]: the next day's (clean) positions filename.

    max_time_difference: float
        The maximum time difference [seconds] between the end of the previous
        flight and the start of the next flight.

    max_speed: float
        The maximum ground speed permitted between the flights' positions.

    Returns
    -------
        Zero if successful, an errno error code otherwise.

    """
    prev_flights_filename = filenames[0]
    next_flights_filename = filenames[1]

    # Note positions files must be 'clean'
    prev_positions_filename = filenames[2]
    next_positions_filename = filenames[3]

    # Extract date strings from the input filenames and validate them
    # NOTE(review): input_filenames is not defined in this function — it is
    # presumably a module-level sequence of filename descriptions used for
    # logging; confirm it matches the length/order of 'filenames'.
    input_date_strings = [''] * len(input_filenames)
    for i in range(len(input_filenames)):
        filename = filenames[i]
        input_date_strings[i] = read_iso8601_date_string(filename)
        if is_valid_iso8601_date(input_date_strings[i]):
            log.info('%s: %s', input_filenames[i], filename)
        else:
            log.error('%s: %s, invalid date: %s', input_filenames[i], filename,
                      input_date_strings[i])
            return errno.EINVAL

    # Ensure that all files are for the correct dates: flights and positions
    # dates must agree, and the previous day must precede the next day
    if (input_date_strings[0] != input_date_strings[2]) \
            or (input_date_strings[1] != input_date_strings[3]) \
            or (input_date_strings[0] >= input_date_strings[1]):
        log.error(
            "Files are not for the correct dates prev flights date: %s, "
            "next flights date: %s  prev Positions date: %s, "
            "next Positions date: %s", input_date_strings[0],
            input_date_strings[1], input_date_strings[2],
            input_date_strings[3])
        return errno.EINVAL

    next_days_date = input_date_strings[1]

    log.info('Maximum time difference: %f', max_time_difference)

    ############################################################################
    # Read the files

    # Read previous flights into a pandas DataFrame
    prev_flights_df = pd.DataFrame()
    try:
        prev_flights_df = pd.read_csv(
            prev_flights_filename,
            parse_dates=['PERIOD_START', 'PERIOD_FINISH'],
            converters={'FLIGHT_ID': lambda x: UUID(x)},
            usecols=[
                'FLIGHT_ID', 'CALLSIGN', 'AIRCRAFT_ADDRESS', 'ADEP', 'ADES',
                'PERIOD_START', 'PERIOD_FINISH'
            ])
    except EnvironmentError:
        log.error('could not read file: %s', prev_flights_filename)
        return errno.ENOENT

    log.info('cpr flights read ok')

    # Read next flights into a pandas DataFrame
    next_flights_df = pd.DataFrame()
    try:
        next_flights_df = pd.read_csv(
            next_flights_filename,
            parse_dates=['PERIOD_START', 'PERIOD_FINISH'],
            converters={'FLIGHT_ID': lambda x: UUID(x)},
            usecols=[
                'FLIGHT_ID', 'CALLSIGN', 'AIRCRAFT_ADDRESS', 'ADEP', 'ADES',
                'PERIOD_START', 'PERIOD_FINISH'
            ])
    except EnvironmentError:
        log.error('could not read file: %s', next_flights_filename)
        return errno.ENOENT

    log.info('adsb flights read ok')

    # Read previous points into a pandas DataFrame
    prev_points_df = pd.DataFrame()
    try:
        prev_points_df = pd.read_csv(
            prev_positions_filename,
            parse_dates=['TIME'],
            index_col='FLIGHT_ID',
            converters={'FLIGHT_ID': lambda x: UUID(x)},
            usecols=['FLIGHT_ID', 'TIME', 'LAT', 'LON', 'ALT'])
    except EnvironmentError:
        log.error('could not read file: %s', prev_positions_filename)
        return errno.ENOENT

    log.info('prev points read ok')

    # Read the next points into a pandas DataFrame
    next_points_df = pd.DataFrame()
    try:
        next_points_df = pd.read_csv(
            next_positions_filename,
            parse_dates=['TIME'],
            index_col='FLIGHT_ID',
            converters={'FLIGHT_ID': lambda x: UUID(x)},
            usecols=['FLIGHT_ID', 'TIME', 'LAT', 'LON', 'ALT'])
    except EnvironmentError:
        log.error('could not read file: %s', next_positions_filename)
        return errno.ENOENT

    log.info('next points read ok')

    # Dict to hold the flight ids
    flight_ids = {}

    # Get the prev and next flights that have aircraft addresses
    prev_flights_aa = prev_flights_df.loc[
        prev_flights_df['AIRCRAFT_ADDRESS'].notnull()]
    next_flights_aa = next_flights_df.loc[
        next_flights_df['AIRCRAFT_ADDRESS'].notnull()]

    ############################################################################
    # Match the flights

    # match previous and next flights on aircraft address and times within
    # max_time_difference
    merge_aa = pd.merge(prev_flights_aa,
                        next_flights_aa,
                        on='AIRCRAFT_ADDRESS')
    merge_aa_time = merge_aa.loc[(
        (merge_aa.PERIOD_START_y - merge_aa.PERIOD_FINISH_x) /
        np.timedelta64(1, 's')) < max_time_difference]

    # verify aircraft address matches
    aa_matches = verify_matches(merge_aa_time, prev_points_df, next_points_df,
                                flight_ids, max_time_difference, max_speed)
    log.info('aircraft address matches: %d, flight_ids: %d', aa_matches,
             len(flight_ids))

    # match previous and next flights on callsign and times within
    # max_time_difference
    merge_cs = pd.merge(prev_flights_df, next_flights_df, on='CALLSIGN')
    merge_cs_time = merge_cs.loc[(
        (merge_cs.PERIOD_START_y - merge_cs.PERIOD_FINISH_x) /
        np.timedelta64(1, 's')) < max_time_difference]

    # verify callsign matches
    cs_matches = verify_matches(merge_cs_time, prev_points_df, next_points_df,
                                flight_ids, max_time_difference, max_speed)
    log.info('callsign matches: %d, total matches:%d, flight_ids: %d',
             cs_matches, aa_matches + cs_matches, len(flight_ids))

    # match previous and next flights on departure, destination and
    # overlapping start & end times
    merge_dep_des = pd.merge(prev_flights_df,
                             next_flights_df,
                             on=['ADEP', 'ADES'])
    # BUG FIX: was 'merge_cs.loc[...]' — the mask computed from merge_dep_des
    # was applied to the callsign merge, filtering the wrong DataFrame.
    merge_dep_des_time = merge_dep_des.loc[(
        (merge_dep_des.PERIOD_START_y - merge_dep_des.PERIOD_FINISH_x) /
        np.timedelta64(1, 's')) < max_time_difference]

    # verify departure and destination airport matches
    apt_matches = verify_matches(merge_dep_des_time, prev_points_df,
                                 next_points_df, flight_ids,
                                 max_time_difference, max_speed)
    log.info('airport matches: %d, total matches:%d, flight_ids: %d',
             apt_matches, apt_matches + aa_matches + cs_matches,
             len(flight_ids))

    # Output the previous day ids
    prev_ids_filename = create_matching_ids_filename(PREV_DAY, next_days_date)
    try:
        with open(prev_ids_filename, 'w') as file:
            file.write(NEW_ID_FIELDS)
            for key, value in flight_ids.items():
                print(key, value, sep=',', file=file)
    except EnvironmentError:
        log.error('could not write file: %s', prev_ids_filename)
        return errno.EACCES

    log.info('written file: %s', prev_ids_filename)

    log.info('consecutive day matching complete')

    return 0
예제 #21
0
def extract_overnight_data(filenames):
    """
    Extract flights, positions and events for the previous day from the data.

    It writes items (flights, positions or events) WITHOUT ids in the ids file
    into files with 'new_' prepended to the filename.

    It writes positions and events with ids in the ids file into positions and
    event files for the previous day, with the ids replaced by the previous
    ids in the ids file.

    Parameters
    ----------
    filenames: list of four strings
        [0]: the daily matching ids filename,
        [1]: the flights filename,
        [2]: the positions filename,
        [3]: the events filename.

    Returns
    -------
        Zero if successful, an errno error code otherwise.

    """
    day_ids_filename = filenames[0]

    flights_filename = filenames[1]
    positions_filename = filenames[2]
    events_filename = filenames[3]

    # Extract date strings from the input filenames and validate them
    # NOTE(review): input_filenames is not defined in this function — it is
    # presumably a module-level sequence of filename descriptions used for
    # logging; confirm it matches the length/order of 'filenames'.
    input_date_strings = [''] * len(input_filenames)
    for i in range(len(input_filenames)):
        filename = filenames[i]
        input_date_strings[i] = read_iso8601_date_string(filename)
        if is_valid_iso8601_date(input_date_strings[i]):
            log.info('%s: %s', input_filenames[i], filename)
        else:
            log.error('%s: %s, invalid date: %s', input_filenames[i], filename,
                      input_date_strings[i])
            return errno.EINVAL

    date = input_date_strings[0]

    # All four files must be for the same date
    if (date != input_date_strings[1]) \
            or (date != input_date_strings[2]) \
            or (date != input_date_strings[3]):
        log.error("Files are not for the same dates: %s,%s,%s,%s",
                  input_date_strings[0], input_date_strings[1],
                  input_date_strings[2], input_date_strings[3])

        return errno.EINVAL

    ############################################################################

    # Read the Id file
    ids_df = pd.DataFrame()
    try:
        ids_df = pd.read_csv(day_ids_filename,
                             index_col='FLIGHT_ID',
                             converters={
                                 'FLIGHT_ID': lambda x: UUID(x),
                                 'NEW_FLIGHT_ID': lambda x: UUID(x)
                             },
                             memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', day_ids_filename)
        return errno.ENOENT

    # Don't require the flights
    # Just write flights WITHOUT matching ids into a "new" file
    _ = extract_next_day_items(flights_filename, ids_df)

    prev_positions = extract_next_day_items(positions_filename, ids_df,
                                            ['TIME'])

    # Output the overnight positions for the previous day
    prev_date = iso8601_previous_day(date)
    overnight_positions_filename = 'overnight_' + \
        create_positions_filename(CPR_FR24, prev_date)
    try:
        prev_positions.to_csv(overnight_positions_filename,
                              index=False,
                              date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', overnight_positions_filename)
    except EnvironmentError:
        log.error('could not write file: %s', overnight_positions_filename)
        # BUG FIX: was errno.ENOENT; write failures return EACCES elsewhere
        # in this module.
        return errno.EACCES

    # Output the overnight events for the previous day
    prev_events = extract_next_day_items(events_filename, ids_df, ['TIME'])
    overnight_events_filename = 'overnight_' + \
        create_events_filename(CPR_FR24, prev_date)
    try:
        prev_events.to_csv(overnight_events_filename,
                           index=False,
                           date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', overnight_events_filename)
    except EnvironmentError:
        log.error('could not write file: %s', overnight_events_filename)
        # BUG FIX: was errno.ENOENT; write failures return EACCES elsewhere
        # in this module.
        return errno.EACCES

    log.info('extraction complete')

    return 0
예제 #22
0
def convert_fr24_data(filenames):
    """
    Convert a day's raw FR24 ADS-B flights and points files into the common
    flight and position file formats.

    Parameters
    ----------
    filenames: list of strings
        filenames[0]: the raw FR24 flights filename
        filenames[1]: the raw FR24 points filename
        Both filenames must contain the same valid ISO8601 date.

    Returns
    -------
        0 if successful, an errno error code otherwise.
    """
    flights_filename = filenames[0]
    points_filename = filenames[1]

    if flights_filename == points_filename:
        log.error(
            'Files are the same! Flights filename: %s, points filename: %s',
            flights_filename, points_filename)
        return errno.EINVAL

    # Extract the date string from the filename and validate it
    flights_date = read_iso8601_date_string(flights_filename)
    if is_valid_iso8601_date(flights_date):
        log.info('fr24 flights file: %s', flights_filename)
    else:
        log.error('fr24 flights file: %s, invalid date: %s', flights_filename,
                  flights_date)
        return errno.EINVAL

    # Extract the date string from the filename and validate it
    points_date = read_iso8601_date_string(points_filename)
    if is_valid_iso8601_date(points_date):
        log.info('fr24 points file: %s', points_filename)
    else:
        log.error('fr24 points file: %s, invalid date: %s', points_filename,
                  points_date)
        return errno.EINVAL

    if flights_date != points_date:
        log.error(
            'Files are not for the same date! Flights date: %s, points date: %s',
            flights_date, points_date)
        return errno.EINVAL

    # A dict to hold the ADS-B flights, keyed by flight id
    flights = {}

    # Read the ADS-B flights file into flights
    try:
        is_bz2 = has_bz2_extension(flights_filename)
        with bz2.open(flights_filename, 'rt',  newline="") if (is_bz2) else \
                open(flights_filename, 'r') as file:
            reader = csv.reader(file, delimiter=',')
            next(reader, None)  # skip the headers
            for row in reader:
                # Only construct an AdsbFlight for unseen flight ids;
                # setdefault would construct one for every row
                if row[AdsbFlightField.FLIGHT_ID] not in flights:
                    flights[row[AdsbFlightField.FLIGHT_ID]] = AdsbFlight(row)

    except EnvironmentError:
        log.error('could not read file: %s', flights_filename)
        return errno.ENOENT

    log.info('fr24 flights read ok')

    # Read the ADS-B points file, appending positions to their flights
    try:
        is_bz2 = has_bz2_extension(points_filename)
        with bz2.open(points_filename, 'rt',  newline="") if (is_bz2) else \
                open(points_filename, 'r') as file:
            reader = csv.reader(file, delimiter=',')
            next(reader, None)  # skip the headers
            for row in reader:
                if row[AdsbPointField.FLIGHT_ID] in flights:
                    flights[row[AdsbPointField.FLIGHT_ID]].append(row)

    except EnvironmentError:
        log.error('could not read file: %s', points_filename)
        return errno.ENOENT

    log.info('fr24 points read ok')

    # sort positions in date time (of position) order
    for values in flights.values():
        values.sort()
    log.info('fr24 points sorted')

    valid_flights = 0

    # Output the ADS-B flight data for all valid flights
    output_files = create_convert_fr24_filenames(flights_date)
    flight_file = output_files[0]
    try:
        with open(flight_file, 'w') as file:
            file.write(FLIGHT_FIELDS)
            for key, values in sorted(flights.items()):
                if values.is_valid:
                    print(values, file=file)
                    valid_flights += 1

        log.info('written file: %s', flight_file)

    except EnvironmentError:
        log.error('could not write file: %s', flight_file)
        # Fail here: the positions file would be inconsistent without flights
        return errno.EACCES

    # Output the ADS-B position data for all valid flights
    positions_file = output_files[1]
    try:
        with open(positions_file, 'w') as file:
            file.write(POSITION_FIELDS)
            for key, values in sorted(flights.items()):
                if values.is_valid:
                    for pos in values.positions:
                        print(pos, file=file)

        log.info('written file: %s', positions_file)

    except EnvironmentError:
        log.error('could not write file: %s', positions_file)
        return errno.EACCES

    log.info('fr24 conversion complete for %s flights on %s', valid_flights,
             points_date)

    return 0
def merge_cpr_adsb_trajectories(filenames):
    """
    Merge matched CPR and ADS-B flights, positions and events for one day.

    Parameters
    ----------
    filenames: list of strings
        filenames[0]: the CPR ids filename
        filenames[1]: the ADS-B ids filename
        filenames[2]: the CPR flights filename
        filenames[3]: the ADS-B flights filename
        filenames[4]: the (clean) CPR positions filename
        filenames[5]: the (clean) ADS-B positions filename
        filenames[6]: the CPR events filename
        All filenames must contain the same valid ISO8601 date.

    Returns
    -------
        0 if successful, an errno error code otherwise.
    """
    cpr_ids_filename = filenames[0]
    adsb_ids_filename = filenames[1]

    cpr_flights_filename = filenames[2]
    adsb_flights_filename = filenames[3]

    # Note positions files must be clean
    cpr_positions_filename = filenames[4]
    adsb_positions_filename = filenames[5]

    cpr_events_filename = filenames[6]

    # Extract date strings from the input filenames and validate them
    # NOTE(review): input_filenames is assumed to be a module-level list of
    # labels matching filenames — confirm it is defined in this module
    input_date_strings = [''] * len(input_filenames)
    for i in range(len(input_filenames)):
        filename = filenames[i]
        input_date_strings[i] = read_iso8601_date_string(filename)
        if is_valid_iso8601_date(input_date_strings[i]):
            log.info('%s: %s', input_filenames[i], filename)
        else:
            log.error('%s: %s, invalid date: %s', input_filenames[i], filename,
                      input_date_strings[i])
            return errno.EINVAL

    # Ensure that all files are for the same date
    if input_date_strings[1:] != input_date_strings[:-1]:
        log.error("Files are not for the same dates: %s,%s,%s,%s,%s,%s,%s",
                  input_date_strings[0], input_date_strings[1],
                  input_date_strings[2], input_date_strings[3],
                  input_date_strings[4], input_date_strings[5],
                  input_date_strings[6])
        return errno.EINVAL

    ############################################################################
    # Read the files

    # Read the Id files
    cpr_ids_df = pd.DataFrame()
    try:
        cpr_ids_df = pd.read_csv(
            cpr_ids_filename,
            index_col='FLIGHT_ID',
            converters={'NEW_FLIGHT_ID': lambda x: UUID(x)},
            memory_map=True)
        cpr_ids_df.sort_index(inplace=True)
    except EnvironmentError:
        log.error('could not read file: %s', cpr_ids_filename)
        return errno.ENOENT

    adsb_ids_df = pd.DataFrame()
    try:
        adsb_ids_df = pd.read_csv(
            adsb_ids_filename,
            index_col='FLIGHT_ID',
            converters={'NEW_FLIGHT_ID': lambda x: UUID(x)},
            memory_map=True)
        # Bug fix: previously re-sorted cpr_ids_df, leaving the ADS-B id
        # index unsorted
        adsb_ids_df.sort_index(inplace=True)
    except EnvironmentError:
        log.error('could not read file: %s', adsb_ids_filename)
        return errno.ENOENT

    log.info('read ids files: %s,%s', cpr_ids_filename, adsb_ids_filename)

    # Read and merge the flights
    flights_df = pd.DataFrame()
    try:
        flights_df = get_merged_flights(cpr_flights_filename,
                                        adsb_flights_filename, cpr_ids_df,
                                        adsb_ids_df)
    except EnvironmentError:
        log.error('could not read file: %s or %s', cpr_flights_filename,
                  adsb_flights_filename)
        return errno.ENOENT

    log.info('read and merged flights files: %s,%s', cpr_flights_filename,
             adsb_flights_filename)

    # Read and merge the positions
    positions_df = pd.DataFrame()
    try:
        positions_df = get_merged_positions(cpr_positions_filename,
                                            adsb_positions_filename,
                                            cpr_ids_df, adsb_ids_df)
    except EnvironmentError:
        log.error('could not read file: %s or %s', cpr_positions_filename,
                  adsb_positions_filename)
        return errno.ENOENT

    log.info('read and merged positions files: %s,%s', cpr_positions_filename,
             adsb_positions_filename)

    # Read the CPR events and convert their flight ids
    events_df = pd.DataFrame()
    try:
        events_df = read_dataframe_with_new_ids(cpr_events_filename,
                                                cpr_ids_df)
        replace_old_flight_ids(events_df)
    except EnvironmentError:
        log.error('could not read file:  %s', cpr_events_filename)
        return errno.ENOENT

    log.info('read and merged events file: %s', cpr_events_filename)

    update_flight_data(flights_df, positions_df)

    # Output the flights
    output_files = create_merge_cpr_adsb_output_filenames(
        input_date_strings[0])
    output_flights_filename = output_files[0]
    try:
        flights_df.to_csv(output_flights_filename,
                          index=False,
                          date_format=ISO8601_DATETIME_FORMAT)
    except EnvironmentError:
        log.error('could not write file: %s', output_flights_filename)
        return errno.EACCES

    log.info('written file: %s', output_flights_filename)

    # Convert the positions prior to output
    replace_old_flight_ids(positions_df)
    output_positions_filename = output_files[1]
    try:
        positions_df.to_csv(output_positions_filename,
                            index=False,
                            date_format=ISO8601_DATETIME_FORMAT)
    except EnvironmentError:
        log.error('could not write file: %s', output_positions_filename)
        return errno.EACCES

    log.info('written file: %s', output_positions_filename)

    # Output the events
    output_events_filename = output_files[2]
    try:
        events_df.to_csv(output_events_filename,
                         index=False,
                         date_format=ISO8601_DATETIME_FORMAT)
    except EnvironmentError:
        log.error('could not write file: %s', output_events_filename)
        return errno.EACCES

    log.info('written file: %s', output_events_filename)

    log.info('merging complete')

    return 0
예제 #24
0
def match_cpr_adsb_trajectories(
        filenames,
        distance_threshold=DEFAULT_MATCHING_DISTANCE_THRESHOLD,
        alt_threshold=DEFAULT_MATCHING_ALTITUDE_THRESHOLD):
    """
    Match CPR and ADS-B trajectories for one day and output the matching ids.

    Flights are matched (in order) on: aircraft address, callsign and
    departure/destination airport pair, each with overlapping start and end
    times, then verified against the flights' positions.

    Parameters
    ----------
    filenames: list of strings
        filenames[0]: the CPR flights filename
        filenames[1]: the ADS-B flights filename
        filenames[2]: the (clean) CPR positions filename
        filenames[3]: the (clean) ADS-B positions filename
        All filenames must contain the same valid ISO8601 date.

    distance_threshold: float
        The maximum distance between positions for a match to be verified.

    alt_threshold: float
        The maximum altitude difference for a match to be verified.

    Returns
    -------
        0 if successful, an errno error code otherwise.
    """
    # Extract date strings from the input filenames and validate them
    # NOTE(review): input_filenames is assumed to be a module-level list of
    # labels matching filenames — confirm it is defined in this module
    input_date_strings = [''] * len(input_filenames)
    for i in range(len(input_filenames)):
        filename = filenames[i]
        input_date_strings[i] = read_iso8601_date_string(filename)
        if is_valid_iso8601_date(input_date_strings[i]):
            log.info('%s: %s', input_filenames[i], filename)
        else:
            log.error('%s: %s, invalid date: %s', input_filenames[i], filename,
                      input_date_strings[i])
            return errno.EINVAL

    # Ensure that files are all for the same date
    if input_date_strings[1:] != input_date_strings[:-1]:
        log.error(
            'Files are not for the same date!'
            ' CPR Flights date: %s, ADSB Flights date: %s,'
            ' CPR Positions date: %s, ADSB Positions date: %s',
            input_date_strings[0], input_date_strings[1],
            input_date_strings[2], input_date_strings[3])
        return errno.EINVAL

    cpr_flights_filename = filenames[0]
    adsb_flights_filename = filenames[1]

    # Note positions files must be 'clean'
    cpr_positions_filename = filenames[2]
    adsb_positions_filename = filenames[3]

    log.info('Distance threshold: %f', distance_threshold)
    log.info('Altitude threshold: %f', alt_threshold)

    ############################################################################
    # Read the files

    # Read CPR flights into a pandas DataFrame
    cpr_flights_df = pd.DataFrame()
    try:
        cpr_flights_df = pd.read_csv(
            cpr_flights_filename,
            parse_dates=['PERIOD_START', 'PERIOD_FINISH'],
            converters={'FLIGHT_ID': lambda x: int(x)},
            usecols=[
                'FLIGHT_ID', 'CALLSIGN', 'AIRCRAFT_ADDRESS', 'ADEP', 'ADES',
                'PERIOD_START', 'PERIOD_FINISH'
            ],
            memory_map=True)
        log.info('cpr flights read ok')
    except EnvironmentError:
        log.error('could not read file: %s', cpr_flights_filename)
        return errno.ENOENT

    # Read ADS-B flights into a pandas DataFrame
    # Note: ADS-B flight ids are hexadecimal strings
    adsb_flights_df = pd.DataFrame()
    try:
        adsb_flights_df = pd.read_csv(
            adsb_flights_filename,
            parse_dates=['PERIOD_START', 'PERIOD_FINISH'],
            converters={'FLIGHT_ID': lambda x: int(x, 16)},
            usecols=[
                'FLIGHT_ID', 'CALLSIGN', 'AIRCRAFT_ADDRESS', 'ADEP', 'ADES',
                'PERIOD_START', 'PERIOD_FINISH'
            ],
            memory_map=True)
        log.info('adsb flights read ok')
    except EnvironmentError:
        log.error('could not read file: %s', adsb_flights_filename)
        return errno.ENOENT

    # Read CPR points into a pandas DataFrame
    cpr_points_df = pd.DataFrame()
    try:
        cpr_points_df = pd.read_csv(
            cpr_positions_filename,
            parse_dates=['TIME'],
            index_col='FLIGHT_ID',
            converters={'FLIGHT_ID': lambda x: int(x)},
            usecols=['FLIGHT_ID', 'TIME', 'LAT', 'LON', 'ALT'],
            memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', cpr_positions_filename)
        return errno.ENOENT

    log.info('cpr points read ok')

    # Read the ADS-B points
    adsb_points_df = pd.DataFrame()
    try:
        adsb_points_df = pd.read_csv(
            adsb_positions_filename,
            parse_dates=['TIME'],
            index_col='FLIGHT_ID',
            converters={'FLIGHT_ID': lambda x: int(x, 16)},
            usecols=['FLIGHT_ID', 'TIME', 'LAT', 'LON', 'ALT'],
            memory_map=True)
    except EnvironmentError:
        log.error('could not read file: %s', adsb_positions_filename)
        return errno.ENOENT

    log.info('adsb points read ok')

    # Dicts to hold the flight ids
    cpr_flight_ids = {}
    adsb_flight_ids = {}
    merge_flight_ids = {}

    # Get the CPR flights with aircraft addresses
    cpr_flights_aa = cpr_flights_df.loc[
        cpr_flights_df['AIRCRAFT_ADDRESS'].notnull()]

    ############################################################################
    # Match the flights

    # match CPR and ADS-B flights on aircraft address and overlaping start & end times
    merge_aa = pd.merge(cpr_flights_aa, adsb_flights_df, on='AIRCRAFT_ADDRESS')
    merge_aa_time = merge_aa.loc[
        (merge_aa.PERIOD_START_x <= merge_aa.PERIOD_FINISH_y)
        & (merge_aa.PERIOD_START_y <= merge_aa.PERIOD_FINISH_x)]

    log.info('aircraft address time matches: %d', len(merge_aa_time))

    # verify aircraft address matches
    aa_matches = verify_flight_matches(merge_aa_time, cpr_points_df,
                                       adsb_points_df, cpr_flight_ids,
                                       adsb_flight_ids, merge_flight_ids,
                                       distance_threshold, alt_threshold)
    log.info(
        'aircraft address matches: %d, cpr_ids: %d, adsb_ids: %d, merge_ids: %d',
        aa_matches, len(cpr_flight_ids), len(adsb_flight_ids),
        len(merge_flight_ids))

    # match CPR and ADS-B flights on callsign and overlaping start & end times
    merge_cs = pd.merge(cpr_flights_df, adsb_flights_df, on='CALLSIGN')
    merge_cs_time = merge_cs.loc[
        (merge_cs.PERIOD_START_x <= merge_cs.PERIOD_FINISH_y)
        & (merge_cs.PERIOD_START_y <= merge_cs.PERIOD_FINISH_x)]

    log.info('callsign time matches: %d', len(merge_cs_time))

    # verify callsign matches
    cs_matches = verify_flight_matches(merge_cs_time, cpr_points_df,
                                       adsb_points_df, cpr_flight_ids,
                                       adsb_flight_ids, merge_flight_ids,
                                       distance_threshold, alt_threshold)
    log.info('callsign matches: %d, cpr_ids: %d, adsb_ids: %d, merge_ids: %d',
             cs_matches, len(cpr_flight_ids), len(adsb_flight_ids),
             len(merge_flight_ids))

    # merge overlapping aircraft address and callsign matches
    if len(merge_flight_ids):
        merge_matches(cpr_flight_ids, merge_flight_ids)
        merge_matches(adsb_flight_ids, merge_flight_ids)
        merge_flight_ids.clear()

    # match CPR and ADS-B flights on departure, destination and overlaping start & end times
    merge_dep_des = pd.merge(cpr_flights_df,
                             adsb_flights_df,
                             on=['ADEP', 'ADES'])
    # Bug fix: previously filtered merge_cs with masks computed from
    # merge_dep_des, selecting the wrong rows for airport-pair matching
    merge_dep_des_time = merge_dep_des.loc[
        (merge_dep_des.PERIOD_START_x <= merge_dep_des.PERIOD_FINISH_y)
        & (merge_dep_des.PERIOD_START_y <= merge_dep_des.PERIOD_FINISH_x)]

    log.info('airport time matches: %d', len(merge_dep_des_time))

    # verify departure, destination matches
    dep_des_matches = verify_flight_matches(merge_dep_des_time, cpr_points_df,
                                            adsb_points_df, cpr_flight_ids,
                                            adsb_flight_ids, merge_flight_ids,
                                            distance_threshold, alt_threshold)
    log.info('airport matches: %d, cpr_ids: %d, adsb_ids: %d, merge_ids: %d',
             dep_des_matches, len(cpr_flight_ids), len(adsb_flight_ids),
             len(merge_flight_ids))

    # merge overlapping aircraft address, callsign and airport matches
    if len(merge_flight_ids):
        merge_matches(cpr_flight_ids, merge_flight_ids)
        merge_matches(adsb_flight_ids, merge_flight_ids)
        merge_flight_ids.clear()

    # Add unmatched flight ids to cpr_flight_ids and adsb_flight_ids
    allocate_remaining_ids(cpr_flight_ids, cpr_flights_df['FLIGHT_ID'].values)
    allocate_remaining_ids(adsb_flight_ids,
                           adsb_flights_df['FLIGHT_ID'].values)

    ############################################################################
    # Output the matching ids

    # Output the CPR ids
    output_files = create_match_cpr_adsb_output_filenames(
        input_date_strings[0])
    cpr_ids_file = output_files[0]
    try:
        with open(cpr_ids_file, 'w') as file:
            file.write(NEW_ID_FIELDS)
            for key in cpr_flight_ids:
                value = cpr_flight_ids[key]
                print(key, value, sep=',', file=file)
    except EnvironmentError:
        log.error('could not write file: %s', cpr_ids_file)
        return errno.EACCES

    log.info('written file: %s', cpr_ids_file)

    # Output the ADS-B ids (flight ids written back as hexadecimal)
    adsb_ids_file = output_files[1]
    try:
        with open(adsb_ids_file, 'w', newline='') as file:
            file.write(NEW_ID_FIELDS)
            for key in adsb_flight_ids:
                adsb_str = '0x{:06x},{}'.format(key, adsb_flight_ids[key])
                print(adsb_str, file=file)
    except EnvironmentError:
        log.error('could not write file: %s', adsb_ids_file)
        return errno.EACCES

    log.info('written file: %s', adsb_ids_file)

    log.info('matching complete')

    return 0