def data_batch(input_dir, phone_type, target_key, set_normal, batch_size=16,
               shuffle=False, window_config=constant.WINDOW_CONFIG):
    """Build a batch loader of normalized frame windows for one target key.

    Relies on the module-level ``args`` and ``TEST_LIST`` for pruning
    decisions.  Returns ``(batch_loader, length_dict, time_column)`` where
    the time column is a clone of the target 'time' channel for reporting.
    """
    # Load the aligned phone/G1000 sequence pairs from disk.
    pairs = frame.FlightSequencePairData(
        '{}/{}_g1000.sequence'.format(input_dir, phone_type))

    # Keep a copy of the target time column before any key pruning.
    clock = pairs.meta_target.clone()
    clock.prune_keys(remain_keys=['time'])
    if not args['--all']:
        clock.prune_identifier(remain_identifier=TEST_LIST)

    # Decide which input keywords survive pruning.
    if args['--stratux'] is None:
        keep = constant.INPUT_KEYWORDS
    else:
        level = args['--stratux']
        base = target_key.split('_')[1]
        by_level = {
            0: (base, ),
            1: ('alt', 'lat', 'long', base),
            2: ('alt', 'lat', 'long', 'pitch', 'roll', 'heading'),
        }
        if level not in by_level:
            raise NotImplementedError
        keep = by_level[level]
    pairs.prune_keys(input_remain_keys=keep, target_remain_keys=(target_key, ))

    if not args['--all']:
        pairs.prune_identifier(remain_identifier=TEST_LIST)

    # Normalize on the time domain with the caller-provided statistics.
    pairs.normalize(set_normal)

    # Slice the sequences into fixed-size windows per the window config.
    windows = frame.FlightFramePairData(
        pairs,
        input_win_len=window_config['input']['length'],
        target_win_len=window_config['target']['length'],
        input_win_offset=window_config['input']['offset_length'],
        target_win_offset=window_config['target']['offset_length'],
        input_win_offset_rate=window_config['input']['offset_rate'],
        target_win_offset_rate=window_config['target']['offset_rate'],
        input_pad=window_config['input']['padding'],
        target_pad=window_config['target']['padding'])
    loader = batch.FramePairDataLoader(windows, batch_size=batch_size,
                                       shuffle=shuffle, drop_last=False)
    return loader, pairs.length_dict(), clock
def predict(key, args):
    """Predict one angular keyword from phone data with saved sin/cos models.

    Args
    ----
    key : str
        target keyword (e.g. 'pitch', 'roll', 'heading')
    args : dict
        global command line arguments

    Returns
    -------
    prediction : flight.FlightPruneData
        data holding the predicted column '*<key>' next to the standard
        target column '<key>'
    time_col
        clone of the target 'time' column for the same identifiers

    Loads '<--model>/<--phone>.<name>.model' / '.normal', predicts the sine
    and cosine components separately, then recovers the degree value from
    the trigonometric pair.
    """
    def concat_prediction(prediction1, prediction2):
        """Concatenate key columns of two predictions flight-by-flight."""
        # link data
        dataset1 = prediction1.dataset
        keys1 = prediction1.keys
        identifier1 = prediction1.identifier
        dataset2 = prediction2.dataset
        keys2 = prediction2.keys
        identifier2 = prediction2.identifier
        # predictions should match each other
        assert identifier1 == identifier2
        # keywords should not overlap
        assert set(keys1) & set(keys2) == set()
        # concatenate data
        dataset = []
        identifier = []
        for i in range(len(dataset1)):
            # identifier should match each other
            assert identifier1[i] == identifier2[i]
            dataset.append(np.concatenate([dataset1[i], dataset2[i]],
                                          axis=1))
            identifier.append(identifier1[i])
        keys = keys1 + keys2
        return flight.FlightPruneData((dataset, keys, identifier))

    # Negative stratux level: no model at all — echo the raw phone column
    # (renamed '*<key>') next to the g1000 standard column.
    if args['--stratux'] is not None and args['--stratux'] < 0:
        seq_pair_data = frame.FlightSequencePairData(
            '{}/{}_g1000.sequence'.format(args['<input-dir>'],
                                          args['--phone']))
        time_col = seq_pair_data.meta_target.clone()
        time_col.prune_keys(remain_keys=['time'])
        if not args['--all']:
            time_col.prune_identifier(remain_identifier=TEST_LIST)
        seq_pair_data.prune_keys(input_remain_keys=(key, ),
                                 target_remain_keys=(key, ))
        if not args['--all']:
            seq_pair_data.prune_identifier(remain_identifier=TEST_LIST)
        prediction = seq_pair_data.meta_input.clone()
        prediction.prune_keys(remain_keys=['{}'.format(key)])
        std_prediction = seq_pair_data.meta_target.clone()
        std_prediction.prune_keys(remain_keys=['{}'.format(key)])
        # rename the phone column to the prediction convention '*<key>'
        prediction.keys = tuple(['*{}'.format(key)])
        prediction = concat_prediction(prediction, std_prediction)
        return prediction, time_col

    # Resolve the model/normalization file name for this keyword.
    if args['--stratux'] is None:
        name = key
    else:
        name = "{}.lv{}".format(key, args['--stratux'])
    model = torch.load('{}/{}.{}.model'.format(args['--model'],
                                               args['--phone'], name))
    normal = torch.load('{}/{}.{}.normal'.format(args['--model'],
                                                 args['--phone'], name))
    # NOTE(review): presumably forces CPU inference on both sub-models —
    # confirm the model class reads this 'cuda' attribute.
    model['sin'].cuda = False
    model['cos'].cuda = False

    def data_batch(input_dir, phone_type, target_key, set_normal,
                   batch_size=16, shuffle=False,
                   window_config=constant.WINDOW_CONFIG):
        """Build a batch loader for one trig target key (closure on args)."""
        # load sequence data
        seq_pair_data = frame.FlightSequencePairData(
            '{}/{}_g1000.sequence'.format(input_dir, phone_type))
        time_col = seq_pair_data.meta_target.clone()
        time_col.prune_keys(remain_keys=['time'])
        if not args['--all']:
            time_col.prune_identifier(remain_identifier=TEST_LIST)
        # remain only necessary keywords
        if args['--stratux'] is None:
            seq_pair_data.prune_keys(
                input_remain_keys=constant.INPUT_KEYWORDS,
                target_remain_keys=(target_key, ))
        else:
            lv = args['--stratux']
            input_key = target_key.split('_')[1]
            if lv == 0:
                input_remain_keys = (input_key, )
            elif lv == 1:
                input_remain_keys = ('alt', 'lat', 'long', input_key)
            elif lv == 2:
                input_remain_keys = ('alt', 'lat', 'long', 'pitch', 'roll',
                                     'heading')
            else:
                raise NotImplementedError
            seq_pair_data.prune_keys(input_remain_keys=input_remain_keys,
                                     target_remain_keys=(target_key, ))
        if not args['--all']:
            seq_pair_data.prune_identifier(remain_identifier=TEST_LIST)
        # normalize sequence data on time domain
        seq_pair_data.normalize(set_normal)
        # divide sequence data into frames
        frame_pair_data = frame.FlightFramePairData(
            seq_pair_data,
            input_win_len=window_config['input']['length'],
            target_win_len=window_config['target']['length'],
            input_win_offset=window_config['input']['offset_length'],
            target_win_offset=window_config['target']['offset_length'],
            input_win_offset_rate=window_config['input']['offset_rate'],
            target_win_offset_rate=window_config['target']['offset_rate'],
            input_pad=window_config['input']['padding'],
            target_pad=window_config['target']['padding'])
        batch_loader = batch.FramePairDataLoader(frame_pair_data,
                                                 batch_size=batch_size,
                                                 shuffle=shuffle,
                                                 drop_last=False)
        return batch_loader, seq_pair_data.length_dict(), time_col

    # Predict the sine component and map it back to the sequence domain.
    sin_loader, sin_len_dict, sin_time_col = data_batch(
        args['<input-dir>'], args['--phone'], "sin_{}".format(key),
        normal['sin'])
    sin_loader, sin_loss = model['sin'].predict(sin_loader,
                                                '*sin_{}'.format(key),
                                                (-1, 32, 1), fake=False)
    sin_frame = sin_loader.to_frame_pair()
    sin_seq = sin_frame.to_seq_pair(sin_len_dict)
    sin_seq.denormalize(normal['sin'])
    sin_target = sin_seq.meta_target
    # Predict the cosine component the same way.
    cos_loader, cos_len_dict, cos_time_col = data_batch(
        args['<input-dir>'], args['--phone'], "cos_{}".format(key),
        normal['cos'])
    cos_loader, cos_loss = model['cos'].predict(cos_loader,
                                                '*cos_{}'.format(key),
                                                (-1, 32, 1), fake=False)
    cos_frame = cos_loader.to_frame_pair()
    cos_seq = cos_frame.to_seq_pair(cos_len_dict)
    cos_seq.denormalize(normal['cos'])
    cos_target = cos_seq.meta_target
    prediction = concat_prediction(sin_target, cos_target)
    # fetch standard target data column
    global standard_data
    std_prediction = standard_data.meta_target.clone()
    std_prediction.prune_identifier(remain_identifier=prediction.identifier)
    std_prediction.prune_keys(remain_keys=['{}'.format(key)])
    prediction = concat_prediction(prediction, std_prediction)
    # convert from trigonometrics prediction to final prediction
    # ('heading' uses the one-directional recovery, bidirect=False)
    prediction.append_rev_deg_trig(
        sin_key='*sin_{}'.format(key), cos_key='*cos_{}'.format(key),
        new_key='*{}'.format(key),
        bidirect=False if key == 'heading' else True,
        sin_loss=sin_loss, cos_loss=cos_loss)
    # only remain final prediction
    prediction.prune_keys(remain_keys=['*{}'.format(key), key])
    return prediction, sin_time_col
col_id = None for i, itr in enumerate(prune.keys): if itr == key: col_id = i break assert col_id is not None return col_id input_data = flight.FlightExtensionData('{}/{}.extension'.format( args['<input-dir>'], args['--phone'])) target_data = flight.FlightExtensionData('{}/g1000.extension'.format( args['<input-dir>'])) standard_data = frame.FlightSequencePairData('{}/{}_g1000.sequence'.format( args['<input-dir>'], args['--phone'])) standard_data.meta_input.set_recovery(input_data.recovery) standard_data.meta_target.set_recovery(target_data.recovery) batch_input_dir = args['<input-dir>'] def predict(key, args): def concat_prediction(prediction1, prediction2): # link data dataset1 = prediction1.dataset keys1 = prediction1.keys identifier1 = prediction1.identifier dataset2 = prediction2.dataset keys2 = prediction2.keys identifier2 = prediction2.identifier
def process_data(dir, phone, date):
    """Build an aligned phone/G1000 sequence pair for one date of raw data.

    Args
    ----
    dir : str
        root directory containing the 'g1000' and '<phone>' raw folders
    phone : str
        phone type; also the raw data sub-directory name
    date : str
        date identifier to load from the raw data

    Returns
    -------
    seq_pair_data : frame.FlightSequencePairData
        aligned and interpolated pair (phone as input, g1000 as target)
    """
    def process_g1000():
        # Load raw g1000 records and append derived speed/trig columns.
        raw_g1000_data = raw.RawG1000Data('{}/g1000'.format(dir), want=[date])
        ext_g1000_data = flight.FlightExtensionData(raw_g1000_data)
        ext_g1000_data.append_num_diff(key='alt', new_key='spd_alt', step=5,
                                       pad='repeat_base')
        ext_g1000_data.append_num_diff(key='lat', new_key='spd_lat', step=5,
                                       pad='repeat_base')
        ext_g1000_data.append_num_diff(key='long', new_key='spd_long', step=5,
                                       pad='repeat_base')
        ext_g1000_data.append_ground_speed(spd_lat_key='spd_lat',
                                           spd_long_key='spd_long',
                                           new_key='spd_gd')
        ext_g1000_data.append_num_diff(key='pitch', new_key='spd_pitch',
                                       step=5, pad='repeat_base')
        ext_g1000_data.append_num_diff(key='roll', new_key='spd_roll', step=5,
                                       pad='repeat_base')
        ext_g1000_data.append_deg_diff(key='heading', new_key='spd_heading',
                                       step=5, pad='repeat_base')
        ext_g1000_data.append_deg_sin(key='pitch', new_key='sin_pitch')
        ext_g1000_data.append_deg_sin(key='roll', new_key='sin_roll')
        ext_g1000_data.append_deg_sin(key='heading', new_key='sin_heading')
        ext_g1000_data.append_deg_cos(key='pitch', new_key='cos_pitch')
        ext_g1000_data.append_deg_cos(key='roll', new_key='cos_roll')
        ext_g1000_data.append_deg_cos(key='heading', new_key='cos_heading')
        return ext_g1000_data

    def process_phone():
        # Load raw phone records and append speed/acceleration columns.
        raw_phone_data = raw.RawPhoneData('{}/{}'.format(dir, phone),
                                          want=[date])
        ext_phone_data = flight.FlightExtensionData(raw_phone_data)
        ext_phone_data.append_num_diff(key='alt', new_key='spd_alt', step=5,
                                       pad='repeat_base')
        ext_phone_data.append_num_diff(key='lat', new_key='spd_lat', step=5,
                                       pad='repeat_base')
        ext_phone_data.append_num_diff(key='long', new_key='spd_long', step=5,
                                       pad='repeat_base')
        ext_phone_data.append_ground_speed(spd_lat_key='spd_lat',
                                           spd_long_key='spd_long',
                                           new_key='spd_gd')
        ext_phone_data.append_num_diff(key='spd_alt', new_key='acc_alt',
                                       step=1, pad='repeat_base')
        ext_phone_data.append_num_diff(key='spd_lat', new_key='acc_lat',
                                       step=1, pad='repeat_base')
        ext_phone_data.append_num_diff(key='spd_long', new_key='acc_long',
                                       step=1, pad='repeat_base')
        return ext_phone_data

    ext_g1000_data = process_g1000()
    ext_phone_data = process_phone()
    prn_g1000_data = flight.FlightPruneData(ext_g1000_data)
    prn_phone_data = flight.FlightPruneData(ext_phone_data)

    # Only dates covered by both devices are usable.
    g1000_date = set([itr.split('_')[0] for itr in prn_g1000_data.identifier])
    phone_date = set(prn_phone_data.identifier)
    share_date = g1000_date & phone_date
    union_date = g1000_date | phone_date
    share_date, union_date = list(sorted(share_date)), list(sorted(union_date))
    # Loop variable renamed from 'date' so it no longer shadows the
    # function parameter of the same name.
    for day in union_date:
        if day in share_date:
            logging.info("Detect Date - \033[32;1m{}\033[0m".format(day))
        elif day in g1000_date:
            logging.warning(
                "Detect Date - \033[31;1m{}\033[0m (G1000)".format(day))
        elif day in phone_date:
            # FIX: was 'phone_type', which is undefined in this scope
            # (NameError); the parameter is named 'phone'.
            logging.warning("Detect Date - \033[31;1m{}\033[0m ({})".format(
                day, phone))
        else:
            raise NotImplementedError

    # discard data not in the intersection
    g1000_discard = [
        itr for itr in prn_g1000_data.identifier
        if itr.split('_')[0] not in share_date
    ]
    prn_g1000_data.prune_identifier(discard_identifier=g1000_discard)
    prn_phone_data.prune_identifier(remain_identifier=share_date)

    # drop known-bad flights, then detect parking segments on both sides
    prn_g1000_data.prune_identifier(discard_identifier=constant.HIZARD_FLIGHTS)
    prn_g1000_data.detect_parking(method='time')
    phone_requirment = prn_g1000_data.time_date_flights()
    prn_phone_data.prune_identifier(remain_identifier=phone_requirment.keys())
    prn_phone_data.detect_parking(method='time', time_flights=phone_requirment)
    prn_g1000_data.prune_parking()
    prn_phone_data.prune_parking()

    # report records present on only one side
    g1000_idt = set(prn_g1000_data.identifier)
    phone_idt = set(prn_phone_data.identifier)
    share_idt = g1000_idt & phone_idt
    union_idt = g1000_idt | phone_idt
    share_idt, union_idt = list(sorted(share_idt)), list(sorted(union_idt))
    for idt in union_idt:
        if idt in share_idt:
            logging.info("Valid Record: \033[32;1m{}\033[0m".format(idt))
        elif idt in g1000_idt:
            logging.warning(
                "Redundant Record: \033[31;1m{}\033[0m (G1000)".format(idt))
        elif idt in phone_idt:
            # FIX: was 'phone_type' (undefined in this scope).
            logging.warning(
                "Redundant Record: \033[31;1m{}\033[0m ({})".format(
                    idt, phone))
        else:
            raise NotImplementedError

    prn_g1000_data.prune_identifier(
        remain_identifier=prn_phone_data.identifier)
    # align both streams onto a shared time base
    seq_pair_data = frame.FlightSequencePairData(entity_input=prn_phone_data,
                                                 entity_target=prn_g1000_data)
    seq_pair_data.align_and_interpolate(match_keys=('alt', 'lat', 'long'))
    seq_pair_data.distribute()
    return seq_pair_data
def main():
    """Command line entry: prepare data, train repeatedly, save best model.

    Parses command line arguments, (re)generates or loads the aligned
    sequence data, then runs up to '--try' training attempts and persists
    the model/normal/loss/result files for the best (lowest) loss.
    """
    args = parse_args()
    logger = config_log()
    # regenerate flight data from raw data
    if args['--update']:
        input_data, target_data = data_meta(
            args['<input-dir>'], args['<output-dir>'], args['--phone'],
            args=args)
        standard_data = data_flight(
            args['<output-dir>'], args['<output-dir>'], args['--phone'],
            args=args)
        standard_data.meta_input.set_recovery(input_data.recovery)
        standard_data.meta_target.set_recovery(target_data.recovery)
        batch_input_dir = args['<output-dir>']
    else:
        input_data = flight.FlightExtensionData(
            '{}/{}.extension'.format(args['<input-dir>'], args['--phone']))
        target_data = flight.FlightExtensionData(
            '{}/g1000.extension'.format(args['<input-dir>']))
        standard_data = frame.FlightSequencePairData(
            '{}/{}_g1000.sequence'.format(args['<input-dir>'],
                                          args['--phone']))
        standard_data.meta_input.set_recovery(input_data.recovery)
        standard_data.meta_target.set_recovery(target_data.recovery)
        batch_input_dir = args['<input-dir>']
    # evaluate processed data
    if args['--eval']:
        evaluate(standard_data.meta_input, standard_data.meta_target)
    # real process
    feat = args['--win']
    key = args['--keyword']
    mname = args['--model']
    output = args['<output-dir>']
    phone = args['--phone']
    for i in range(args['--try']):
        if key == 'hazard':
            model, normal, loss, prediction = hazardous(
                key, mname, batch_input_dir, output, fake=args['--fake'],
                args=args)
        elif args['--trig']:
            model, normal, loss, prediction = trigonometrics(
                key, mname, standard_data, batch_input_dir, output,
                fake=args['--fake'], args=args)
        elif args['--diff']:
            model, normal, loss, prediction = difference(
                key, mname, standard_data, batch_input_dir, output,
                fake=args['--fake'], args=args)
        else:
            continue
        # best_loss is unset on the first pass; the short-circuit guards it
        if i == 0 or loss < best_loss:
            logging.info('Save better model')
            best_loss = loss
            # FIX: the original rebound 'phone', 'key' and 'feat' here, so a
            # second save produced compounded names ("win-win-...") and
            # rewriting 'key' broke the 'hazard' dispatch on later tries.
            # Build the save name from locals instead of mutating loop state.
            if args['--stratux'] is None:
                save_phone = phone
            else:
                save_phone = "stratux-{}".format(args['--stratux'])
            if key == 'hazard':
                save_key = "hazard-{}".format(args['--threshold'])
            else:
                save_key = key
            save_feat = "win-{}".format(feat)
            name = "{}.{}.{}.{}".format(save_phone, save_key, save_feat,
                                        mname)
            if 'hazard' in save_key:
                result = evaluate_label_diff(standard=prediction,
                                             std_key='hazard',
                                             observe=prediction,
                                             obv_key='*hazard')
            else:
                result = evaluate_abs_diff(standard=prediction,
                                           std_key=save_key,
                                           observe=prediction,
                                           obv_key="*{}".format(save_key))
            torch.save(model, './model/{}.model'.format(name))
            torch.save(normal, './model/{}.normal'.format(name))
            torch.save(loss, './model/{}.loss'.format(name))
            torch.save(result, './model/{}.result'.format(name))
            prediction.save('{}/predict/{}/predict.tar'.format(output, name))
            if not args['--no-plot']:
                prediction.plot(
                    '{}/predict/{}'.format(output, name),
                    prediction_label="{} + Neural Network (Prediction)".format(
                        save_phone[0].upper() + save_phone[1:].lower()),
                    target_label="G1000 (Target)")
        else:
            logging.info('Ignore worse model')
def data_batch(input_dir, output_dir, phone_type, target_key, rnn=False, batch_size=16, shuffle=False, select_rate=(0.0, 1.0, 'large'), window_config=None, set_normal=None, return_len_dict=False, args=None): """Generate batch loader Args ---- input_dir : str root directory to load data output_dir : str root directory to save data phone_type : str phone type as input target_key : str target keyword batch_size : int batch size shuffle : bool if should shuffle batch loader select_rate : tuple proportion to select from original data large mode will extend select range on both head and tail small mode will truncate select range on both head and tail window_config : dict configuration of frame window set_normal : None or frame.BasePairData.Normalization normalize with given argument or return new normalization return_len_dict : bool if return length dict for future conversion back to sequence args : dict global arguments Returns ------- batch_loader : batch.FramePairDataLoader batch loader normal : frame.BasePairData.Normalization normalization parameters len_dict : dict dict of length of each sequence data It will load sequence pair data, and convert into batches. It will discard useless keywords, and can truncate flights to generate different data loader. 
""" # load sequence data seq_pair_data = frame.FlightSequencePairData('{}/{}_g1000.sequence'.format(input_dir, phone_type)) if args['--limit'] is not None: seq_pair_data.prune_identifier(seq_pair_data.identifier[:args['--limit']]) else: pass # extend hazardous state for g1000 # It must locate after alignment and interpolation if args['--keyword'] == 'hazard': meta_target = seq_pair_data.meta_target meta_target.append_hazard(threshold=args['--threshold'], roll_key='roll') seq_pair_data.update_target(meta_target) # remain only necessary keywords if args['--stratux'] is None: seq_pair_data.prune_keys( input_remain_keys=constant.INPUT_KEYWORDS, target_remain_keys=(target_key,)) else: lv = args['--stratux'] logging.info("Stratux Level {} Batch".format(lv)) if args['--keyword'] == 'hazard': input_key = 'roll' else: input_key = target_key.split('_')[1] if lv == 0: input_remain_keys = (input_key,) elif lv == 1: input_remain_keys = ('alt', 'lat', 'long', input_key) elif lv == 2: input_remain_keys = ('alt', 'lat', 'long', 'pitch', 'roll', 'heading') else: raise NotImplementedError seq_pair_data.prune_keys( input_remain_keys=input_remain_keys, target_remain_keys=(target_key,)) # remain only selective range of flights num_flights = len(seq_pair_data) begin, end = select_rate[0:2] if select_rate[2] == 'large': begin = int(math.floor(num_flights * begin)) end = int(math.ceil(num_flights * end)) elif select_rate[2] == 'small': begin = int(math.ceil(num_flights * begin)) end = int(math.floor(num_flights * end)) else: raise NotImplementedError if begin == end: if end == num_flights: begin -= 1 else: end += 1 seq_pair_data.prune_identifier( remain_identifier=seq_pair_data.identifier[begin:end]) # normalize sequence data on time domain if not args['--freq'] and not args['--no-normal']: if set_normal: seq_pair_data.normalize(set_normal) else: seq_pair_data.normalize() normal = seq_pair_data.normal else: normal = None # divide sequence data into frames frame_pair_data = 
frame.FlightFramePairData( seq_pair_data, input_win_len =window_config['input'] ['length'], target_win_len=window_config['target']['length'], input_win_offset =window_config['input'] ['offset_length'], target_win_offset=window_config['target']['offset_length'], input_win_offset_rate =window_config['input'] ['offset_rate'], target_win_offset_rate=window_config['target']['offset_rate'], input_pad =window_config['input'] ['padding'], target_pad=window_config['target']['padding']) # transform to frequency domain and normalize if args['--freq']: if args['--freq'] == 'haar': frame_pair_data = frame.FlightFramePairData.time_to_haar( frame_pair_data, concat=True) if not args['--no-normal']: if set_normal: frame_pair_data.normalize(set_normal) else: frame_pair_data.normalize() normal = frame_pair_data.normal else: normal = None else: raise NotImplementedError # generate batch loader if rnn: batch_loader = batch.TimestepPairDataLoader(frame_pair_data, timestep=batch_size) else: batch_loader = batch.FramePairDataLoader( frame_pair_data, batch_size=batch_size, shuffle=shuffle, drop_last=False) if not return_len_dict: return batch_loader, normal else: return batch_loader, normal, seq_pair_data.length_dict()
def data_flight(input_dir, output_dir, phone_type, args=None):
    """Produce the aligned flight sequence pair for one phone type.

    Args
    ----
    input_dir : str
        root directory to load the extension files from
    output_dir : str
        root directory for plots and the saved sequence file
    phone_type : str
        phone type used as the input side of the pair
    args : dict
        global arguments; reads '--preview-plot' and '--no-plot'

    Returns
    -------
    seq_pair_data : frame.FlightSequencePairData
        aligned, interpolated sequence pair (also saved to
        '<output_dir>/<phone_type>_g1000.sequence')
    """
    # Load extension data and wrap both sides for pruning.
    g1000 = flight.FlightPruneData(flight.FlightExtensionData(
        '{}/g1000.extension'.format(input_dir)))
    phone = flight.FlightPruneData(flight.FlightExtensionData(
        '{}/{}.extension'.format(input_dir, phone_type)))

    # Only dates recorded by both devices are usable; report each date.
    g1000_days = {itr.split('_')[0] for itr in g1000.identifier}
    phone_days = set(phone.identifier)
    both = sorted(g1000_days & phone_days)
    either = sorted(g1000_days | phone_days)
    for day in either:
        if day in both:
            logging.info("Detect Date - \033[32;1m{}\033[0m".format(day))
        elif day in g1000_days:
            logging.warning(
                "Detect Date - \033[31;1m{}\033[0m (G1000)".format(day))
        elif day in phone_days:
            logging.warning("Detect Date - \033[31;1m{}\033[0m ({})".format(
                day, phone_type))
        else:
            raise NotImplementedError

    # Drop everything outside the intersection of dates.
    dropped = [itr for itr in g1000.identifier
               if itr.split('_')[0] not in both]
    g1000.prune_identifier(discard_identifier=dropped)
    phone.prune_identifier(remain_identifier=both)

    # Optional preview plots of the raw position/time channels.
    if args['--preview-plot']:
        g1000_preview = g1000.clone()
        phone_preview = phone.clone()
        g1000_preview.prune_keys(remain_keys=['alt', 'lat', 'long', 'time'])
        phone_preview.prune_keys(remain_keys=['alt', 'lat', 'long', 'time'])
        g1000_preview.plot('{}/preview/g1000'.format(output_dir))
        phone_preview.plot('{}/preview/{}'.format(output_dir, phone_type))

    # Detect pure flight segments for g1000 (known-bad flights dropped first).
    g1000.prune_identifier(discard_identifier=constant.HIZARD_FLIGHTS)
    g1000.detect_parking(method='time')

    # Phone parking detection is driven by the g1000 flight time table.
    required = g1000.time_date_flights()
    phone.prune_identifier(remain_identifier=required.keys())
    phone.detect_parking(method='time', time_flights=required)

    if not args['--no-plot']:
        g1000.plot_parking_criterion('{}/park/g1000'.format(output_dir))
        phone.plot_parking_criterion('{}/park/{}'.format(output_dir,
                                                         phone_type))

    # Strip the parked portions on both sides.
    g1000.prune_parking()
    phone.prune_parking()

    # Report records present on only one side after parking removal.
    g1000_ids = set(g1000.identifier)
    phone_ids = set(phone.identifier)
    shared = sorted(g1000_ids & phone_ids)
    for idt in sorted(g1000_ids | phone_ids):
        if idt in shared:
            logging.info("Valid Record: \033[32;1m{}\033[0m".format(idt))
        elif idt in g1000_ids:
            logging.warning(
                "Redundant Record: \033[31;1m{}\033[0m (G1000)".format(idt))
        elif idt in phone_ids:
            logging.warning(
                "Redundant Record: \033[31;1m{}\033[0m ({})".format(
                    idt, phone_type))
        else:
            raise NotImplementedError

    # It is possible phone data record less flights than g1000 on the same
    # date (e.g. not enough battery).
    g1000.prune_identifier(remain_identifier=phone.identifier)

    # Align both streams and interpolate onto a shared time base.
    seq_pair_data = frame.FlightSequencePairData(entity_input=phone,
                                                 entity_target=g1000)
    seq_pair_data.align_and_interpolate(match_keys=('alt', 'lat', 'long'))
    seq_pair_data.distribute()
    if not args['--no-plot']:
        seq_pair_data.plot_match_criterion(
            '{}/wrap/{}_g1000'.format(output_dir, phone_type))
    seq_pair_data.save('{}/{}_g1000.sequence'.format(output_dir, phone_type))
    return seq_pair_data