def process_outlier_and_stack(interim_path, file_name, phase_str, processed_path):
    data_nc = load_pkl(interim_path, file_name)

    # Outlier processing
    for v in obs_var:
        data_nc['input_obs'][v] = process_outlier_and_normalize(
            data_nc['input_obs'][v], obs_range_dic[v])
        data_nc['ground_truth'][v] = process_outlier_and_normalize(
            data_nc['ground_truth'][v], obs_range_dic[v])

    for v in ruitu_var:
        data_nc['input_ruitu'][v] = process_outlier_and_normalize(
            data_nc['input_ruitu'][v], ruitu_range_dic[v])

    stacked_input_obs = np.stack([data_nc['input_obs'][v] for v in obs_var], axis=-1)
    stacked_input_ruitu = np.stack([data_nc['input_ruitu'][v] for v in ruitu_var], axis=-1)
    stacked_ground_truth = np.stack([data_nc['ground_truth'][v] for v in target_var], axis=-1)

    print(stacked_input_obs.shape)  # (sample_ind, timestep, station_id, features)
    print(stacked_input_ruitu.shape)
    print(stacked_ground_truth.shape)

    # save the normalized, stacked data
    data_dic = {
        'input_obs': stacked_input_obs,
        'input_ruitu': stacked_input_ruitu,
        'ground_truth': stacked_ground_truth,
    }
    save_pkl(data_dic, processed_path, '{}_norm.dict'.format(phase_str))
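# A minimal sketch of what process_outlier_and_normalize is assumed to do here:
# clip values to the [min, max] range stored in the range dictionary, then
# min-max scale into [0, 1]. The repo's actual helper may also handle NaN
# filling; treat this as an illustration, not the project's implementation.
# (numpy is assumed imported as np, as elsewhere in this file.)
def process_outlier_and_normalize_sketch(arr, value_range):
    v_min, v_max = value_range
    arr = np.clip(arr, v_min, v_max)        # clamp outliers into the valid range
    return (arr - v_min) / (v_max - v_min)  # min-max normalize to [0, 1]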
def train(processed_path, train_data, val_data, model_save_path, model_name):
    train_dict = load_pkl(processed_path, train_data)
    val_dict = load_pkl(processed_path, val_data)
    print(train_dict.keys())

    print('Original input_obs data shape:')
    print(train_dict['input_obs'].shape)
    print(val_dict['input_obs'].shape)

    # Drop the last 9 timesteps of the observation inputs
    print('After clipping the 9 days, input_obs data shape:')
    train_dict['input_obs'] = train_dict['input_obs'][:, :-9, :, :]
    val_dict['input_obs'] = val_dict['input_obs'][:, :-9, :, :]
    print(train_dict['input_obs'].shape)
    print(val_dict['input_obs'].shape)

    enc_dec = Seq2Seq_Class(model_save_path=model_save_path,
                            model_structure_name=model_name,
                            model_weights_name=model_name,
                            model_name=model_name)
    enc_dec.build_graph()

    val_size = val_dict['input_ruitu'].shape[0]  # 87 val samples

    # Station-ID auxiliary input: one constant ID plane per station
    val_ids = []
    for i in range(10):
        val_ids.append(np.ones(shape=(val_size, 37)) * i)
    val_ids = np.stack(val_ids, axis=-1)
    print('val_ids.shape is:', val_ids.shape)

    # Time auxiliary input: timestep indices 0..36, tiled for every sample
    val_times = np.tile(np.array(range(37)), (val_size, 1))
    print('val_times.shape is:', val_times.shape)

    enc_dec.fit(train_dict['input_obs'], train_dict['input_ruitu'],
                train_dict['ground_truth'],
                val_dict['input_obs'], val_dict['input_ruitu'],
                val_dict['ground_truth'],
                val_ids=val_ids, val_times=val_times,
                iterations=10000, batch_size=512, validation=True)
    print('Training finished!')
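# Hypothetical invocation of train(); the '*_norm.dict' filenames follow the
# naming used by process_outlier_and_stack() above, but the directory layout
# and model name are illustrative assumptions, not fixed by the repo:
#
# train(processed_path='../data/processed/',
#       train_data='train_norm.dict',
#       val_data='val_norm.dict',
#       model_save_path='../models/',
#       model_name='seq2seq_weather')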
def Load_and_predict(model_save_path, model_name, processed_path, test_file_name,
                     saved_csv_path, saved_csv_name):
    # TODO: delete class!
    # Load the model architecture from JSON and rebuild it
    with open(model_save_path + model_name + '.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    model.summary()

    # Load the trained weights into the rebuilt model
    model.load_weights(model_save_path + model_name + '.h5')

    # Load test data and add a leading batch dimension
    test_data = load_pkl(processed_path, test_file_name)
    test_inputs = np.expand_dims(test_data['input_obs'], axis=0)
    test_ruitu = np.expand_dims(test_data['input_ruitu'], axis=0)

    # Station-ID auxiliary input (10 stations, 37 output timesteps)
    test_ids = []
    for i in range(10):
        test_ids.append(np.ones(shape=(1, 37)) * i)
    test_ids = np.stack(test_ids, axis=-1)

    # Time auxiliary input: timestep indices tiled per sample
    test_size = test_inputs.shape[0]
    test_times = np.tile(np.array(range(37)), (test_size, 1))

    pred_result, pred_var_result = predict(model, test_inputs, test_ruitu,
                                           test_ids, test_times)
    print(pred_result.shape)
    print(pred_var_result.shape)

    # Save the result for submission
    df_empty = renorm_for_submit(pred_mean=pred_result[0],
                                 pred_var=pred_var_result[0],
                                 ruitu_inputs=test_ruitu[0],
                                 timestep_to_ensemble=21, alpha=1)
    # The submission format expects a leading space in these column names
    df_empty = df_empty.rename(columns={'t2m': ' t2m', 'rh2m': ' rh2m',
                                        'w10m': ' w10m'})
    df_empty.to_csv(path_or_buf=saved_csv_path + saved_csv_name,
                    header=True, index=False)
    print('Ok! You can submit now!')
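# A minimal sketch of what predict() is assumed to do here: run the Keras model
# on the four inputs and split its output into per-timestep means and variances
# for the three targets. The input ordering and the log-variance
# parameterization are assumptions; the repo's own predict() may differ.
# (numpy is assumed imported as np, as elsewhere in this file.)
def predict_sketch(model, inputs, ruitu, ids, times):
    raw = model.predict(x=[inputs, ruitu, ids, times])
    pred_mean = raw[..., :3]         # t2m, rh2m, w10m means
    pred_var = np.exp(raw[..., 3:])  # variances, assuming log-variance outputs
    return pred_mean, pred_var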
def main():
    """Benchmark an incident classifier on a labeled CSV file."""
    apar = argparse.ArgumentParser(description="Benchmark Incident Classifier")
    apar.add_argument("-m", "--model", required=True)
    apar.add_argument("-f", "--file", required=True)
    apar.add_argument("-c", "--col", nargs="*")
    args = apar.parse_args()

    model_filename = args.model
    csv_filename = args.file
    cols = args.col

    t0 = time()
    MODEL = helper.load_pkl(model_filename)
    print("model loaded:\t{:0.3f}s".format(time() - t0))

    test = helper.load_csv(csv_filename)
    t0 = time()
    test = helper.normalize_multiproc(test)
    print("normalization done:\t{:0.3f}s".format(time() - t0))

    vectorizer = MODEL[config.VECTORIZERNAME]
    X_test = vectorizer.transform(test[config.NORMTEXTCOL])

    if cols is None:
        # No columns given: benchmark all class columns at once
        benchmark_multiple(MODEL[config.CLASSIFIER], X_test,
                           test[list(config.CLASSCOLS)])
    else:
        for col in cols:
            benchmark_single(MODEL[config.CLASSIFIER], X_test, test[col], col)
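# Hypothetical command lines, assuming this script is saved as benchmark.py and
# the CSV contains class columns named 'category' and 'urgency' (both names are
# illustrative, not fixed by the repo):
#
#   python benchmark.py -m model.pkl -f test.csv                   # all class columns
#   python benchmark.py -m model.pkl -f test.csv -c category urgency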
def classify(self, text):
    """EmailIncClassifierService: classify one email text against every class column."""
    normtext = helper.normalize_str(text)
    x = VECTORIZER.transform([normtext])
    model = MODEL[config.CLASSIFIER]
    pred = []
    for classcol in config.CLASSCOLS:
        pred.append(model[classcol].predict(x))
    return pred[0][0], pred[1][0], pred[2][0], pred[3][0]


APPLICATION = Application([EmailIncClassifierService], 'org.michep.inclassifier.soap',
                          in_protocol=Soap11(validator='lxml'),
                          out_protocol=Soap11())
WSGI_APPLICATION = WsgiApplication(APPLICATION)

if __name__ == '__main__':
    MODEL = helper.load_pkl("model.pkl")
    VECTORIZER = MODEL[config.VECTORIZERNAME]

    logging.basicConfig(level=logging.ERROR)
    logging.getLogger('spyne.protocol.xml').setLevel(logging.DEBUG)
    # Note: the two info() calls below are suppressed at the ERROR root level;
    # lower the root level to see them.
    logging.info("listening to http://127.0.0.1:8000")
    logging.info("wsdl is at: http://localhost:8000/?wsdl")

    SERVER = make_server('127.0.0.1', 8000, WSGI_APPLICATION)
    SERVER.serve_forever()
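# A hypothetical SOAP client for the service above using the zeep library.
# The operation name 'classify' is assumed to match the rpc-exposed method on
# EmailIncClassifierService, and the return value to be the four predicted labels:
#
# from zeep import Client
# client = Client('http://localhost:8000/?wsdl')
# labels = client.service.classify('Printer in room 204 is out of toner')
# print(labels)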
                          batch_size=args.batch_size, pin_memory=True)

criterion = nn.CrossEntropyLoss().cuda()

model = vgg16(pretrained=True).to(args.device)
show_summary(model)

# Compute the APoZ statistics on the validation set, or load a cached pkl
if not os.path.exists(args.apoz_path):
    apoz = APoZ(model).get_apoz(valid_loader, criterion)
    save_pkl(apoz, args.apoz_path)
else:
    apoz = load_pkl(args.apoz_path)

# Report the Average Percentage of Zeros per module
print("Average Percentage Of Zero Mean")
for n, p in zip(module_name, apoz):
    print(f"{n} : {p.mean() * 100 : .2f}%")

# Masking: build boolean keep-masks from the APoZ ranking of each layer
mask = []
for i, p in enumerate(apoz[-3:-1]):
    sorted_arg = np.argsort(p)
    mask.append(sorted_arg < select_rate[i])

# Conv 5-3 [output]
model.features[-3] = conv_post_mask(model.features[-3], mask[0])
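# A minimal sketch of what conv_post_mask is assumed to do: rebuild a Conv2d
# with only the masked-in output channels, copying the surviving weights and
# biases. The repo's real helper may do more (e.g. adjust adjacent batch-norm
# or downstream layers); this illustrates the output-channel pruning step only.
import torch
import torch.nn as nn

def conv_post_mask_sketch(conv: nn.Conv2d, keep_mask) -> nn.Conv2d:
    keep = torch.as_tensor(keep_mask, dtype=torch.bool)
    new_conv = nn.Conv2d(conv.in_channels, int(keep.sum()),
                         kernel_size=conv.kernel_size, stride=conv.stride,
                         padding=conv.padding, bias=conv.bias is not None)
    with torch.no_grad():
        new_conv.weight.copy_(conv.weight[keep])  # keep only surviving filters
        if conv.bias is not None:
            new_conv.bias.copy_(conv.bias[keep])
    return new_conv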
import nnabla as nn

from manipulate import generate
from train import calculate_scales
from helper import load_pkl
from args import get_args
from nnabla.ext_utils import get_extension_context

if __name__ == '__main__':
    args = get_args(test=True)

    if args.gpu:
        ctx = get_extension_context('cudnn', device_id='0')
        nn.set_default_context(ctx)

    # Load the saved pyramid of real images, latent codes, and noise amplitudes
    reals = load_pkl(args.load_reals)
    Zs = load_pkl(args.load_Zs)
    noise_amps = load_pkl(args.load_noise_amps)

    nn.load_parameters(args.load)

    # NOTE: create_real_images is not defined in this file; it is assumed to be
    # imported or defined elsewhere in the repo.
    scale_factor, _, _ = calculate_scales(create_real_images)
    args.scale_factor = scale_factor

    generate(args, Zs, reals, noise_amps, gen_start=args.gen_start)
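# For reference, load_pkl here takes a single path, unlike the two-argument
# (directory, filename) variant used in the weather pipeline above. A minimal
# sketch of such a helper pair, assuming plain pickle serialization:
import pickle

def load_pkl_sketch(path):
    with open(path, 'rb') as f:
        return pickle.load(f)

def save_pkl_sketch(obj, path):
    with open(path, 'wb') as f:
        pickle.dump(obj, f)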
def load_pipeline(obs_df_file, ruitu_df_file, input_len=74, output_len=37,
                  train_ratio=0.9, station_id=90001, only_target=True):
    print('The number of Obs variables:', len(obs_range_dic))
    print('The number of Ruitu variables:', len(ruitu_range_dic))

    # Define target variables
    targets = ['t2m', 'rh2m', 'w10m']

    # Load the gap-filled DataFrames and index them by (station, time)
    obs_df = load_pkl(processed_path, obs_df_file)
    ruitu_df = load_pkl(processed_path, ruitu_df_file)
    ruitu_df.reset_index(inplace=True)
    obs_df.reset_index(inplace=True)
    ruitu_df.set_index(['sta_id', 'time_index'], inplace=True)
    obs_df.set_index(['sta_id', 'time_index'], inplace=True)

    time_format_str = '%Y-%m-%d %H:%M:%S'
    start_time = '2015-03-01 03:00:00'
    start_date = datetime.datetime.strptime(start_time, time_format_str)
    all_hours = 28512

    sta_id = station_id
    print('Selected dataset of station:', sta_id)
    selected_df_obs = obs_df.loc[sta_id]
    selected_df_ruitu = ruitu_df.loc[sta_id]

    # Clip values back into their valid ranges before normalizing
    selected_df_obs = reset_value_range(selected_df_obs, obs_range_dic)
    selected_df_ruitu = reset_value_range(selected_df_ruitu, ruitu_range_dic)

    # Max-min normalization, column by column
    norm_obs_df = selected_df_obs.copy()
    for c in selected_df_obs.columns:
        print('Normalizing column {}...'.format(c))
        norm_obs_df[c] = min_max_norm(selected_df_obs[c],
                                      obs_range_dic[c][0], obs_range_dic[c][1])
    print('OK! Normalized the Observation DataFrame!')

    norm_ruitu_df = selected_df_ruitu.copy()
    for c in selected_df_ruitu.columns:
        print('Normalizing column {}...'.format(c))
        norm_ruitu_df[c] = min_max_norm(selected_df_ruitu[c],
                                        ruitu_range_dic[c][0], ruitu_range_dic[c][1])
    print('OK! Normalized the Ruitu DataFrame!')

    # Slice the normalized frames into training and test arrays (numpy format)
    train_obs_X, train_obs_Y, test_obs_X, test_obs_Y = get_train_test(
        norm_obs_df, input_len, output_len, per=train_ratio,
        data_name='obs', var_name=vars_names, only_target=only_target)
    train_ruitu_X, train_ruitu_Y, test_ruitu_X, test_ruitu_Y = get_train_test(
        norm_ruitu_df, input_len, output_len, per=train_ratio,
        data_name='ruitu', var_name=vars_names)

    print('Obs X shape:', train_obs_X.shape)
    print('Obs Y shape:', train_obs_Y.shape)
    print('Ruitu X shape:', train_ruitu_X.shape)
    print('Ruitu Y shape:', train_ruitu_Y.shape)

    return {
        'train_set': [train_obs_X, train_obs_Y, train_ruitu_X, train_ruitu_Y],
        'test_set': [test_obs_X, test_obs_Y, test_ruitu_X, test_ruitu_Y],
    }
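# Hypothetical call; the pickle filenames are illustrative, not fixed by the repo:
#
# data = load_pipeline('obs_filled.pkl', 'ruitu_filled.pkl', station_id=90001)
# train_obs_X, train_obs_Y, train_ruitu_X, train_ruitu_Y = data['train_set']
# test_obs_X, test_obs_Y, test_ruitu_X, test_ruitu_Y = data['test_set']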