def process_outlier_and_stack(interim_path, file_name, phase_str,
                              processed_path):
    data_nc = load_pkl(interim_path, file_name)
    # Outlier processing
    for v in obs_var:
        data_nc['input_obs'][v] = process_outlier_and_normalize(
            data_nc['input_obs'][v], obs_range_dic[v])
        data_nc['ground_truth'][v] = process_outlier_and_normalize(
            data_nc['ground_truth'][v], obs_range_dic[v])
    for v in ruitu_var:
        data_nc['input_ruitu'][v] = process_outlier_and_normalize(
            data_nc['input_ruitu'][v], ruitu_range_dic[v])

    stacked_input_obs = np.stack([data_nc['input_obs'][v] for v in obs_var], axis=-1)
    stacked_input_ruitu = np.stack([data_nc['input_ruitu'][v] for v in ruitu_var], axis=-1)
    stacked_ground_truth = np.stack([data_nc['ground_truth'][v] for v in target_var], axis=-1)

    # Shapes: (sample_ind, timestep, station_id, features)
    print(stacked_input_obs.shape)
    print(stacked_input_ruitu.shape)
    print(stacked_ground_truth.shape)

    data_dic = {
        'input_obs': stacked_input_obs,
        'input_ruitu': stacked_input_ruitu,
        'ground_truth': stacked_ground_truth
    }
    # Save the normalized, stacked arrays

    save_pkl(data_dic, processed_path, '{}_norm.dict'.format(phase_str))
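A minimal usage sketch for the function above; the interim/processed paths and the per-phase file names are assumptions, not taken from the source:

# Hypothetical paths and phase file names; adjust to the actual project layout.
for phase, fname in [('train', 'train.dict'), ('val', 'val.dict'), ('test', 'test.dict')]:
    process_outlier_and_stack(interim_path='../data/interim/',
                              file_name=fname,
                              phase_str=phase,
                              processed_path='../data/processed/')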
Example No. 2
def train(processed_path, train_data, val_data, model_save_path, model_name):
    train_dict = load_pkl(processed_path, train_data)
    val_dict = load_pkl(processed_path, val_data)

    print(train_dict.keys())
    print('Original input_obs data shape:')
    print(train_dict['input_obs'].shape)
    print(val_dict['input_obs'].shape)

    print('After clipping the last 9 days, input_obs data shape:')
    train_dict['input_obs'] = train_dict['input_obs'][:, :-9, :, :]
    val_dict['input_obs'] = val_dict['input_obs'][:, :-9, :, :]
    print(train_dict['input_obs'].shape)
    print(val_dict['input_obs'].shape)

    enc_dec = Seq2Seq_Class(model_save_path=model_save_path,
                            model_structure_name=model_name,
                            model_weights_name=model_name,
                            model_name=model_name)
    enc_dec.build_graph()

    val_size = val_dict['input_ruitu'].shape[0]  # 87 val samples
    # Station-id feature: one (val_size, 37) plane per station id 0-9, stacked on the last axis
    val_ids = []
    for i in range(10):
        val_ids.append(np.ones(shape=(val_size, 37)) * i)
    val_ids = np.stack(val_ids, axis=-1)
    print('val_ids.shape is:', val_ids.shape)
    # Per-timestep indices (0-36), repeated for every validation sample
    val_times = np.tile(np.array(range(37)), (val_size, 1))
    print('val_times.shape is:', val_times.shape)

    enc_dec.fit(train_dict['input_obs'],
                train_dict['input_ruitu'],
                train_dict['ground_truth'],
                val_dict['input_obs'],
                val_dict['input_ruitu'],
                val_dict['ground_truth'],
                val_ids=val_ids,
                val_times=val_times,
                iterations=10000,
                batch_size=512,
                validation=True)

    print('Training finished!')
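A hypothetical call to train(); the file names follow the '{phase}_norm.dict' pattern produced by process_outlier_and_stack above, while the paths and model name are assumptions:

# Hypothetical paths and model name.
train(processed_path='../data/processed/',
      train_data='train_norm.dict',
      val_data='val_norm.dict',
      model_save_path='../models/',
      model_name='seq2seq_weather')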
def Load_and_predict(model_save_path, model_name, processed_path,
                     test_file_name, saved_csv_path, saved_csv_name):

    #TODO: delete class!
    
    # Load the model architecture from JSON and create the model
    with open(model_save_path + model_name + '.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    model.summary()
    # load weights into new model
    model.load_weights(model_save_path+model_name+'.h5')

    # Load test data
    test_data = load_pkl(processed_path, test_file_name)

    test_inputs = test_data['input_obs']
    test_ruitu = test_data['input_ruitu']

    # Add a leading sample dimension of size 1 (a single test period)
    test_inputs = np.expand_dims(test_inputs, axis=0)
    test_ruitu = np.expand_dims(test_ruitu, axis=0)
    # Station-id feature: one (1, 37) plane per station id 0-9, stacked on the last axis
    test_ids = []
    for i in range(10):
        test_ids.append(np.ones(shape=(1, 37)) * i)
    test_ids = np.stack(test_ids, axis=-1)
    # Per-timestep indices (0-36) for each test sample
    test_size = test_inputs.shape[0]
    test_times = np.tile(np.array(range(37)), (test_size, 1))

    pred_result, pred_var_result = predict(model, test_inputs, test_ruitu, test_ids, test_times)

    print(pred_result.shape)
    print(pred_var_result.shape)

    # Save the result for submission
    df_empty = renorm_for_submit(pred_mean=pred_result[0],
                                 pred_var=pred_var_result[0],
                                 ruitu_inputs=test_ruitu[0],
                                 timestep_to_ensemble=21,
                                 alpha=1)

    # Pad the column names with spaces so the header matches the required submission format
    df_empty = df_empty.rename(columns={"t2m": "       t2m",
                                        "rh2m": "      rh2m",
                                        "w10m": "      w10m"})

    df_empty.to_csv(path_or_buf=saved_csv_path + saved_csv_name,
                    header=True, index=False)
    print('Ok! You can submit now!')
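A hypothetical call to Load_and_predict(); paths and names are assumptions, with the test file again following the '{phase}_norm.dict' convention:

# Hypothetical paths and file names.
Load_and_predict(model_save_path='../models/',
                 model_name='seq2seq_weather',
                 processed_path='../data/processed/',
                 test_file_name='test_norm.dict',
                 saved_csv_path='../submissions/',
                 saved_csv_name='forecast.csv')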
Example No. 4
def main():
    """ main """
    apar = argparse.ArgumentParser(description="Benchmark Incident Classifier")
    apar.add_argument("-m", "--model", required=True)
    apar.add_argument("-f", "--file", required=True)
    apar.add_argument("-c", "--col", nargs="*")
    args = apar.parse_args()
    model_filename = args.model
    csv_filename = args.file
    cols = args.col
    t0 = time()
    MODEL = helper.load_pkl(model_filename)
    print("model loaded:\t{:0.3f}s".format((time() - t0)))
    test = helper.load_csv(csv_filename)
    t0 = time()
    test = helper.normalize_multiproc(test)
    print("normalization done:\t{:0.3f}s".format((time() - t0)))
    vectorizer = MODEL[config.VECTORIZERNAME]
    X_test = vectorizer.transform(test[config.NORMTEXTCOL])
    if cols is None:
        benchmark_multiple(MODEL[config.CLASSIFIER], X_test, test[list(config.CLASSCOLS)])
    else:
        for col in cols:
            benchmark_single(MODEL[config.CLASSIFIER], X_test, test[col], col)
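A minimal entry-point sketch for the benchmark script; the script and file names in the comment are hypothetical:

if __name__ == "__main__":
    # Example invocation (hypothetical file names):
    #   python benchmark.py -m model.pkl -f incidents_test.csv -c category priority
    main()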
Example No. 5
    def classify(self, text):
        """ EmailIncClassifierService """
        normtext = helper.normalize_str(text)
        x = VECTORIZER.transform([normtext])
        model = MODEL[config.CLASSIFIER]
        pred = []
        for classcol in config.CLASSCOLS:
            pred.append(model[classcol].predict(x))
        return pred[0][0], pred[1][0], pred[2][0], pred[3][0]


APPLICATION = Application([EmailIncClassifierService],
                          'org.michep.inclassifier.soap',
                          in_protocol=Soap11(validator='lxml'),
                          out_protocol=Soap11())

WSGI_APPLICATION = WsgiApplication(APPLICATION)

if __name__ == '__main__':
    MODEL = helper.load_pkl("model.pkl")
    VECTORIZER = MODEL[config.VECTORIZERNAME]

    logging.basicConfig(level=logging.ERROR)
    logging.getLogger('spyne.protocol.xml').setLevel(logging.DEBUG)

    logging.info("listening to http://127.0.0.1:8000")
    logging.info("wsdl is at: http://localhost:8000/?wsdl")

    SERVER = make_server('127.0.0.1', 8000, WSGI_APPLICATION)
    SERVER.serve_forever()
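A minimal client-side sketch, assuming the classify() method above is exposed as a SOAP operation through the WSDL served by this application; it uses the zeep SOAP client, which is not part of the source:

from zeep import Client

# Hypothetical client call against the local WSDL.
client = Client('http://127.0.0.1:8000/?wsdl')
result = client.service.classify('Printer on floor 3 is out of toner')
print(result)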
Example No. 6
                                           batch_size=args.batch_size,
                                           pin_memory=True)

criterion = nn.CrossEntropyLoss().cuda()

model = vgg16(pretrained=True).to(args.device)

show_summary(model)

# save apoz pkl
if not os.path.exists(args.apoz_path):
    apoz = APoZ(model).get_apoz(valid_loader, criterion)
    save_pkl(apoz, args.apoz_path)

else:
    apoz = load_pkl(args.apoz_path)

# Report mean APoZ (Average Percentage of Zeros) per layer
print("Average Percentage of Zeros (mean per layer)")
for n, p in zip(module_name, apoz):
    print(f"{n} : {p.mean() * 100 : .2f}%")

# Masking
mask = []

for i, p in enumerate(apoz[-3:-1]):
    sorted_arg = np.argsort(p)
    mask.append(sorted_arg < select_rate[i])

# Conv 5-3 [output]
model.features[-3] = conv_post_mask(model.features[-3], mask[0])
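A minimal sketch of what a helper like conv_post_mask might do, assuming it prunes the output channels of a Conv2d layer with a boolean keep-mask; the name, signature, and behaviour here are assumptions inferred from how it is called above, not the project's actual implementation:

import torch
import torch.nn as nn

def conv_post_mask_sketch(conv, keep_mask):
    # Build a new Conv2d keeping only the output channels where keep_mask is True.
    keep_idx = torch.as_tensor(keep_mask, dtype=torch.bool)
    new_conv = nn.Conv2d(conv.in_channels,
                         int(keep_idx.sum()),
                         kernel_size=conv.kernel_size,
                         stride=conv.stride,
                         padding=conv.padding,
                         bias=conv.bias is not None)
    with torch.no_grad():
        # Copy the surviving filters (and biases) into the smaller layer.
        new_conv.weight.copy_(conv.weight[keep_idx])
        if conv.bias is not None:
            new_conv.bias.copy_(conv.bias[keep_idx])
    return new_conv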
Example No. 7
import nnabla as nn

from manipulate import generate
from train import calculate_scales
from helper import load_pkl
from args import get_args
from nnabla.ext_utils import get_extension_context

if __name__ == '__main__':
    args = get_args(test=True)

    if args.gpu:
        ctx = get_extension_context('cudnn', device_id='0')
        nn.set_default_context(ctx)

    reals = load_pkl(args.load_reals)
    Zs = load_pkl(args.load_Zs)
    noise_amps = load_pkl(args.load_noise_amps)
    nn.load_parameters(args.load)

    scale_factor, _, _ = calculate_scales(create_real_images)
    args.scale_factor = scale_factor

    generate(args, Zs, reals, noise_amps, gen_start=args.gen_start)
Example No. 8
def load_pipeline(obs_df_file,
                  ruitu_df_file,
                  input_len=74,
                  output_len=37,
                  train_ratio=0.9,
                  station_id=90001,
                  only_target=True):

    print('The number of Obs variables:', len(obs_range_dic))
    print('The number of Ruitu variables:', len(ruitu_range_dic))

    # Define target variables
    targets = ['t2m', 'rh2m', 'w10m']

    # Load filled Dataframe
    obs_df = load_pkl(processed_path, obs_df_file)
    ruitu_df = load_pkl(processed_path, ruitu_df_file)

    ruitu_df.reset_index(inplace=True)
    obs_df.reset_index(inplace=True)

    ruitu_df.set_index(['sta_id', 'time_index'], inplace=True)
    obs_df.set_index(['sta_id', 'time_index'], inplace=True)

    time_format_str = '%Y-%m-%d %H:%M:%S'
    start_time = '2015-03-01 03:00:00'
    start_date = datetime.datetime.strptime(start_time, time_format_str)

    all_hours = 28512
    sta_id = station_id
    print('Selected Dataset of Station:', sta_id)
    selected_df_obs = obs_df.loc[sta_id]
    selected_df_ruitu = ruitu_df.loc[sta_id]

    selected_df_obs = reset_value_range(selected_df_obs, obs_range_dic)
    selected_df_ruitu = reset_value_range(selected_df_ruitu, ruitu_range_dic)

    # Min-max normalization, column by column
    cols = selected_df_obs.columns
    norm_obs_df = selected_df_obs.copy()
    for c in cols:
        print('Normalizing column {}...'.format(c))
        norm_obs_df[c] = min_max_norm(selected_df_obs[c], obs_range_dic[c][0],
                                      obs_range_dic[c][1])

    print('OK! Normalized the Observation dataframe!')

    # normalize for each column
    cols = selected_df_ruitu.columns
    norm_ruitu_df = selected_df_ruitu.copy()
    for c in cols:
        print('Normalizing column {}...'.format(c))
        norm_ruitu_df[c] = min_max_norm(selected_df_ruitu[c],
                                        ruitu_range_dic[c][0],
                                        ruitu_range_dic[c][1])

    print('OK! Normalized the Ruitu dataframe!')

    # Fetch training and test data of numpy format

    train_obs_X, train_obs_Y, test_obs_X, test_obs_Y = get_train_test(
        norm_obs_df,
        input_len,
        output_len,
        per=train_ratio,
        data_name='obs',
        var_name=vars_names,
        only_target=only_target)

    train_ruitu_X, train_ruitu_Y, test_ruitu_X, test_ruitu_Y = get_train_test(
        norm_ruitu_df,
        input_len,
        output_len,
        per=train_ratio,
        data_name='ruitu',
        var_name=vars_names)

    print('Obs X shape:', train_obs_X.shape)
    print('Obs Y shape:', train_obs_Y.shape)
    print('Ruitu X shape:', train_ruitu_X.shape)
    print('Ruitu Y shape:', train_ruitu_Y.shape)

    return {
        'train_set': [train_obs_X, train_obs_Y, train_ruitu_X, train_ruitu_Y],
        'test_set': [test_obs_X, test_obs_Y, test_ruitu_X, test_ruitu_Y]
    }
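A hypothetical call to load_pipeline(); the pickled dataframe file names are assumptions:

# Hypothetical file names for the filled observation/Ruitu dataframes.
data = load_pipeline(obs_df_file='obs_filled.pkl',
                     ruitu_df_file='ruitu_filled.pkl',
                     station_id=90001)
train_obs_X, train_obs_Y, train_ruitu_X, train_ruitu_Y = data['train_set']
test_obs_X, test_obs_Y, test_ruitu_X, test_ruitu_Y = data['test_set']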