def handler(event, context):
    """
    entry point for Lambda function
    :param event: the Lambda event
    :param context: the Lambda context
    :return: None
    """
    print(f"'event': {event}")
    print(f"'context': {context}")

    # -----------------------------------------------------
    # EXTRACT

    # define ny_dataset
    ny_dataset = classes.Dataset("ny_dataset")
    ny_dataset.headers_all = ["date", "cases", "deaths"]
    ny_dataset.headers_key = ny_dataset.headers_all
    ny_dataset.match_field = "date"
    ny_dataset.source_url = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us.csv"

    # extract and print ny_dataset
    ny_dataset.df = extract.extract(ny_dataset.source_url)
    print(f"'ny_dataset.df':\n{ny_dataset.df}")

    # define jh_dataset
    jh_dataset = classes.Dataset("jh_dataset")
    jh_dataset.headers_all = [
        "Date", "Country/Region", "Province/State", "Lat", "Long",
        "Confirmed", "Recovered", "Deaths"
    ]
    jh_dataset.headers_key = ["Date", "Country/Region", "Recovered"]
    jh_dataset.match_field = "Date"
    jh_dataset.source_url = \
        "https://raw.githubusercontent.com/datasets/covid-19/master/data/time-series-19-covid-combined.csv"

    # extract and print jh_dataset
    jh_dataset.df = extract.extract(jh_dataset.source_url,
                                    jh_dataset.headers_key,
                                    "Country/Region", "US")
    print(f"'jh_dataset.df':\n{jh_dataset.df}")

    # -----------------------------------------------------
    # TRANSFORM

    # transform the datasets into CovidStat instances
    covid_stats = transform.transform(ny_dataset, jh_dataset)

    # print CovidStats
    print(*covid_stats, sep="\n")

    # -----------------------------------------------------
    # LOAD

    # load CovidStat instances into the CovidStats DynamoDB table
    load.load_all(classes.CovidStat, covid_stats)
    load.load_json(covid_stats)
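
# A minimal sketch of the `classes.Dataset` container the handler above assumes.
# The attribute names come from the usage in handler(); the constructor signature
# and defaults here are assumptions, not the project's actual definition.
class Dataset:
    def __init__(self, name):
        self.name = name          # identifier, e.g. "ny_dataset"
        self.headers_all = []     # every column expected in the source CSV
        self.headers_key = []     # subset of columns used downstream
        self.match_field = None   # column used to join the two datasets
        self.source_url = None    # raw CSV location
        self.df = None            # populated by extract.extract(...)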
def main(): np.random.seed(7) t1 = time.time() image_path = config.image_path track_path = config.track_path track_dic_path = config.track_dic_path track_dict = load.load_json(track_dic_path) intensity_mean,intensity_std = config.intensity_mean, config.intensity_std batch_size = config.batch_size ModelCheckpoint_file = config.ModelCheckpoint_file look_back = config.look_back img_rows,img_cols = config.img_rows,config.img_cols subdir_list = [] hist_path = config.hist_path # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512)) # train_y = np.random.uniform(0,1,(17,1)) # print (train_x) # train_x = np.array(train_x,dtype = 'float32') # train_y = np.array(train_y,dtype= 'float32') # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False) """ count the number of image in each typhoon sequence """ image_number_dictionary={} for subdirs, dirs, files in os.walk(image_path): # print (subdirs) subdir_list.append(subdirs) for subdir in subdir_list: count = 0 for subdirs, dirs, files in os.walk(subdir): for file in files: count += 1 key = subdir.split('/')[-1] image_number_dictionary[key] = count if count < 24: print (key,count) # print (image_number_dictionary) """ check the number of images equals the number of track data? """ # for subdir in subdir_list: # for subdirs, dirs, files in os.walk(subdir): # for file in files: # # print (file) # [k1, k2] = file.split("-")[:2] # key = "".join((k1,k2)) # try: # mark = track_dict[key] # except KeyError: # print (file +'do not have track value') # for k in track_dict.keys(): # k2 = k[-6:] # typhoon number # k1 = k[:-6] # file = k1 +'-' + k2 +'*' # file_path = image_path + k2 +'/' + file # if not os.path.isfile(file_path): # print (file_path not exists) track_dict_number ={} equal_track_image_list = [] not_equal_track_image_list = [] for subdir in subdir_list: key =subdir.split('/')[-1] if len(key) > 0 and key not in ['201620','201621','201622']: track_file_path = track_path + key+'.itk' with open(track_file_path,'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') count = 0 for row in tsv_reader: count += 1 track_dict_number[key] = count if count != image_number_dictionary[key]: not_equal_track_image_list.append(key) # print (key,count,image_number_dictionary[key],'not equal') if count == image_number_dictionary[key]: # print (key,count,image_number_dictionary[key],' equal') equal_track_image_list.append(key) # print (not_equal_track_image_list,'not_equal_track_image_list') # print (equal_track_image_list,'equal_track_image_list') print (len(equal_track_image_list),'lenth of eqaual track image list') # "check if track file difference is one hour, result is yes for both equal and not_eqaul_image_list " for key in not_equal_track_image_list: ts =[] track_file_path = track_path + key+'.itk' with open(track_file_path,'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') for row in tsv_reader: yy = row[0] mm = row[1] dd = row[2] hh = row[3] t = datetime.datetime.strptime(yy +":" + mm +":" + dd +':' +hh, '%Y:%m:%d:%H') ts.append(t) tmp = ts[0] for i in range(1,len(ts)): dif = (ts[i] - tmp).total_seconds() # print (dif,'dif') if dif != 3600: print (dif,i,key) tmp = ts[i] # break data_folder_path = config.data_folder_path if not os.path.exists(data_folder_path): equal_track_image_list = np.array(equal_track_image_list) np.random.shuffle(equal_track_image_list) equal_track_image_list = list(equal_track_image_list) # equal_track_image_list = equal_track_image_list[:2] 
train_folder = equal_track_image_list[:int(0.9 * len(equal_track_image_list))] test_folder = equal_track_image_list[int(0.9* len(equal_track_image_list)):] with open(data_folder_path,'w') as f: json.dump({'train_folder':train_folder,'test_folder': test_folder},f) print ('data_folder_path dumped to: ',data_folder_path) else: with open(data_folder_path,'r') as f: data_folder = json.load(f) train_folder = data_folder['train_folder'] test_folder = data_folder['test_folder'] print ('load data folder from: ' , data_folder_path) """ data_path = config.data_path if not os.path.exists(data_path): train_x =[] train_y=[] test_x = [] test_y = [] vgg_model = VGG_16('vgg16_weights.h5') sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy') for key in test_folder: print(key) image_folder = image_path + key +'/' track_file_path = track_path + key + '.itk' dataset_image = prepare_dataset.dataset_2(image_folder) print (dataset_image.shape) dataset_input = get_fc2(vgg_model,dataset_image) dataset_intensity = prepare_dataset.dataset_1(track_file_path) dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std) print (dataset_image.shape,'dataset_image.shape') print (dataset_intensity.shape,'dataset_intensity') data_x,data_y = prepare_dataset.create_dataset_2(dataset_input, dataset_intensity,look_back = look_back) test_x += data_x test_y += data_y # print test_y.shape,test_y # train_histss =[] # validation_histss=[] for key in train_folder: print(key) image_folder = image_path + key +'/' track_file_path = track_path + key + '.itk' dataset_image = prepare_dataset.dataset_2(image_folder) dataset_input = get_fc2(vgg_model,dataset_image) dataset_intensity = prepare_dataset.dataset_1(track_file_path) dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std) print (dataset_image.shape,'dataset_image.shape') print (dataset_intensity.shape,'dataset_intensity') data_x,data_y = prepare_dataset.create_dataset_2(dataset_input, dataset_intensity,look_back = look_back) # print (len(data_x)) train_x += data_x train_y += data_y data_x = np.array(data_x) data_y = np.array(data_y) # print (data_x.shape,data_y.shape,'data_x,data_y') # train_hists=[] # validation_hists=[] # for i in range(20): # print('start train') # hist = model.fit(data_x, data_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False) # model.reset_states() # train_hists.append(hist.history['loss'][0]) # validation_hists.append(hist.history['val_loss'][0]) # # print (hists,'hists') # train_histss.append(train_hists) # validation_histss.append(validation_hists) # print (train_histss,'train_histss') # print (validation_histss, 'validation_histss') # print ((data_x.shape),data_y.shape) train_x = np.array(train_x,dtype = 'float32') train_y = np.array(train_y,dtype = 'float32') test_x = np.array(test_x,dtype = 'float32') test_y = np.array(test_y,dtype = 'float32') hf = h5py.File(data_path) hf.create_dataset('train_x',data = train_x) hf.create_dataset('train_y',data = train_y) hf.create_dataset('test_x', data= test_x) hf.create_dataset('test_y', data= test_y) hf.close() print ('dump train test data to' ,data_path) else: with h5py.File(data_path,'r') as hf: train_x = np.array(hf.get('train_x')) train_y = np.array(hf.get('train_y')) test_x = np.array(hf.get('test_x')) test_y = np.array(hf.get('test_y')) print ('loaded train test data from ', data_path) print 
(train_x.shape,train_y.shape) print (test_x.shape,test_y.shape) """ # get train test data from pre_built dataset dataset_image_path = 'test_file/dataset_imageset.hdf5' dataset_type_path = 'test_file/dataset_type.hdf5' hf_image = h5py.File(dataset_image_path) hf_type = h5py.File(dataset_type_path) train_x = [] train_y = [] test_x = [] test_y = [] vgg_fc2_mean = config.vgg_fc2_mean vgg_fc2_std = config.vgg_fc2_std """ dataset_imageset 0.423964 mean data 0.569374 std data 0.0 min 4.71836 max """ # train_folder =train_folder[:2] # test_folder = test_folder[:2] for key in train_folder: print(key) dataset_image = np.array(hf_image.get(key)) dataset_image = prepare_dataset.normalize_intensity(dataset_image,vgg_fc2_mean,vgg_fc2_std) #normalize image (the same function of normalize intensity) dataset_type = np.array(hf_type.get(key)) if len(dataset_image) > look_back: data_x,data_y = prepare_dataset.extend_dataset_2(dataset_image, dataset_type,look_back = look_back) train_x += data_x train_y += data_y for key in test_folder: print (key) dataset_image = np.array(hf_image.get(key)) dataset_image = prepare_dataset.normalize_intensity(dataset_image,vgg_fc2_mean,vgg_fc2_std) dataset_type = np.array(hf_type.get(key)) if len(dataset_image) > look_back: data_x,data_y = prepare_dataset.extend_dataset_2(dataset_image, dataset_type,look_back = look_back) test_x += data_x test_y += data_y hf_type.close() hf_image.close() # train = train_x + test_x train_x = np.array(train_x,dtype = 'float32') train_y = np.array(train_y,dtype = 'float32') test_x = np.array(test_x,dtype = 'float32') test_y = np.array(test_y,dtype = 'float32') print (train_x.shape,train_y.shape) print (test_x.shape,test_y.shape) # nb_classes = max(len(set(train_y)), len(set(test_y))) # print set(train_y) # print set(test_y) # print nb_classes,'nb_classes' model = pretrain_model(look_back,batch_size) if os.path.exists(ModelCheckpoint_file): print ('load load_weights',ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) print(model.summary()) y_train = np_utils.to_categorical(train_y, None) y_test = np_utils.to_categorical(test_y, None) print y_train.shape train_loss_hists=[] validation_loss_hists=[] train_acc_hists=[] validation_acc_hists=[] val_acc = sys.float_info.min for i in range(1000): print (i,'epoch') # ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_'+str(look_back)+str(i)+'_whole_equal.hdf5' # print('start train') hist = model.fit(train_x, y_train, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False) print hist.history model.reset_states() train_loss_hists.append(hist.history['loss'][0]) validation_loss_hists.append(hist.history['val_loss'][0]) train_acc_hists.append(hist.history['acc'][0]) validation_acc_hists.append(hist.history['val_acc'][0]) if val_acc < hist.history['val_acc'][0]: model.save_weights(ModelCheckpoint_file) print(i,val_acc,'->',hist.history['val_acc'][0],'save_weights',ModelCheckpoint_file) val_acc = hist.history['val_acc'][0] # print (train_hists,'train_hists') # print (validation_hists, 'validation_hists') with open(hist_path,'w') as f: json.dump({'train_loss':train_loss_hists,'val_loss':validation_loss_hists,'train_acc':train_acc_hists,'val_acc':validation_acc_hists},f) # hist = model.fit(train_x, train_y, nb_epoch=2, batch_size=batch_size, verbose=2, validation_split = 0.1,shuffle=False) # break # with open(hist_path,'w') as j: # json.dump(hist.history,j) # validation_hists_least_index = validation_hists.index(min(validation_hists)) # print 
    # ('ModelCheckpoint_file', 'test_file/orig_weights_lstm_1.0_image_lookback_' + str(look_back) + str(validation_hists_least_index) + '_whole_equal.hdf5')
    # model.load_weights('test_file/orig_weights_lstm_1.0_image_lookback_' + str(look_back) + str(validation_hists_least_index) + '_whole_equal.hdf5')

    print('load_weights', ModelCheckpoint_file)
    model.load_weights(ModelCheckpoint_file)

    trainPredict = model.predict(train_x, batch_size=batch_size)
    model.reset_states()
    testPredict = model.predict(test_x, batch_size=batch_size)

    # convert the softmax outputs back to class indices
    train_predictions = np.argmax(trainPredict, 1)
    train_labels = np.argmax(y_train, 1)
    test_predictions = np.argmax(testPredict, 1)
    test_labels = np.argmax(y_test, 1)

    print(look_back, 'look_back')
    train_accuracy, train_cm = get_accuracy(train_predictions, train_labels, True)
    test_accuracy, test_cm = get_accuracy(test_predictions, test_labels, True)
    print(train_accuracy, 'train accuracy')
    print(train_cm, 'train_cm')
    print(test_accuracy, 'test accuracy')
    print(test_cm, 'test_cm')

    train_cm = train_cm.tolist()
    train_confusion_matrix_path = 'test_file/confusion_matrix_train_extend_normalize_' + str(look_back) + '.json'
    with open(train_confusion_matrix_path, 'w') as f:
        json.dump(train_cm, f)

    test_cm = test_cm.tolist()
    test_confusion_matrix_path = 'test_file/confusion_matrix_test_extend_normalize_' + str(look_back) + '.json'
    with open(test_confusion_matrix_path, 'w') as f:
        json.dump(test_cm, f)

    t2 = time.time()
    print("using %s seconds" % (t2 - t1))
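
# A hypothetical sketch of the `get_accuracy` helper called above; its real
# implementation is not shown. It assumes the third argument toggles whether a
# confusion matrix is returned alongside the accuracy, which matches how the
# return values are unpacked and later serialized with .tolist().
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix

def get_accuracy(predictions, labels, return_cm=False):
    acc = accuracy_score(labels, predictions)
    if return_cm:
        cm = confusion_matrix(labels, predictions)  # numpy array, .tolist()-able
        return acc, cm
    return acc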
def main(): t1 = time.time() train_test_file_list_path = config.train_test_file_path_divid image_path = config.image_path trackDictPath = config.track_dic_path track_dict = load.load_json(trackDictPath) suspicious_file_list_path = config.suspicious_file_list_path suspicious_file_list = load.load_json(suspicious_file_list_path) train_validation_test_subdirs_split = config.train_validation_test_subdirs_split yType = config.yType csv_path = config.csv_path confusion_matrix_path = config.confusion_matrix_path hist_path = config.hist_path nb_epoch = config.nb_epoch optimizer_choice = config.optimizer img_rows, img_cols = config.img_rows, config.img_cols model_check_pointer_file = config.ModelCheckpoint_file nb_worker = config.nb_worker num_labels = config.num_labels batch_size = config.batch_size mean_v, std_v = config.mean_v, config.std_v if not os.path.exists(train_validation_test_subdirs_split): print 'subdirs not split' subdirs_list = load.get_subdirs_list(image_path) train_subdirs_list, validation_subdirs_list, test_subdirs_list = load.split_subdirs( subdirs_list, train_validation_test_subdirs_split) else: print 'subdirs splitted' train_subdirs_list, validation_subdirs_list, test_subdirs_list = load.get_split_subdirs( train_validation_test_subdirs_split) optimizer = classification_model.optimizer_selection( optimizer_choice, nb_epoch) # model = classification_model.vgg_19_with_l2_regularizer(img_rows,img_cols,num_labels,optimizer) model_1 = classification_model.vgg_16(img_rows, img_cols, num_labels, optimizer) model_2 = classification_model.model_2() model = classification_model.merge_model(model_1, model_2, optimizer, num_labels) model.summary() # file_list = subtract_suspicious_list(file_list,suspicious_file_list) # trackDictPath = config.track_dic_path # yType = config.yType # train_file_list, test_file_list = load.get_train_test_file_split(train_subdirs_list,validation_subdirs_list,test_subdirs_list,track_dict,suspicious_file_list) # validation_file_list = train_file_list[:int(len(train_file_list) * 0.05)] # train_file_list = train_file_list[int(len(train_file_list) *0.05):] if not os.path.exists(train_test_file_list_path): print 'file_list not splited' train_file_list, validation_file_list, test_file_list = load.get_train_validation_test_file_split( train_subdirs_list, validation_subdirs_list, test_subdirs_list, track_dict, suspicious_file_list, train_test_file_list_path) else: print 'file list splitted' train_file_list, validation_file_list, test_file_list = load.load_train_validation_test_file_list( train_test_file_list_path) y_train, y_valid, y_test = load.get_train_validation_test_y( train_file_list, validation_file_list, test_file_list, trackDictPath, yType) # print len(file_list) # print len(train_file_list) # print len(validation_file_list) # print len(test_file_list) # print ('y_train',len(y_train)) # print ('y_valid', len(y_valid)) # print ('y_test',len(y_test)) # print (type(y_train)) print(y_train[0].shape, 'train shape') # train_file_list = train_file_list[:200] # validation_file_list = validation_file_list[-100:] # test_file_list = test_file_list[:100] # y_train = y_train[:200] # y_valid = y_valid[-100:] # y_test = y_test[:100] x_train = load.get_x(train_file_list) x_valid = load.get_x(validation_file_list) x_test = load.get_x(test_file_list) input_2_train = [] input_2_valid = [] input_2_test = [] for file in train_file_list: input_2_train.append(load.get_data_2(track_dict, file)) for file in validation_file_list: input_2_valid.append(load.get_data_2(track_dict, file)) 
for file in test_file_list: input_2_test.append(load.get_data_2(track_dict, file)) input_2_train = np.array(input_2_train) input_2_valid = np.array(input_2_valid) input_2_test = np.array(input_2_test) # print (x_train.shape)in # print(y_train.shape) # print (get_category_reverse_back(y_train),'set_y_train') # print (get_category_reverse_back(y_valid),'set_y_valid') # print (get_category_reverse_back(y_test),'set_y_test') # print (y_train.shape) # print (train_file_list, 'train_file_list') # print (validation_file_list,'validation_file_list') # print (test_file_list,'test_file_list') random_sample_index = random.sample(xrange(len(train_file_list)), int(len(train_file_list))) x_train_2 = [] y_train_2 = [] input_2_train_2 = [] for index in random_sample_index: file_path = train_file_list[index] x = load.rotate_image(file_path) y = y_train[index] # y = load.get_y_file(file_path,track_dict,yType) # print x.shape,x # print y x_train_2.append(x) y_train_2.append(y) input_2_train_2.append(load.get_data_2(track_dict, file_path)) x_train_2 = np.array(x_train_2) x_train_2 = np.reshape(x_train_2, (-1, 1, img_rows, img_cols)) input_2_train_2 = np.array(input_2_train_2) x_train = np.concatenate((x_train, x_train_2), axis=0) y_train = np.concatenate((y_train, y_train_2), axis=0) input_2_train = np.concatenate((input_2_train, input_2_train_2), axis=0) print x_train.shape print y_train.shape print x_train[0] print y_train[0] print('input_2_train', input_2_train.shape) print('input_2_valid', input_2_valid.shape) print('input_2_test', input_2_test.shape) r = random.random() random.shuffle(x_train, lambda: r) random.shuffle(y_train, lambda: r) random.shuffle(input_2_train_2, lambda: r) print x_train.shape print y_train.shape x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_valid = x_valid.astype('float32') x_train /= 255 x_valid /= 255 x_test /= 255 # # break if os.path.exists(model_check_pointer_file): model.load_weights(model_check_pointer_file) # hist = training(model,train_generator,validation_generator,img_rows,img_cols,128,nb_epoch,len(train_file_list),100, nb_worker,model_check_pointer_file) # hist = model_training(model,train_generator,validation_generator,img_rows,img_cols,32,nb_epoch,len(train_file_list),model_check_pointer_file) # hist = classification_model.model_training_whole(model,x_train,y_train,x_valid,y_valid, batch_size, nb_epoch,model_check_pointer_file) # # with open(hist_path, 'w') as f: # # json.dump(hist.history,f) checkpointer = ModelCheckpoint(filepath=model_check_pointer_file, verbose=1, save_best_only=True) # early_stop = EarlyStopping(monitor = 'val_loss', patience = 5, mode = 'min') early_stop = EarlyStopping(monitor='val_loss', patience=30, mode='min') reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=0.0001) hist = model.fit([x_train, input_2_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, validation_data=([x_valid, input_2_valid], y_valid), callbacks=[checkpointer, early_stop, reduce_lr], shuffle=True) print hist.history, 'hist' if os.path.exists(model_check_pointer_file): model.load_weights(model_check_pointer_file) # model.load_weights(model_check_pointer_file) # predictions = model_predicting(model,test_generator,len(y_test)) predictions = model.predict([x_test, input_2_test]) _predictions = np.argmax(predictions, 1) _labels = np.argmax(y_test, 1) write_to_csv(test_file_list, _predictions, _labels, csv_path) accuracy, cm = get_accuracy(_predictions, _labels, True) print(accuracy, 'test accuracy') 
    print(optimizer_choice, 'optimizer_choice')
    print(cm, 'cm')

    cm = cm.tolist()
    with open(confusion_matrix_path, 'w') as f:
        json.dump(cm, f)

    t2 = time.time()
    print('using ' + str(t2 - t1) + ' seconds')
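
# A hypothetical sketch of the `write_to_csv` helper used above (the real
# implementation is not shown): one row per test image with its predicted and
# true class, so misclassifications can be inspected by hand afterwards.
import csv

def write_to_csv(file_list, predictions, labels, csv_path):
    with open(csv_path, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['file', 'prediction', 'label'])
        for file_path, pred, label in zip(file_list, predictions, labels):
            writer.writerow([file_path, int(pred), int(label)])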
def main(): np.random.seed(7) t1 = time.time() image_path = config.image_path track_path = config.track_path track_dic_path = config.track_dic_path track_dict = load.load_json(track_dic_path) intensity_mean, intensity_std = config.intensity_mean, config.intensity_std batch_size = config.batch_size ModelCheckpoint_file = config.ModelCheckpoint_file look_back = config.look_back img_rows, img_cols = config.img_rows, config.img_cols subdir_list = [] hist_path = config.hist_path model = pretrain_model(look_back, batch_size) if os.path.exists(ModelCheckpoint_file): print('load load_weights', ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) print(model.summary()) # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512)) # train_y = np.random.uniform(0,1,(17,1)) # print (train_x) # train_x = np.array(train_x,dtype = 'float32') # train_y = np.array(train_y,dtype= 'float32') # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False) """ count the number of image in each typhoon sequence """ image_number_dictionary = {} for subdirs, dirs, files in os.walk(image_path): # print (subdirs) subdir_list.append(subdirs) for subdir in subdir_list: count = 0 for subdirs, dirs, files in os.walk(subdir): for file in files: count += 1 key = subdir.split('/')[-1] image_number_dictionary[key] = count if count < 24: print(key, count) # print (image_number_dictionary) """ check the number of images equals the number of track data? """ # for subdir in subdir_list: # for subdirs, dirs, files in os.walk(subdir): # for file in files: # # print (file) # [k1, k2] = file.split("-")[:2] # key = "".join((k1,k2)) # try: # mark = track_dict[key] # except KeyError: # print (file +'do not have track value') # for k in track_dict.keys(): # k2 = k[-6:] # typhoon number # k1 = k[:-6] # file = k1 +'-' + k2 +'*' # file_path = image_path + k2 +'/' + file # if not os.path.isfile(file_path): # print (file_path not exists) track_dict_number = {} equal_track_image_list = [] not_equal_track_image_list = [] for subdir in subdir_list: key = subdir.split('/')[-1] if len(key) > 0 and key not in ['201620', '201621', '201622']: track_file_path = track_path + key + '.itk' with open(track_file_path, 'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') count = 0 for row in tsv_reader: count += 1 track_dict_number[key] = count if count != image_number_dictionary[key]: not_equal_track_image_list.append(key) # print (key,count,image_number_dictionary[key],'not equal') if count == image_number_dictionary[key]: # print (key,count,image_number_dictionary[key],' equal') equal_track_image_list.append(key) # print (not_equal_track_image_list,'not_equal_track_image_list') # print (equal_track_image_list,'equal_track_image_list') """ # check_intensities statistics data_folder = not_equal_track_image_list + equal_track_image_list intensities=[] for folder in data_folder: file_name = track_path + folder+'.itk' with open(file_name,'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') for row in tsv_reader: #print row.type intensity = float(row[-2]) intensities.append(intensity) intensities = np.array(intensities) print intensities print intensities.shape print np.mean(intensities,axis=0),'mean' print np.std(intensities,axis=0),'std' print np.min(intensities,axis=0),'min' print np.max(intensities,axis =0),'max' """ print(len(equal_track_image_list), 'lenth of eqaual track image list') # "check if track file difference is one hour, result is yes for both equal 
and not_eqaul_image_list " for key in not_equal_track_image_list: ts = [] track_file_path = track_path + key + '.itk' with open(track_file_path, 'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') for row in tsv_reader: yy = row[0] mm = row[1] dd = row[2] hh = row[3] t = datetime.datetime.strptime( yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H') ts.append(t) tmp = ts[0] for i in range(1, len(ts)): dif = (ts[i] - tmp).total_seconds() # print (dif,'dif') if dif != 3600: print(dif, i, key) tmp = ts[i] # break data_folder_path = config.data_folder_path if not os.path.exists(data_folder_path): equal_track_image_list = np.array(equal_track_image_list) np.random.shuffle(equal_track_image_list) equal_track_image_list = list(equal_track_image_list) # equal_track_image_list = equal_track_image_list[:2] train_folder = equal_track_image_list[:int(0.9 * len(equal_track_image_list) )] test_folder = equal_track_image_list[int(0.9 * len(equal_track_image_list)):] with open(data_folder_path, 'w') as f: json.dump( { 'train_folder': train_folder, 'test_folder': test_folder }, f) print('data_folder_path dumped to: ', data_folder_path) else: with open(data_folder_path, 'r') as f: data_folder = json.load(f) train_folder = data_folder['train_folder'] test_folder = data_folder['test_folder'] print('load data folder from: ', data_folder_path) dataset_image_dic = {} dataset_intensity_dic = {} dataset_type_dic = {} dataset_image_path = 'test_file/dataset_imageset.hdf5' dataset_intensity_path = 'test_file/dataset_intensity.hdf5' dataset_type_path = 'test_file/dataset_type.hdf5' # for key in equal_track_image_list: # print(key) # image_folder = image_path + key +'/' # track_file_path = track_path + key + '.itk' # dataset_type = prepare_dataset.dataset_1_type(track_file_path) # print (dataset_type.shape) # dataset_type_dic[key] = dataset_type # hf_type.create_dataset(key, data = dataset_type) # hf_type.close() # equal_track_image_list=equal_track_image_list[:2] # if not os.path.exists(dataset_image_path) : # vgg_model = VGG_16('vgg16_weights.h5') # sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) # vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy') # hf_image = h5py.File(dataset_image_path) # hf_intensity = h5py.File(dataset_intensity_path) # # print ('dumped data into hf_image,intensity') # else: # print ('hf_image intensity exists') # for key in equal_track_image_list: # with h5py.File(dataset_image_path,'r') as hf_image: # dataset_image = np.array(hf_image.get(key)) # with h5py.File(dataset_intensity_path,'r') as hf_intensity: # dataset_intensity = np.array(hf_intensity.get(key)) # print (key, dataset_image.shape,dataset_intensity.shape) # train_selected_folder_index = random.sample(range(0,len(train_folder)),10) # test_selected_folder_index = random.sample(range(0,len(test_folder)),10) hf_image = h5py.File(dataset_image_path) hf_intensity = h5py.File(dataset_intensity_path) hf_type = h5py.File(dataset_type_path) # for i in train_selected_folder_index: # key = train_folder[i] # train_folder=['201314'] train_y_types = [] train_layer_outputs = [] # train_folder=train_folder[:2] # test_folder = test_folder[:2] train_folder = ['198811'] #scatter points for key in train_folder: print(key) if os.path.exists(ModelCheckpoint_file): print('load load_weights', ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) dataset_image = np.array(hf_image.get(key)) dataset_intensity = np.array(hf_intensity.get(key)) dataset_type = np.array(hf_type.get(key)) # print 
(dataset_image.shape,'dataset_image') train_x, train_y = prepare_dataset.create_dataset_2_zero( dataset_image, dataset_intensity, look_back=look_back) train_y_type = prepare_dataset.create_dataset_y_zero( dataset_type, look_back=look_back) train_x = np.array(train_x, dtype='float32') train_y = np.array(train_y, dtype='float32') train_y_types += train_y_type print len(train_y_type), key if train_x.shape[0] > 0: train_predict_image = 'test_file/tsne_visualization_12_zero_arrow/' + str( key) + '_' + str(look_back) + '_train.png' # test_sample = np.array(train_x[0], train_outputs = [] for sample in train_x: sample = np.reshape(sample, (-1, look_back, 2048)) train_output_layer = get_lstm_intermidiate_layer_output( model, sample, layer=-2) model.reset_states() train_outputs.append(train_output_layer[0]) # print train_output_layer.shape # print train_output_layer train_layer_outputs += train_outputs train_outputs = np.array(train_outputs) print train_outputs.shape Y = tsne(train_outputs, 2, 50, 20.0) colors = plt.cm.rainbow(np.linspace(0, 1, 8)) labels_sets = set(train_y_type) scatter_dic = {} fig = plt.figure() for i in labels_sets: ii = np.where(train_y_type == i)[0] x = [Y[index, 0] for index in ii] y = [Y[index, 1] for index in ii] scatter_dic[i] = plt.scatter(x, y, color=colors[int(i)]) line = plt.plot(Y[:, 0], Y[:, 1], 'k')[0] add_arrow(line) plt.legend(scatter_dic.values(), scatter_dic.keys(), scatterpoints=1, loc='lower left', ncol=6, fontsize=8) plt.xlabel(' x') plt.ylabel('y') plt.title('tsne of lstm feature,' + 'train_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key)) # plt.savefig('test_tsne_2.png') plt.savefig(train_predict_image) plt.close(fig) # fig = plt.figure() # ax = fig.add_subplot(111, projection='3d') # for i in labels_sets: # ii = np.where(train_y_type == i)[0] # x = [Y[index,0] for index in ii] # y = [Y[index,1] for index in ii] # z = [train_y[index] for index in ii] # scatter_dic[i] = ax.scatter(x,y,z,color = colors[int(i)]) # plt.legend(scatter_dic.values(),scatter_dic.keys(),scatterpoints=1,loc='lower left',ncol = 6,fontsize=8) # plt.xlabel(' x') # plt.ylabel('y') # plt.title('3d tsne of lstm feature,' +'train_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key)) # plt.savefig('test_tsne_3d_2.png') # # plt.savefig(train_predict_image) # plt.close(fig) # fig = plt.figure() # # ax.scatter(Y[:,0], Y[:,1], train_y) # ax.set_xlabel('X Label') # ax.set_ylabel('Y Label') # ax.set_zlabel('intensity Label') # plt.savefig('tsne_test.png') # break """ test_y_types=[] test_layer_outputs=[] for key in test_folder: # key = test_folder[i] print (key) if os.path.exists(ModelCheckpoint_file): print ('load load_weights',ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) dataset_image = np.array(hf_image.get(key)) dataset_intensity = np.array(hf_intensity.get(key)) dataset_type = np.array(hf_type.get(key)) test_x,test_y = prepare_dataset.create_dataset_2_zero(dataset_image, dataset_intensity,look_back = look_back) test_x = np.array(test_x,dtype = 'float32') test_y = np.array(test_y,dtype = 'float32') test_y_type = prepare_dataset.create_dataset_y_zero(dataset_type,look_back=look_back) test_y_types += test_y_type print len(test_y_type),key if test_x.shape[0] > 0: test_predict_image = 'test_file/tsne_visualization_12_zero_arrow/' + str(key)+'_'+str(look_back)+'_test.png' # test_sample = np.array(train_x[0], test_outputs=[] for sample in test_x: sample = np.reshape(sample,(-1,look_back,2048)) test_output_layer = 
get_lstm_intermidiate_layer_output(model,sample,layer=-2) model.reset_states() test_outputs.append(test_output_layer[0]) # print train_output_layer.shape # print train_output_layer test_layer_outputs += test_outputs test_outputs=np.array(test_outputs) print test_outputs.shape Y = tsne(test_outputs, 2, 50, 20.0); colors = plt.cm.rainbow(np.linspace(0, 1, 8)) labels_sets = set(test_y_type) scatter_dic ={} fig = plt.figure() for i in labels_sets: ii = np.where(test_y_type == i)[0] x = [Y[index,0] for index in ii] y = [Y[index,1] for index in ii] scatter_dic[i] = plt.scatter(x,y,color = colors[int(i)]) line=plt.plot(Y[:,0],Y[:,1],'k')[0] add_arrow(line) plt.legend(scatter_dic.values(),scatter_dic.keys(),scatterpoints=1,loc='lower left',ncol = 6,fontsize=8) plt.xlabel(' x') plt.ylabel('y') plt.title('tsne of lstm feature,' +'test_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key)) # plt.savefig('test_tsne_2.png') plt.savefig(test_predict_image) plt.close(fig) # fig = plt.figure() # ax = fig.add_subplot(111, projection='3d') # for i in labels_sets: # ii = np.where(test_y_type == i)[0] # x = [Y[index,0] for index in ii] # y = [Y[index,1] for index in ii] # z = [test_y[index] for index in ii] # scatter_dic[i] = ax.scatter(x,y,z,color = colors[int(i)]) # plt.legend(scatter_dic.values(),scatter_dic.keys(),scatterpoints=1,loc='lower left',ncol = 6,fontsize=8) # plt.xlabel(' x') # plt.ylabel('y') # plt.title('3d tsne of lstm feature,' +'test_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key)) # plt.savefig('test_tsne_3d_2_test.png') # # plt.savefig(train_predict_image) # plt.close(fig) # fig = plt.figure() # # ax.scatter(Y[:,0], Y[:,1], train_y) # ax.set_xlabel('X Label') # ax.set_ylabel('Y Label') # ax.set_zlabel('intensity Label') # plt.savefig('tsne_test.png') # break """ hf_image.close() hf_intensity.close() hf_type.close() # train_y_types = np.array(train_y_types) # train_layer_outputs = np.array(train_layer_outputs) # test_y_types = np.array(test_y_types) # test_layer_outputs = np.array(test_layer_outputs) # print train_y_types.shape # print test_y_types.shape # Y = tsne(train_layer_outputs, 2, 50, 20.0); # colors = plt.cm.rainbow(np.linspace(0, 1, 8)) # labels_sets = set(train_y_types) # scatter_dic ={} # fig = plt.figure() # train_predict_image = 'test_file/tsne_visualization_12_zero/' +str(look_back)+'_whole_train.png' # for i in labels_sets: # ii = np.where(train_y_types == i)[0] # x = [Y[index,0] for index in ii] # y = [Y[index,1] for index in ii] # scatter_dic[i] = plt.scatter(x,y,color = colors[int(i)]) # plt.legend(scatter_dic.values(),scatter_dic.keys(),scatterpoints=1,loc='lower left',ncol = 6,fontsize=8) # plt.xlabel(' x') # plt.ylabel('y') # plt.title('tsne of lstm feature,' +'whole train_predicts_look_back ' + str(look_back)) # # plt.savefig('test_tsne_2.png') # plt.savefig(train_predict_image) # plt.close(fig) # Y = tsne(test_layer_outputs, 2, 50, 20.0); # colors = plt.cm.rainbow(np.linspace(0, 1, 8)) # labels_sets = set(test_y_types) # scatter_dic ={} # fig = plt.figure() # test_predict_image = 'test_file/tsne_visualization_12_zero/' +str(look_back)+'_whole_test.png' # for i in labels_sets: # ii = np.where(test_y_types == i)[0] # x = [Y[index,0] for index in ii] # y = [Y[index,1] for index in ii] # scatter_dic[i] = plt.scatter(x,y,color = colors[int(i)]) # plt.legend(scatter_dic.values(),scatter_dic.keys(),scatterpoints=1,loc='lower left',ncol = 6,fontsize=8) # plt.xlabel(' x') # plt.ylabel('y') # plt.title('tsne of lstm feature,' 
+'whole_test_predicts_look_back ' + str(look_back) ) # # plt.savefig('test_tsne_2.png') # plt.savefig(test_predict_image) # plt.close(fig) """ train_folder = ['199307'] for key in train_folder: print(key) if os.path.exists(ModelCheckpoint_file): print ('load load_weights',ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) dataset_image = np.array(hf_image.get(key)) dataset_intensity = np.array(hf_intensity.get(key)) dataset_type = np.array(hf_type.get(key)) # print (dataset_image.shape,'dataset_image') train_x,train_y = prepare_dataset.create_dataset_2_zero(dataset_image, dataset_intensity,look_back = look_back) train_y_type = prepare_dataset.create_dataset_y_zero(dataset_type,look_back=look_back) train_x = np.array(train_x,dtype = 'float32') train_y = np.array(train_y,dtype = 'float32') train_y_types += train_y_type print len(train_y_type),key if train_x.shape[0] >0: train_predict_image = 'test_file/tsne_visualization_24_zero/' + str(key)+'_'+str(look_back)+'arrow_train.png' # test_sample = np.array(train_x[0], train_outputs=[] for sample in train_x: sample = np.reshape(sample,(-1,look_back,2048)) train_output_layer = get_lstm_intermidiate_layer_output(model,sample,layer=-2) model.reset_states() train_outputs.append(train_output_layer[0]) # print train_output_layer.shape # print train_output_layer train_layer_outputs += train_outputs train_outputs=np.array(train_outputs) print train_outputs.shape Y = tsne(train_outputs, 2, 50, 20.0); # tsne = TSNE(n_components=2, init='pca',random_state = 0) # Y = tsne.fit_transform(train_outputs) colors = plt.cm.rainbow(np.linspace(0, 1, 8)) labels_sets = set(train_y_type) scatter_dic ={} fig = plt.figure() for i in labels_sets: ii = np.where(train_y_type == i)[0] x = [Y[index,0] for index in ii] y = [Y[index,1] for index in ii] scatter_dic[i] = plt.scatter(x,y,color = colors[int(i)]) # x = Y[:0] # y = Y[:1] # print Y.shape line=plt.plot(Y[:,0],Y[:,1],'k')[0] add_arrow(line) # plt.quiver(x[:-1], y[:-1], x[1:]-x[:-1], y[1:]-y[:-1], scale_units='xy', angles='xy', scale=1) plt.legend(scatter_dic.values(),scatter_dic.keys(),scatterpoints=1,loc='lower left',ncol = 6,fontsize=8) plt.xlabel(' x') plt.ylabel('y') plt.title('tsne of lstm feature,' +'train_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key)) # plt.savefig('test_tsne_2.png') plt.savefig(train_predict_image) plt.close(fig) """ t2 = time.time() print("using %s seconds" % (t2 - t1))
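
# A hypothetical sketch of the `add_arrow` helper used in the t-SNE plots above
# (its definition is not shown): it decorates an existing matplotlib Line2D with
# arrow heads so the temporal order of the embedded LSTM states stays visible.
def add_arrow(line, size=15, color=None):
    if color is None:
        color = line.get_color()
    xdata, ydata = line.get_xdata(), line.get_ydata()
    ax = line.axes
    for i in range(len(xdata) - 1):
        ax.annotate('',
                    xy=(xdata[i + 1], ydata[i + 1]),
                    xytext=(xdata[i], ydata[i]),
                    arrowprops=dict(arrowstyle='->', color=color),
                    size=size)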
def main(): np.random.seed(7) t1 = time.time() image_path = config.image_path track_path = config.track_path track_dic_path = config.track_dic_path track_dict = load.load_json(track_dic_path) intensity_mean, intensity_std = config.intensity_mean, config.intensity_std batch_size = config.batch_size ModelCheckpoint_file = config.ModelCheckpoint_file look_back = config.look_back img_rows, img_cols = config.img_rows, config.img_cols subdir_list = [] hist_path = config.hist_path model = pretrain_model(look_back, batch_size) if os.path.exists(ModelCheckpoint_file): print('load load_weights', ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) print(model.summary()) # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512)) # train_y = np.random.uniform(0,1,(17,1)) # print (train_x) # train_x = np.array(train_x,dtype = 'float32') # train_y = np.array(train_y,dtype= 'float32') # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False) """ count the number of image in each typhoon sequence """ image_number_dictionary = {} for subdirs, dirs, files in os.walk(image_path): # print (subdirs) subdir_list.append(subdirs) for subdir in subdir_list: count = 0 for subdirs, dirs, files in os.walk(subdir): for file in files: count += 1 key = subdir.split('/')[-1] image_number_dictionary[key] = count if count < 24: print(key, count) # print (image_number_dictionary) """ check the number of images equals the number of track data? """ # for subdir in subdir_list: # for subdirs, dirs, files in os.walk(subdir): # for file in files: # # print (file) # [k1, k2] = file.split("-")[:2] # key = "".join((k1,k2)) # try: # mark = track_dict[key] # except KeyError: # print (file +'do not have track value') # for k in track_dict.keys(): # k2 = k[-6:] # typhoon number # k1 = k[:-6] # file = k1 +'-' + k2 +'*' # file_path = image_path + k2 +'/' + file # if not os.path.isfile(file_path): # print (file_path not exists) track_dict_number = {} equal_track_image_list = [] not_equal_track_image_list = [] for subdir in subdir_list: key = subdir.split('/')[-1] if len(key) > 0 and key not in ['201620', '201621', '201622']: track_file_path = track_path + key + '.itk' with open(track_file_path, 'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') count = 0 for row in tsv_reader: count += 1 track_dict_number[key] = count if count != image_number_dictionary[key]: not_equal_track_image_list.append(key) # print (key,count,image_number_dictionary[key],'not equal') if count == image_number_dictionary[key]: # print (key,count,image_number_dictionary[key],' equal') equal_track_image_list.append(key) # print (not_equal_track_image_list,'not_equal_track_image_list') # print (equal_track_image_list,'equal_track_image_list') print(len(equal_track_image_list), 'lenth of eqaual track image list') # "check if track file difference is one hour, result is yes for both equal and not_eqaul_image_list " for key in not_equal_track_image_list: ts = [] track_file_path = track_path + key + '.itk' with open(track_file_path, 'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') for row in tsv_reader: yy = row[0] mm = row[1] dd = row[2] hh = row[3] t = datetime.datetime.strptime( yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H') ts.append(t) tmp = ts[0] for i in range(1, len(ts)): dif = (ts[i] - tmp).total_seconds() # print (dif,'dif') if dif != 3600: print(dif, i, key) tmp = ts[i] # break data_folder_path = config.data_folder_path if not 
os.path.exists(data_folder_path): equal_track_image_list = np.array(equal_track_image_list) np.random.shuffle(equal_track_image_list) equal_track_image_list = list(equal_track_image_list) # equal_track_image_list = equal_track_image_list[:2] train_folder = equal_track_image_list[:int(0.9 * len(equal_track_image_list) )] test_folder = equal_track_image_list[int(0.9 * len(equal_track_image_list)):] with open(data_folder_path, 'w') as f: json.dump( { 'train_folder': train_folder, 'test_folder': test_folder }, f) print('data_folder_path dumped to: ', data_folder_path) else: with open(data_folder_path, 'r') as f: data_folder = json.load(f) train_folder = data_folder['train_folder'] test_folder = data_folder['test_folder'] print('load data folder from: ', data_folder_path) dataset_image_dic = {} dataset_intensity_dic = {} dataset_image_path = 'test_file/dataset_imageset.hdf5' dataset_intensity_path = 'test_file/dataset_intensity.hdf5' # equal_track_image_list=equal_track_image_list[:2] # if not os.path.exists(dataset_image_path) : # vgg_model = VGG_16('vgg16_weights.h5') # sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) # vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy') # hf_image = h5py.File(dataset_image_path) # hf_intensity = h5py.File(dataset_intensity_path) # for key in equal_track_image_list: # print(key) # image_folder = image_path + key +'/' # track_file_path = track_path + key + '.itk' # dataset_image = prepare_dataset.dataset_2(image_folder) # dataset_input = get_fc2(vgg_model,dataset_image) # dataset_input = np.array(dataset_input) # dataset_intensity = prepare_dataset.dataset_1(track_file_path) # dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std) # print (dataset_input.shape,'dataset_image.shape') # print (dataset_intensity.shape,'dataset_intensity') # dataset_image_dic[key] = dataset_input # dataset_intensity_dic[key] = dataset_intensity # hf_image.create_dataset(key, data = dataset_input) # hf_intensity.create_dataset(key, data = dataset_intensity) # hf_image.close() # hf_intensity.close() # print ('dumped data into hf_image,intensity') # else: # print ('hf_image intensity exists') # for key in equal_track_image_list: # with h5py.File(dataset_image_path,'r') as hf_image: # dataset_image = np.array(hf_image.get(key)) # with h5py.File(dataset_intensity_path,'r') as hf_intensity: # dataset_intensity = np.array(hf_intensity.get(key)) # print (key, dataset_image.shape,dataset_intensity.shape) # train_selected_folder_index = random.sample(range(0,len(train_folder)),10) # test_selected_folder_index = random.sample(range(0,len(test_folder)),10) hf_image = h5py.File(dataset_image_path) hf_intensity = h5py.File(dataset_intensity_path) # for i in train_selected_folder_index: # key = train_folder[i] for key in train_folder: print(key) if os.path.exists(ModelCheckpoint_file): print('load load_weights', ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) dataset_image = np.array(hf_image.get(key)) dataset_intensity = np.array(hf_intensity.get(key)) train_x, train_y = prepare_dataset.create_dataset_2( dataset_image, dataset_intensity, look_back=look_back) train_x = np.array(train_x, dtype='float32') train_y = np.array(train_y, dtype='float32') train_predict_image = 'test_file/prediction_output_6_zero_r2/' + str( key) + '_' + str(look_back) + '_train.png' trainPredict = model.predict(train_x, batch_size=batch_size) model.reset_states() trainPredict = prepare_dataset.reverse_normalize_intensity( 
trainPredict, intensity_mean, intensity_std) trainY = prepare_dataset.reverse_normalize_intensity( train_y, intensity_mean, intensity_std) fig = plt.figure() plt.title('train_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key)) z = np.polyfit(trainPredict, trainY, 1) p = np.poly1d(z) xp = np.linspace(np.min(trainPredict), 1, np.max(trainPredict)) plt.plot(trainPredict, trainY, '.', xp, p(xp), '-') plt.xlabel('prediction value') plt.ylabel('true value') # plt.legend(loc = 'upper left', shadow =True) plt.savefig(train_predict_image) plt.close(fig) break # for i in test_selected_folder_index: for key in test_folder: # key = test_folder[i] print(key) if os.path.exists(ModelCheckpoint_file): print('load load_weights', ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) dataset_image = np.array(hf_image.get(key)) dataset_intensity = np.array(hf_intensity.get(key)) test_x, test_y = prepare_dataset.create_dataset_2(dataset_image, dataset_intensity, look_back=look_back) test_x = np.array(test_x, dtype='float32') test_y = np.array(test_y, dtype='float32') testPredict = model.predict(test_x, batch_size=batch_size) model.reset_states() # # # invert predictions testPredict = prepare_dataset.reverse_normalize_intensity( testPredict, intensity_mean, intensity_std) testY = prepare_dataset.reverse_normalize_intensity( test_y, intensity_mean, intensity_std) test_predict_image = 'test_file/prediction_output_6_zero_r2/' + str( key) + '_' + str(look_back) + '_test.png' fig = plt.figure() z = np.polyfit(testPredict, testY, 1) p = np.poly1d(z) xp = np.linspace(np.min(testPredict), 1, np.max(testPredict)) plt.plot(testPredict, testY, '.', xp, p(xp), '-') plt.title('test_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key)) plt.xlabel('prediction value') plt.ylabel('true value') # plt.legend(loc = 'upper left', shadow =True) plt.savefig(test_predict_image) plt.close(fig) break hf_image.close() hf_intensity.close() # trainY = prepare_dataset.reverse_normalize_intensity(train_y,intensity_mean,intensity_std) # print('load_weights',ModelCheckpoint_file) # model.load_weights(ModelCheckpoint_file) # trainPredict = model.predict(train_x, batch_size=batch_size) # trainPredict = prepare_dataset.reverse_normalize_intensity(trainPredict,intensity_mean,intensity_std) # trainY = prepare_dataset.reverse_normalize_intensity(train_y,intensity_mean,intensity_std) # trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:,0])) # model.reset_states() # print('Train Score: %.2f RMSE' % (trainScore)) # testPredict = model.predict(test_x, batch_size=batch_size) # # # invert predictions # testPredict = prepare_dataset.reverse_normalize_intensity(testPredict,intensity_mean,intensity_std) # testY = prepare_dataset.reverse_normalize_intensity(test_y,intensity_mean,intensity_std) # # # calculate root mean squared error # testScore = math.sqrt(mean_squared_error(testY, testPredict[:,0])) # print('Test Score: %.2f RMSE' % (testScore)) # print(look_back,'look_back') """ train_predict_image = config.train_predict_image test_predict_image = config.test_predict_image fig = plt.figure() plt.title('train_predicts_look_back') plt.plot(list(trainPredict[:20000,0]),'r--',label= 'train_predict') plt.plot(list(trainY[:20000]), 'g--',label = 'train') plt.xlabel('typhoon_image') plt.ylabel('typhoon intensity') plt.legend(loc = 'upper left', shadow =True) plt.savefig(train_predict_image) plt.close(fig) fig = plt.figure() plt.title('test_predicts_look_back') 
    plt.plot(list(testPredict[:10000, 0]), 'r--', label='test_predict')
    plt.plot(list(testY[:10000]), 'g--', label='test')
    plt.xlabel('typhoon_image')
    plt.ylabel('typhoon intensity')
    plt.legend(loc='upper left', shadow=True)
    plt.savefig(test_predict_image)
    plt.close(fig)
    """

    t2 = time.time()
    print("using %s seconds" % (t2 - t1))
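
# A hypothetical sketch of the normalization pair assumed throughout these
# scripts (the real prepare_dataset module is not shown). It assumes plain
# z-score scaling, which is consistent with how the mean/std config values are
# threaded through normalize_intensity and reverse_normalize_intensity above.
def normalize_intensity(values, mean, std):
    return (values - mean) / std

def reverse_normalize_intensity(values, mean, std):
    return values * std + mean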
# coding: utf-8
# 23. Section structure
# Print every section name contained in the article together with its level
# (e.g. "== Section name ==" is level 1).
import re

import load

pattern = re.compile(r"""^(={2,})\s*(.+?)\s*\1.*$""", re.MULTILINE)

# extract
result = pattern.findall(load.load_json("イギリス"))

# display
# '==' => 1
for category in result:
    level = len(category[0]) - 1
    indent = "\t" * (level - 1)
    print(f"{indent}{category[1]}({level})")
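
# A short demonstration of the `pattern` compiled above on hand-written input
# (the strings here are illustrative, not taken from the article data):
# "== History ==" has two '=' characters, so its level is 2 - 1 = 1.
sample = "== History ==\n=== Politics ===\n"
for equals, name in pattern.findall(sample):
    print(name, len(equals) - 1)  # -> History 1, Politics 2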
def main(): np.random.seed(7) t1 = time.time() image_path = config.image_path track_path = config.track_path track_dic_path = config.track_dic_path track_dict = load.load_json(track_dic_path) intensity_mean, intensity_std = config.intensity_mean, config.intensity_std batch_size = config.batch_size ModelCheckpoint_file = config.ModelCheckpoint_file look_back = config.look_back img_rows, img_cols = config.img_rows, config.img_cols subdir_list = [] hist_path = config.hist_path model = pretrain_model(look_back, batch_size) if os.path.exists(ModelCheckpoint_file): print('load load_weights', ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) print(model.summary()) # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512)) # train_y = np.random.uniform(0,1,(17,1)) # print (train_x) # train_x = np.array(train_x,dtype = 'float32') # train_y = np.array(train_y,dtype= 'float32') # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False) """ count the number of image in each typhoon sequence """ image_number_dictionary = {} for subdirs, dirs, files in os.walk(image_path): # print (subdirs) subdir_list.append(subdirs) for subdir in subdir_list: count = 0 for subdirs, dirs, files in os.walk(subdir): for file in files: count += 1 key = subdir.split('/')[-1] image_number_dictionary[key] = count if count < 24: print(key, count) # print (image_number_dictionary) """ check the number of images equals the number of track data? """ # for subdir in subdir_list: # for subdirs, dirs, files in os.walk(subdir): # for file in files: # # print (file) # [k1, k2] = file.split("-")[:2] # key = "".join((k1,k2)) # try: # mark = track_dict[key] # except KeyError: # print (file +'do not have track value') # for k in track_dict.keys(): # k2 = k[-6:] # typhoon number # k1 = k[:-6] # file = k1 +'-' + k2 +'*' # file_path = image_path + k2 +'/' + file # if not os.path.isfile(file_path): # print (file_path not exists) track_dict_number = {} equal_track_image_list = [] not_equal_track_image_list = [] for subdir in subdir_list: key = subdir.split('/')[-1] if len(key) > 0 and key not in ['201620', '201621', '201622']: track_file_path = track_path + key + '.itk' with open(track_file_path, 'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') count = 0 for row in tsv_reader: count += 1 track_dict_number[key] = count if count != image_number_dictionary[key]: not_equal_track_image_list.append(key) # print (key,count,image_number_dictionary[key],'not equal') if count == image_number_dictionary[key]: # print (key,count,image_number_dictionary[key],' equal') equal_track_image_list.append(key) # print (not_equal_track_image_list,'not_equal_track_image_list') # print (equal_track_image_list,'equal_track_image_list') print(len(equal_track_image_list), 'lenth of eqaual track image list') # "check if track file difference is one hour, result is yes for both equal and not_eqaul_image_list " for key in not_equal_track_image_list: ts = [] track_file_path = track_path + key + '.itk' with open(track_file_path, 'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') for row in tsv_reader: yy = row[0] mm = row[1] dd = row[2] hh = row[3] t = datetime.datetime.strptime( yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H') ts.append(t) tmp = ts[0] for i in range(1, len(ts)): dif = (ts[i] - tmp).total_seconds() # print (dif,'dif') if dif != 3600: print(dif, i, key) tmp = ts[i] # break data_folder = 'test_file/sorted_intensity_data_folder.json' # intensity_min = 
{} # for key in equal_track_image_list: # track_file_path = track_path + key + '.itk' # dataset_intensity = prepare_dataset.dataset_1(track_file_path) # intensity_min[key] = min(dataset_intensity) # sorted_intensity_min = sorted(intensity_min.iteritems(), key=lambda (k,v): (v,k)) # sorted_intensity_min =list(sorted_intensity_min) # sorted_intensity_min = np.array(sorted_intensity_min) # sorted_data_folder = sorted_intensity_min[:,0] # train_folder=[] # test_folder=[] # for i in range(0,len(sorted_data_folder),10): # if (i+10) <= len(sorted_data_folder): # j = i+10 # else: # j = len(sorted_data_folder) +1 # small_list = sorted_data_folder[i:j] # small_list = np.array(small_list) # np.random.shuffle(small_list) # small_list = list(small_list) # train_folder += small_list[:int(0.9*len(small_list))] # test_folder += small_list[int(0.9*len(small_list)):] # print train_folder # print test_folder # print len(train_folder) # print len(test_folder) # with open(data_folder,'w') as f: # json.dump({'train_folder':train_folder,'test_folder':test_folder},f) """ # get train test data from pre_built dataset """ # dataset_imageset # 0.423964 mean data # 0.569374 std data # 0.0 min # 4.71836 max dataset_image_path = 'test_file/dataset_imageset.hdf5' dataset_intensity_path = 'test_file/dataset_intensity.hdf5' hf_image = h5py.File(dataset_image_path) hf_intensity = h5py.File(dataset_intensity_path) train_x = [] train_y = [] test_x = [] test_y = [] vgg_fc2_mean = config.vgg_fc2_mean vgg_fc2_std = config.vgg_fc2_std with open(data_folder, 'r') as f: data_folder = json.load(f) train_folder = data_folder['train_folder'] test_folder = data_folder['test_folder'] train_folder = np.array(train_folder) test_folder = np.array(test_folder) np.random.shuffle(train_folder) np.random.shuffle(test_folder) train_folder = list(train_folder) test_folder = list(test_folder) for key in train_folder: print(key) dataset_image = np.array(hf_image.get(key)) # dataset_image = prepare_dataset.normalize_intensity(dataset_image,vgg_fc2_mean,vgg_fc2_std) #normalize image (the same function of normalize intensity) dataset_intensity = np.array(hf_intensity.get(key)) if len(dataset_intensity) > look_back: data_x, data_y = prepare_dataset.extend_dataset_2_zero( dataset_image, dataset_intensity, look_back=look_back) train_x += data_x train_y += data_y for key in test_folder: print(key) dataset_image = np.array(hf_image.get(key)) # dataset_image = prepare_dataset.normalize_intensity(dataset_image,vgg_fc2_mean,vgg_fc2_std) dataset_intensity = np.array(hf_intensity.get(key)) if len(dataset_intensity) > look_back: data_x, data_y = prepare_dataset.extend_dataset_2_zero( dataset_image, dataset_intensity, look_back=look_back) test_x += data_x test_y += data_y # train = train_x + test_x train_x = np.array(train_x, dtype='float32') train_y = np.array(train_y, dtype='float32') test_x = np.array(test_x, dtype='float32') test_y = np.array(test_y, dtype='float32') print(train_x.shape, train_y.shape) print(test_x.shape, test_y.shape) train_hists = [] validation_hists = [] val_loss = sys.float_info.max for i in range(1000): print(i, 'epoch') # ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_'+str(look_back)+str(i)+'_whole_equal.hdf5' # print('start train') hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1, shuffle=False) model.reset_states() train_hists.append(hist.history['loss'][0]) validation_hists.append(hist.history['val_loss'][0]) if val_loss > hist.history['val_loss'][0]: 
                model.save_weights(ModelCheckpoint_file)
                print(i, val_loss, '->', hist.history['val_loss'][0],
                      'save_weights', ModelCheckpoint_file)
                val_loss = hist.history['val_loss'][0]

        print(train_hists, 'train_hists')
        print(validation_hists, 'validation_hists')
        with open(hist_path, 'w') as f:
            json.dump({'train_loss': train_hists, 'val_loss': validation_hists}, f)

        # hist = model.fit(train_x, train_y, nb_epoch=2, batch_size=batch_size, verbose=2, validation_split=0.1, shuffle=False)
        # break

    # with open(hist_path, 'w') as j:
    #     json.dump(hist.history, j)

    # validation_hists_least_index = validation_hists.index(min(validation_hists))
    # print ('ModelCheckpoint_file', 'test_file/orig_weights_lstm_1.0_image_lookback_' + str(look_back) + str(validation_hists_least_index) + '_whole_equal.hdf5')
    # model.load_weights('test_file/orig_weights_lstm_1.0_image_lookback_' + str(look_back) + str(validation_hists_least_index) + '_whole_equal.hdf5')

    print('load_weights', ModelCheckpoint_file)
    model.load_weights(ModelCheckpoint_file)

    trainPredict = model.predict(train_x, batch_size=batch_size)
    trainPredict = prepare_dataset.reverse_normalize_intensity(
        trainPredict, intensity_mean, intensity_std)
    trainY = prepare_dataset.reverse_normalize_intensity(
        train_y, intensity_mean, intensity_std)
    trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:, 0]))
    model.reset_states()
    print('Train Score: %.2f RMSE' % trainScore)

    testPredict = model.predict(test_x, batch_size=batch_size)
    # invert predictions
    testPredict = prepare_dataset.reverse_normalize_intensity(
        testPredict, intensity_mean, intensity_std)
    testY = prepare_dataset.reverse_normalize_intensity(
        test_y, intensity_mean, intensity_std)
    # calculate root mean squared error
    testScore = math.sqrt(mean_squared_error(testY, testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % testScore)

    print(look_back, 'look_back')

    t2 = time.time()
    print("using %s seconds" % (t2 - t1))
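
# A hypothetical sketch of the sliding-window builder these scripts rely on
# (prepare_dataset is not shown). It assumes each sample is `look_back`
# consecutive feature vectors and the target is the intensity value that
# immediately follows the window; the function and argument names are taken
# from the calls above, the body is an assumption.
def create_dataset_2(features, intensities, look_back=1):
    data_x, data_y = [], []
    for i in range(len(features) - look_back):
        data_x.append(features[i:i + look_back])   # window of feature vectors
        data_y.append(intensities[i + look_back])  # next-step intensity
    return data_x, data_y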
def main(): np.random.seed(7) t1 = time.time() image_path = config.image_path track_path = config.track_path track_dic_path = config.track_dic_path track_dict = load.load_json(track_dic_path) intensity_mean,intensity_std = config.intensity_mean, config.intensity_std batch_size = config.batch_size # ModelCheckpoint_file = config.ModelCheckpoint_file # ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_24_whole_equal_pretrain_epoch_1000_adadelta_0.0001_prediction_zero.hdf5' # ModelCheckpoint_file='test_file/orig_weights_lstm_1.0_image_lookback_6_whole_equal_pretrain_epoch_1000_adadelta_0.0001_server_2.hdf5' # look_back = config.look_back look_back = 24 img_rows,img_cols = config.img_rows,config.img_cols subdir_list = [] hist_path = config.hist_path model = pretrain_model(look_back,batch_size) # ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_6_whole_equal_pretrain_epoch_1000_adadelta_0.0001_04_13.hdf5' # ModelCheckpoint_file ='test_file/orig_weights_lstm_1.0_image_lookback_12_whole_equal_pretrain_epoch_1000_adadelta_0.0001_prediction_zero.hdf5' # ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_24_whole_equal_pretrain_epoch_1000_adadelta_0.0001_prediction_zero.hdf5' # ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_6_whole_equal_pretrain_epoch_1000_adadelta_0.0001_04_13.hdf5' # ModelCheckpoint_file = 'test_file/orig_hist_lstm_1.0_image_lookback_12_whole_equal_pretrain__epoch_1000_adadelta_0.0001_prediction.hdf5' ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_24_whole_equal_pretrain_epoch_1000_adadelta_0.0001_04_13.hdf5' if os.path.exists(ModelCheckpoint_file): print ('load load_weights',ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) print(model.summary()) # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512)) # train_y = np.random.uniform(0,1,(17,1)) # print (train_x) # train_x = np.array(train_x,dtype = 'float32') # train_y = np.array(train_y,dtype= 'float32') # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False) """ count the number of image in each typhoon sequence """ image_number_dictionary={} for subdirs, dirs, files in os.walk(image_path): # print (subdirs) subdir_list.append(subdirs) for subdir in subdir_list: count = 0 for subdirs, dirs, files in os.walk(subdir): for file in files: count += 1 key = subdir.split('/')[-1] image_number_dictionary[key] = count if count < 24: print (key,count) # print (image_number_dictionary) """ check the number of images equals the number of track data? 
""" # for subdir in subdir_list: # for subdirs, dirs, files in os.walk(subdir): # for file in files: # # print (file) # [k1, k2] = file.split("-")[:2] # key = "".join((k1,k2)) # try: # mark = track_dict[key] # except KeyError: # print (file +'do not have track value') # for k in track_dict.keys(): # k2 = k[-6:] # typhoon number # k1 = k[:-6] # file = k1 +'-' + k2 +'*' # file_path = image_path + k2 +'/' + file # if not os.path.isfile(file_path): # print (file_path not exists) track_dict_number ={} equal_track_image_list = [] not_equal_track_image_list = [] for subdir in subdir_list: key =subdir.split('/')[-1] if len(key) > 0 and key not in ['201620','201621','201622']: track_file_path = track_path + key+'.itk' with open(track_file_path,'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') count = 0 for row in tsv_reader: count += 1 track_dict_number[key] = count if count != image_number_dictionary[key]: not_equal_track_image_list.append(key) # print (key,count,image_number_dictionary[key],'not equal') if count == image_number_dictionary[key]: # print (key,count,image_number_dictionary[key],' equal') equal_track_image_list.append(key) # print (not_equal_track_image_list,'not_equal_track_image_list') # print (equal_track_image_list,'equal_track_image_list') """ # check_intensities statistics data_folder = not_equal_track_image_list + equal_track_image_list intensities=[] for folder in data_folder: file_name = track_path + folder+'.itk' with open(file_name,'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') for row in tsv_reader: #print row.type intensity = float(row[-2]) intensities.append(intensity) intensities = np.array(intensities) print intensities print intensities.shape print np.mean(intensities,axis=0),'mean' print np.std(intensities,axis=0),'std' print np.min(intensities,axis=0),'min' print np.max(intensities,axis =0),'max' """ print (len(equal_track_image_list),'lenth of eqaual track image list') # "check if track file difference is one hour, result is yes for both equal and not_eqaul_image_list " for key in not_equal_track_image_list: ts =[] track_file_path = track_path + key+'.itk' with open(track_file_path,'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') for row in tsv_reader: yy = row[0] mm = row[1] dd = row[2] hh = row[3] t = datetime.datetime.strptime(yy +":" + mm +":" + dd +':' +hh, '%Y:%m:%d:%H') ts.append(t) tmp = ts[0] for i in range(1,len(ts)): dif = (ts[i] - tmp).total_seconds() # print (dif,'dif') if dif != 3600: print (dif,i,key) tmp = ts[i] # break data_folder_path = config.data_folder_path # data_folder_path ='test_file/sorted_intensity_data_folder.json' if not os.path.exists(data_folder_path): equal_track_image_list = np.array(equal_track_image_list) np.random.shuffle(equal_track_image_list) equal_track_image_list = list(equal_track_image_list) # equal_track_image_list = equal_track_image_list[:2] train_folder = equal_track_image_list[:int(0.9 * len(equal_track_image_list))] test_folder = equal_track_image_list[int(0.9* len(equal_track_image_list)):] with open(data_folder_path,'w') as f: json.dump({'train_folder':train_folder,'test_folder': test_folder},f) print ('data_folder_path dumped to: ',data_folder_path) else: with open(data_folder_path,'r') as f: data_folder = json.load(f) train_folder = data_folder['train_folder'] test_folder = data_folder['test_folder'] print ('load data folder from: ' , data_folder_path) dataset_image_dic = {} dataset_intensity_dic ={} dataset_image_path = 
'test_file/dataset_imageset.hdf5' dataset_intensity_path = 'test_file/dataset_intensity.hdf5' # equal_track_image_list=equal_track_image_list[:2] # if not os.path.exists(dataset_image_path) : # vgg_model = VGG_16('vgg16_weights.h5') # sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) # vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy') # hf_image = h5py.File(dataset_image_path) # hf_intensity = h5py.File(dataset_intensity_path) # for key in equal_track_image_list: # print(key) # image_folder = image_path + key +'/' # track_file_path = track_path + key + '.itk' # dataset_image = prepare_dataset.dataset_2(image_folder) # dataset_input = get_fc2(vgg_model,dataset_image) # dataset_input = np.array(dataset_input) # dataset_intensity = prepare_dataset.dataset_1(track_file_path) # dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std) # print (dataset_input.shape,'dataset_image.shape') # print (dataset_intensity.shape,'dataset_intensity') # dataset_image_dic[key] = dataset_input # dataset_intensity_dic[key] = dataset_intensity # hf_image.create_dataset(key, data = dataset_input) # hf_intensity.create_dataset(key, data = dataset_intensity) # hf_image.close() # hf_intensity.close() # print ('dumped data into hf_image,intensity') # else: # print ('hf_image intensity exists') # for key in equal_track_image_list: # with h5py.File(dataset_image_path,'r') as hf_image: # dataset_image = np.array(hf_image.get(key)) # with h5py.File(dataset_intensity_path,'r') as hf_intensity: # dataset_intensity = np.array(hf_intensity.get(key)) # print (key, dataset_image.shape,dataset_intensity.shape) # train_selected_folder_index = random.sample(range(0,len(train_folder)),10) # test_selected_folder_index = random.sample(range(0,len(test_folder)),10) hf_image = h5py.File(dataset_image_path) hf_intensity = h5py.File(dataset_intensity_path) # for i in train_selected_folder_index: # key = train_folder[i] # train_folder=['201314'] # train_folder=['199406'] # train_folder = train_folder[:2] # test_folder = test_folder[:2] csv_file = 'test_file/24_prediction_error_each_typhoon.csv' with open(csv_file, 'wb') as csvfile: writer = csv.writer(csvfile, delimiter=' ') for key in train_folder: print(key) if os.path.exists(ModelCheckpoint_file): print ('load load_weights',ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) dataset_image = np.array(hf_image.get(key)) dataset_intensity = np.array(hf_intensity.get(key)) # print (dataset_image.shape,'dataset_image') if len(dataset_intensity) > look_back: train_x,train_y = prepare_dataset.create_dataset_2(dataset_image, dataset_intensity,look_back = look_back) train_x = np.array(train_x,dtype = 'float32') train_y = np.array(train_y,dtype = 'float32') if train_x.shape[0] >0: # train_predict_image = 'test_file/prediction_output_6_04_13_old_version/' + str(key)+'_'+str(look_back)+'_train.png' trainPredict = model.predict(train_x, batch_size=batch_size) model.reset_states() trainPredict = prepare_dataset.reverse_normalize_intensity(trainPredict,intensity_mean,intensity_std) trainY = prepare_dataset.reverse_normalize_intensity(train_y,intensity_mean,intensity_std) # print (trainPredict,'train_predict') # print (trainY,'trainY') trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:,0])) print('Train Score: %.2f RMSE' % (trainScore)) writer.writerow([key,'train',trainScore]) # for i in test_selected_folder_index: for key in test_folder: # key = test_folder[i] print (key) if 
os.path.exists(ModelCheckpoint_file): print ('load load_weights',ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) dataset_image = np.array(hf_image.get(key)) dataset_intensity = np.array(hf_intensity.get(key)) if len(dataset_intensity) > look_back: test_x,test_y = prepare_dataset.create_dataset_2(dataset_image, dataset_intensity,look_back = look_back) test_x = np.array(test_x,dtype = 'float32') test_y = np.array(test_y,dtype = 'float32') if test_x.shape[0] > 0: testPredict = model.predict(test_x, batch_size=batch_size) model.reset_states() # # # invert predictions testPredict = prepare_dataset.reverse_normalize_intensity(testPredict,intensity_mean,intensity_std) testY = prepare_dataset.reverse_normalize_intensity(test_y,intensity_mean,intensity_std) testScore = math.sqrt(mean_squared_error(testY, testPredict[:,0])) writer.writerow([key,'test',testScore]) hf_image.close() hf_intensity.close() t2 = time.time() print ("using %s seconds" % (t2-t1))
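# --- Sketch: per-typhoon RMSE after undoing the intensity normalisation ---
# The per-typhoon loops above report RMSE in the original pressure units by
# reversing the mean/std normalisation before scoring. reverse_normalize()
# below is a hedged stand-in for prepare_dataset.reverse_normalize_intensity,
# and the mean/std values are placeholders, not the configured statistics.
import math
import numpy as np
from sklearn.metrics import mean_squared_error

def reverse_normalize(values, mean, std):
    # assumed inverse of (x - mean) / std
    return np.asarray(values) * std + mean

predicted_norm = np.array([0.10, -0.20, 0.30])
true_norm = np.array([0.00, -0.10, 0.40])
mean, std = 985.0, 20.0  # placeholder statistics
rmse = math.sqrt(mean_squared_error(reverse_normalize(true_norm, mean, std),
                                    reverse_normalize(predicted_norm, mean, std)))
print('RMSE in original units: %.2f' % rmse)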
def main(): np.random.seed(7) t1 = time.time() image_path = config.image_path track_path = config.track_path track_dic_path = config.track_dic_path track_dict = load.load_json(track_dic_path) intensity_mean, intensity_std = config.intensity_mean, config.intensity_std batch_size = config.batch_size ModelCheckpoint_file = config.ModelCheckpoint_file look_back = config.look_back img_rows, img_cols = config.img_rows, config.img_cols subdir_list = [] hist_path = config.hist_path mean_v, std_v = config.mean_v, config.std_v intensity_mean, intensity_std = config.intensity_mean, config.intensity_std model = pretrain_model(look_back, batch_size) if os.path.exists(ModelCheckpoint_file): print('load load_weights', ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) print(model.summary()) # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512)) # train_y = np.random.uniform(0,1,(17,1)) # print (train_x) # train_x = np.array(train_x,dtype = 'float32') # train_y = np.array(train_y,dtype= 'float32') # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False) """ count the number of image in each typhoon sequence """ image_number_dictionary = {} for subdirs, dirs, files in os.walk(image_path): # print (subdirs) subdir_list.append(subdirs) for subdir in subdir_list: count = 0 for subdirs, dirs, files in os.walk(subdir): for file in files: count += 1 key = subdir.split('/')[-1] image_number_dictionary[key] = count if count < 24: print(key, count) # print (image_number_dictionary) """ check the number of images equals the number of track data? """ # for subdir in subdir_list: # for subdirs, dirs, files in os.walk(subdir): # for file in files: # # print (file) # [k1, k2] = file.split("-")[:2] # key = "".join((k1,k2)) # try: # mark = track_dict[key] # except KeyError: # print (file +'do not have track value') # for k in track_dict.keys(): # k2 = k[-6:] # typhoon number # k1 = k[:-6] # file = k1 +'-' + k2 +'*' # file_path = image_path + k2 +'/' + file # if not os.path.isfile(file_path): # print (file_path not exists) track_dict_number = {} equal_track_image_list = [] not_equal_track_image_list = [] for subdir in subdir_list: key = subdir.split('/')[-1] if len(key) > 0 and key not in ['201620', '201621', '201622']: track_file_path = track_path + key + '.itk' with open(track_file_path, 'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') count = 0 for row in tsv_reader: count += 1 track_dict_number[key] = count if count != image_number_dictionary[key]: not_equal_track_image_list.append(key) # print (key,count,image_number_dictionary[key],'not equal') if count == image_number_dictionary[key]: # print (key,count,image_number_dictionary[key],' equal') equal_track_image_list.append(key) # print (not_equal_track_image_list,'not_equal_track_image_list') # print (equal_track_image_list,'equal_track_image_list') print(len(equal_track_image_list), 'lenth of eqaual track image list') # "check if track file difference is one hour, result is yes for both equal and not_eqaul_image_list " for key in not_equal_track_image_list: ts = [] track_file_path = track_path + key + '.itk' with open(track_file_path, 'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') for row in tsv_reader: yy = row[0] mm = row[1] dd = row[2] hh = row[3] t = datetime.datetime.strptime( yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H') ts.append(t) tmp = ts[0] for i in range(1, len(ts)): dif = (ts[i] - tmp).total_seconds() # print (dif,'dif') if dif != 
3600: print(dif, i, key) tmp = ts[i] # break dataset_imageset_path = 'test_file/dataset_image_unequal.hdf5' dataset_intensity_path = 'test_file/dataset_intensity_unequal.hdf5' # hf_image = h5py.File(dataset_imageset_path) # hf_intensity = h5py.File(dataset_intensity_path) vgg_model = VGG_16('vgg16_weights.h5') sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy') for key in not_equal_track_image_list: # # for key in equal_track_image_list: image_folder = image_path + key + '/' # dataset_x,dataset_y = prepare_dataset.dataset_1_2(image_folder,track_dict) # print dataset_x.shape # print dataset_y.shape # break file_path_list = [] # print key dataset_image = [] dataset_intensity = [] for subdirs, dirs, files in os.walk(image_folder): for file in files: file_path = os.path.join(subdirs, file) file_path_list.append(file_path) sorted_file_list = sorted( file_path_list, key=lambda x: int(x.split('/')[-1].split('-')[-4])) # print (len(sorted_file_list),'len of sorted_file_list') ts = [] intensities = [] for file_path in sorted_file_list: yymmddhh = file_path.split('/')[-1].split('-')[-4] track_key = yymmddhh + key intensities.append(float(track_dict[track_key][-2])) t = datetime.datetime.strptime(yymmddhh, '%Y%m%d%H') ts.append(t) # print len(ts),'len ts' tmp = ts[0] orig_image = load.get_x(sorted_file_list, img_rows, img_cols, mean_v, std_v) tmp_image = orig_image[0] # dataset_input = get_fc2(vgg_model,dataset_image) # dataset_input = np.array(dataset_input) dataset_image.append(orig_image[0]) dataset_intensity.append(intensities[0]) for i in range(1, len(ts)): dif = (ts[i] - tmp).total_seconds() # print (dif,'dif') if dif != 3600: print(dif / 3600.0, i, key, ts[i]) for j in range(1, int(dif / 3600.0)): t2 = tmp + datetime.timedelta(seconds=3600) yy = t2.year mm = str(t2.month).zfill(2) dd = str(t2.day).zfill(2) hh = str(t2.hour).zfill(2) yymmddhh = str(yy) + mm + dd + hh track_key = yymmddhh + key intensity = float(track_dict[track_key][-2]) image = (1 - (float(j) / (dif / 3600.0))) * tmp_image + ( float(j) / (dif / 3600.0)) * orig_image[i] dataset_image.append(image) dataset_intensity.append(intensity) dataset_image.append(orig_image[i]) dataset_intensity.append(intensities[i]) tmp = ts[i] tmp_image = orig_image[i] # dataset_image = np.array(dataset_image) for i in range(len(dataset_image)): show_image( dataset_image[i][0], 'test_file/unequal_image_generate_test/' + str(key) + '_' + str(i) + '.jpg') # dataset_input = get_fc2(vgg_model,dataset_image) # dataset_intensity = np.array(dataset_intensity) # dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity, intensity_mean,intensity_std) # hf_image.create_dataset(key, data = dataset_input) # hf_intensity.create_dataset(key, data = dataset_intensity) # break # hf_image.close() # hf_intensity.close() t2 = time.time() print("using %s seconds" % (t2 - t1))
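# --- Sketch: filling an hourly gap by linear interpolation between frames ---
# For the "unequal" sequences, the loop above synthesises the missing hours by
# blending the surrounding satellite frames:
#     image_j = (1 - j/n) * previous_frame + (j/n) * next_frame, j = 1 .. n-1
# where n is the gap in hours. Minimal illustration with dummy 2x2 "images"
# (the real frames are img_rows x img_cols arrays).
import numpy as np

previous_frame = np.zeros((2, 2))
next_frame = np.ones((2, 2))
gap_hours = 3  # timestamps 3 hours apart, so two frames are missing
filled = [previous_frame]
for j in range(1, gap_hours):
    weight = float(j) / gap_hours
    filled.append((1.0 - weight) * previous_frame + weight * next_frame)
filled.append(next_frame)
print([frame.mean() for frame in filled])  # approximately 0.0, 0.33, 0.67, 1.0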
def main(): np.random.seed(7) t1 = time.time() image_path = config.image_path track_path = config.track_path track_dic_path = config.track_dic_path track_dict = load.load_json(track_dic_path) intensity_mean, intensity_std = config.intensity_mean, config.intensity_std batch_size = config.batch_size ModelCheckpoint_file = config.ModelCheckpoint_file look_back = config.look_back img_rows, img_cols = config.img_rows, config.img_cols subdir_list = [] hist_path = config.hist_path # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512)) # train_y = np.random.uniform(0,1,(17,1)) # print (train_x) # train_x = np.array(train_x,dtype = 'float32') # train_y = np.array(train_y,dtype= 'float32') # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False) """ count the number of image in each typhoon sequence """ image_number_dictionary = {} for subdirs, dirs, files in os.walk(image_path): # print (subdirs) subdir_list.append(subdirs) for subdir in subdir_list: count = 0 for subdirs, dirs, files in os.walk(subdir): for file in files: count += 1 key = subdir.split('/')[-1] image_number_dictionary[key] = count if count < 24: print(key, count) # print (image_number_dictionary) """ check the number of images equals the number of track data? """ # for subdir in subdir_list: # for subdirs, dirs, files in os.walk(subdir): # for file in files: # # print (file) # [k1, k2] = file.split("-")[:2] # key = "".join((k1,k2)) # try: # mark = track_dict[key] # except KeyError: # print (file +'do not have track value') # for k in track_dict.keys(): # k2 = k[-6:] # typhoon number # k1 = k[:-6] # file = k1 +'-' + k2 +'*' # file_path = image_path + k2 +'/' + file # if not os.path.isfile(file_path): # print (file_path not exists) track_dict_number = {} equal_track_image_list = [] not_equal_track_image_list = [] for subdir in subdir_list: key = subdir.split('/')[-1] if len(key) > 0 and key not in ['201620', '201621', '201622']: track_file_path = track_path + key + '.itk' with open(track_file_path, 'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') count = 0 for row in tsv_reader: count += 1 track_dict_number[key] = count if count != image_number_dictionary[key]: not_equal_track_image_list.append(key) # print (key,count,image_number_dictionary[key],'not equal') if count == image_number_dictionary[key]: # print (key,count,image_number_dictionary[key],' equal') equal_track_image_list.append(key) # print (not_equal_track_image_list,'not_equal_track_image_list') # print (equal_track_image_list,'equal_track_image_list') """ # check_intensities statistics data_folder = not_equal_track_image_list + equal_track_image_list intensities=[] for folder in data_folder: file_name = track_path + folder+'.itk' with open(file_name,'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') for row in tsv_reader: #print row.type intensity = float(row[-2]) intensities.append(intensity) intensities = np.array(intensities) print intensities print intensities.shape print np.mean(intensities,axis=0),'mean' print np.std(intensities,axis=0),'std' print np.min(intensities,axis=0),'min' print np.max(intensities,axis =0),'max' """ print(len(equal_track_image_list), 'lenth of eqaual track image list') # "check if track file difference is one hour, result is yes for both equal and not_eqaul_image_list " for key in not_equal_track_image_list: ts = [] track_file_path = track_path + key + '.itk' with open(track_file_path, 'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, 
delimiter='\t') for row in tsv_reader: yy = row[0] mm = row[1] dd = row[2] hh = row[3] t = datetime.datetime.strptime( yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H') ts.append(t) tmp = ts[0] for i in range(1, len(ts)): dif = (ts[i] - tmp).total_seconds() # print (dif,'dif') if dif != 3600: print(dif, i, key) tmp = ts[i] # break data_folder_path = config.data_folder_path if not os.path.exists(data_folder_path): equal_track_image_list = np.array(equal_track_image_list) np.random.shuffle(equal_track_image_list) equal_track_image_list = list(equal_track_image_list) # equal_track_image_list = equal_track_image_list[:2] train_folder = equal_track_image_list[:int(0.9 * len(equal_track_image_list) )] test_folder = equal_track_image_list[int(0.9 * len(equal_track_image_list)):] with open(data_folder_path, 'w') as f: json.dump( { 'train_folder': train_folder, 'test_folder': test_folder }, f) print('data_folder_path dumped to: ', data_folder_path) else: with open(data_folder_path, 'r') as f: data_folder = json.load(f) train_folder = data_folder['train_folder'] test_folder = data_folder['test_folder'] print('load data folder from: ', data_folder_path) dataset_image_dic = {} dataset_intensity_dic = {} dataset_image_path = 'test_file/dataset_imageset.hdf5' dataset_intensity_path = 'test_file/dataset_intensity.hdf5' dataset_type_path = 'test_file/dataset_type.hdf5' # equal_track_image_list=equal_track_image_list[:2] # if not os.path.exists(dataset_image_path) : # vgg_model = VGG_16('vgg16_weights.h5') # sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) # vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy') # hf_image = h5py.File(dataset_image_path) # hf_intensity = h5py.File(dataset_intensity_path) # for key in equal_track_image_list: # print(key) # image_folder = image_path + key +'/' # track_file_path = track_path + key + '.itk' # dataset_image = prepare_dataset.dataset_2(image_folder) # dataset_input = get_fc2(vgg_model,dataset_image) # dataset_input = np.array(dataset_input) # dataset_intensity = prepare_dataset.dataset_1(track_file_path) # dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std) # print (dataset_input.shape,'dataset_image.shape') # print (dataset_intensity.shape,'dataset_intensity') # dataset_image_dic[key] = dataset_input # dataset_intensity_dic[key] = dataset_intensity # hf_image.create_dataset(key, data = dataset_input) # hf_intensity.create_dataset(key, data = dataset_intensity) # hf_image.close() # hf_intensity.close() # print ('dumped data into hf_image,intensity') # else: # print ('hf_image intensity exists') # for key in equal_track_image_list: # with h5py.File(dataset_image_path,'r') as hf_image: # dataset_image = np.array(hf_image.get(key)) # with h5py.File(dataset_intensity_path,'r') as hf_intensity: # dataset_intensity = np.array(hf_intensity.get(key)) # print (key, dataset_image.shape,dataset_intensity.shape) # train_selected_folder_index = random.sample(range(0,len(train_folder)),10) # test_selected_folder_index = random.sample(range(0,len(test_folder)),10) hf_image = h5py.File(dataset_image_path) hf_intensity = h5py.File(dataset_intensity_path) hf_type = h5py.File(dataset_type_path) # for i in train_selected_folder_index: # key = train_folder[i] # train_folder=['201314'] model = pretrain_model(look_back, batch_size) csv_path_train = 'test_file/train_prediction_compare_initilization_or_no_look_back_6/' csv_path_test = 'test_file/test_prediction_compare_initilization_or_no_look_back_6/' 
ModelCheckpoint_file_2 = config.ModelCheckpoint_file_2 ModelCheckpoint_file = config.ModelCheckpoint_file # train_folder=train_folder[:3] train_error_10 = 0.0 train_error_10_2 = 0.0 train_error_10_p = 0.0 train_error_10_2_p = 0.0 count = 0.0 train_error_extra = 0.0 train_error_tropical = 0.0 train_error_tropical_p = 0.0 train_error_extra_p = 0.0 count_extra = 0.0 count_trop = 0.0 for key in train_folder: print(key) dataset_image = np.array(hf_image.get(key)) dataset_intensity = np.array(hf_intensity.get(key)) dataset_type = np.array(hf_type.get((key))) # print (dataset_image.shape,'dataset_image') train_x, train_y = prepare_dataset.create_dataset_2_zero( dataset_image, dataset_intensity, look_back=look_back) train_x_2, train_y_2 = prepare_dataset.extend_dataset_2_zero( dataset_image, dataset_intensity, look_back=look_back) train_type = prepare_dataset.create_dataset_y_zero(dataset_type, look_back=look_back) train_x = np.array(train_x, dtype='float32') train_y = np.array(train_y, dtype='float32') train_x_2 = np.array(train_x_2, dtype='float32') train_type = np.array(train_type) if train_x.shape[0] > 0: if os.path.exists(ModelCheckpoint_file): print('load load_weights', ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) trainPredict = model.predict(train_x, batch_size=batch_size) model.reset_states() trainPredict = prepare_dataset.reverse_normalize_intensity( trainPredict, intensity_mean, intensity_std) trainY = prepare_dataset.reverse_normalize_intensity( train_y, intensity_mean, intensity_std) # print (trainPredict,'train_predict') # print (trainY,'trainY') if train_x_2.shape[0] > 0: if os.path.exists(ModelCheckpoint_file_2): print('load load_weights', ModelCheckpoint_file_2) model.load_weights(ModelCheckpoint_file_2) trainPredict_2 = model.predict(train_x_2, batch_size=batch_size) model.reset_states() trainPredict_2 = prepare_dataset.reverse_normalize_intensity( trainPredict_2, intensity_mean, intensity_std) if len(trainPredict) == len(trainPredict_2[20:]): csv_path_1 = csv_path_train + str(key) + '.csv' if not os.path.exists(csv_path_train): os.mkdir(csv_path_train) print 'writing to csv' + key if int(len(trainPredict)) >= 10: count += 1 train_error_10 += math.sqrt( mean_squared_error(trainPredict[:10], trainY[:10])) train_error_10_2 += math.sqrt( mean_squared_error(trainPredict_2[20:30], trainY[:10])) train_error_10_p += np.sum( np.power( (trainPredict[:10] - trainY[:10]), 2)) / np.sum( np.power((trainPredict - trainY), 2)) train_error_10_2_p += np.sum( np.power((trainPredict_2[20:30] - trainY[:10]), 2)) / np.sum( np.power( (trainPredict_2[20:] - trainY), 2)) if 6 in train_type: train_error_extra += math.sqrt( mean_squared_error(trainPredict_2[20:], trainY)) # train_error_extra_p += np.sum(np.power((trainPredict_2[20:0.1*len(trainPredict) +20]-trainY[:int(0.1*len(trainPredict))]),2))/np.sum(np.power((trainPredict_2[20:]-trainY),2)) count_extra += 1 if 6 not in train_type: train_error_tropical += math.sqrt( mean_squared_error(trainPredict_2[20:], trainY)) # train_error_tropical_p += np.sum(np.power((trainPredict_2[20:0.1*len(trainPredict) +20]-trainY[:int(0.1*len(trainPredict))]),2))/np.sum(np.power((trainPredict_2[20:]-trainY),2)) count_trop += 1 trainPredict = np.reshape(trainPredict, (len(trainPredict), 1)) trainPredict_2 = np.reshape(np.array(trainPredict_2[20:]), (len(trainY), 1)) trainY = np.reshape(np.array(trainY), (len(trainY), 1)) train_type = np.reshape(np.array(train_type), (len(trainY), 1)) zz = np.concatenate( (trainPredict, trainPredict_2, trainY, train_type), 
1) with open(csv_path_1, 'wb') as f: writer = csv.writer(f, delimiter=',') writer.writerow([ 'predictions_no_initialization', 'predictions_with_initializations', 'intensity_true', 'type_true' ]) writer.writerows(zz) print train_error_10, 'train_error_10' print train_error_10_p, 'train_error_10_p' print train_error_10_2, 'train_error_10_2' print train_error_10_2_p, 'train_error_10_2_p' print 'divide by len(train_folder)', count print train_error_10 / count, 'train_error_10' print train_error_10_p / count, 'train_error_10_p' print train_error_10_2 / count, 'train_error_10_2' print train_error_10_2_p / count, 'train_error_10_2_p' print train_error_tropical, 'train_error_tropical' print train_error_extra, 'train_error_extra' print count_trop, 'count_trop' print count_extra, 'count_extra' print train_error_tropical / count_trop, 'trop div' print train_error_extra / count_extra, 'trop_extra' # for i in test_selected_folder_index: # test_folder = test_folder[:3] test_error_10 = 0.0 test_error_10_2 = 0.0 test_error_10_p = 0.0 test_error_10_2_p = 0.0 test_error_extra = 0.0 test_error_tropical = 0.0 test_error_extra_p = 0.0 test_error_tropical_p = 0.0 count = 0.0 count_extra = 0.0 count_trop = 0.0 for key in test_folder: # key = test_folder[i] print(key) model.load_weights(ModelCheckpoint_file) dataset_image = np.array(hf_image.get(key)) dataset_intensity = np.array(hf_intensity.get(key)) dataset_type = np.array(hf_type.get((key))) test_x, test_y = prepare_dataset.create_dataset_2_zero( dataset_image, dataset_intensity, look_back=look_back) test_x_2, test_y_2 = prepare_dataset.extend_dataset_2_zero( dataset_image, dataset_intensity, look_back=look_back) test_type = prepare_dataset.create_dataset_y_zero(dataset_type, look_back=look_back) test_x = np.array(test_x, dtype='float32') test_y = np.array(test_y, dtype='float32') test_x_2 = np.array(test_x_2, dtype='float32') test_type = np.array(test_type) if test_x.shape[0] > 0: if os.path.exists(ModelCheckpoint_file): print('load load_weights', ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) testPredict = model.predict(test_x, batch_size=batch_size) model.reset_states() # # # invert predictions testPredict = prepare_dataset.reverse_normalize_intensity( testPredict, intensity_mean, intensity_std) testY = prepare_dataset.reverse_normalize_intensity( test_y, intensity_mean, intensity_std) if test_x_2.shape[0] > 0: if os.path.exists(ModelCheckpoint_file_2): print('load load_weights', ModelCheckpoint_file_2) model.load_weights(ModelCheckpoint_file_2) testPredict_2 = model.predict(test_x_2, batch_size=batch_size) model.reset_states() testPredict_2 = prepare_dataset.reverse_normalize_intensity( testPredict_2, intensity_mean, intensity_std) if len(testPredict) == len(testPredict_2[20:]): csv_path_1 = csv_path_test + str(key) + '.csv' if not os.path.exists(csv_path_test): os.mkdir(csv_path_test) print 'writing to csv' + key if int(len(testPredict)) >= 10: count += 1 test_error_10 += math.sqrt( mean_squared_error(testPredict[:10], testY[:10])) test_error_10_2 += math.sqrt( mean_squared_error(testPredict_2[20:30], testY[:10])) test_error_10_p += np.sum( np.power((testPredict[:10] - testY[:10]), 2)) / np.sum( np.power((testPredict - testY), 2)) test_error_10_2_p += np.sum( np.power( (testPredict_2[20:30] - testY[:10]), 2)) / np.sum( np.power((testPredict_2[20:] - testY), 2)) if 6 in test_type: test_error_extra += math.sqrt( mean_squared_error(testPredict_2[20:], testY)) # test_error_extra_p += np.sum(np.power((testPredict_2[20:0.1*len(testPredict) 
+20]-testY[:int(0.1*len(testPredict))]),2))/np.sum(np.power((testPredict_2[20:]-testY),2)) count_extra += 1 if 6 not in test_type: test_error_tropical += math.sqrt( mean_squared_error(testPredict_2[20:], testY)) # test_error_tropical_p += np.sum(np.power((testPredict_2[20:0.1*len(testPredict) +20]-testY[:int(0.1*len(testPredict))]),2))/np.sum(np.power((testPredict_2[20:]-testY),2)) count_trop += 1 testPredict = np.reshape(testPredict, (len(testPredict), 1)) testPredict_2 = np.reshape(np.array(testPredict_2[20:]), (len(testY), 1)) testY = np.reshape(np.array(testY), (len(testY), 1)) test_type = np.reshape(np.array(test_type), (len(testY), 1)) zz = np.concatenate( (testPredict, testPredict_2, testY, test_type), 1) with open(csv_path_1, 'wb') as f: writer = csv.writer(f, delimiter=',') writer.writerow([ 'predictions_no_initialization', 'predictions_with_initializations', 'intensity_true', 'type_true' ]) writer.writerows(zz) print test_error_10, 'test_error_10' print test_error_10_2, 'test_error_10_2' print test_error_10_p, 'test_error_10_p' print test_error_10_2_p, 'test_error_10_2_p' print 'divide by len(test_folder)', count print test_error_10 / count, 'test_error_10' print test_error_10_2 / count, 'test_error_10_2' print test_error_10_p / count, 'test_error_10_p' print test_error_10_2_p / count, 'test_error_10_2_p' print test_error_extra, 'test_error_extra' print test_error_tropical, 'test_error_tropical' # print test_error_extra_p,'test_error_extra_p' # print test_error_tropical_p,'test_error_tropical_p' print 'number of extra tropical', count_extra print 'number of tropical ', count_trop print test_error_extra / count_extra, 'test_error_extra/count_extra' print test_error_tropical / count_trop, 'test_error_tropical/count_trop' # print test_error_extra_p/count_extra,'test_error_extra_p/count_extra' # print test_error_tropical_p/count_trop,'test_error_tropical_p/count_trop' hf_image.close() hf_intensity.close() hf_type.close() t2 = time.time() print("using %s seconds" % (t2 - t1))
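# --- Sketch: the side-by-side prediction CSV written per typhoon above ---
# Both comparison loops stack the predictions without initialisation, the
# predictions with initialisation, the true intensities and the storm type
# into one array and dump it to a per-typhoon CSV. A minimal version of that
# layout (file name and values are placeholders; the original opens the file
# as 'wb' under Python 2):
import csv
import numpy as np

pred_no_init = np.array([[1000.0], [998.0]])
pred_with_init = np.array([[1001.0], [997.0]])
intensity_true = np.array([[1002.0], [996.0]])
type_true = np.array([[2], [2]])
rows = np.concatenate((pred_no_init, pred_with_init, intensity_true, type_true), 1)
with open('comparison_example.csv', 'w', newline='') as f:
    writer = csv.writer(f, delimiter=',')
    writer.writerow(['predictions_no_initialization',
                     'predictions_with_initializations',
                     'intensity_true', 'type_true'])
    writer.writerows(rows)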
def main(): np.random.seed(7) # trackDictPath = config.track_dic_path # track_dict = load.load_json(trackDictPath) track_path = config.track_path suspicious_file_list_path = config.suspicious_file_list_path suspicious_file_list = load.load_json(suspicious_file_list_path) train_validation_test_subdirs_split = config.train_validation_test_subdirs_split intensity_mean, intensity_std = config.intensity_mean, config.intensity_std batch_size = config.batch_size ModelCheckpoint_file = config.ModelCheckpoint_file train_predict_image = config.train_predict_image test_predict_image = config.test_predict_image look_back = 3 file_list = [] for subdir, dirs, files in os.walk(track_path): for file in files: file_path = os.path.join(subdir, file) file_list.append(file_path) file_list = np.array(file_list) np.random.shuffle(file_list) file_list = list(file_list) file_list = file_list[:10] # print (file_list) # for file in file_list: # if len(file) <=2: # print (file) # print (file_list.index(file)) # file_list = file_list[:10] train_file_list = file_list[:int(0.9 * len(file_list))] test_file_list = file_list[int(0.9 * len(file_list)):] # print(train_file_list) trainX = [] trainY = [] testX = [] testY = [] dataset_count = 0 for file in train_file_list: try: data = prepare_dataset.dataset_1(file) data = prepare_dataset.normalize_intensity(data, intensity_mean, intensity_std) # data = list(data) trainXx, trainYy = prepare_dataset.create_dataset(data, look_back) trainX += trainXx trainY += trainYy dataset_count += data.shape[0] except: print(file) for file in test_file_list: try: data = prepare_dataset.dataset_1(file) data = prepare_dataset.normalize_intensity(data, intensity_mean, intensity_std) # data = list(data) testXx, testYy = prepare_dataset.create_dataset(data, look_back) testX += testXx testY += testYy dataset_count += data.shape[0] except: print(file) trainX = np.array(trainX, dtype='float32') trainY = np.array(trainY, dtype='float32') testX = np.array(testX, dtype='float32') testY = np.array(testY, dtype='float32') print(trainX.shape) print(testX.shape) trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1)) testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1)) batch_size = 1 model = Sequential() model.add( LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True)) model.add(Dense(3)) model.compile(loss='mean_squared_error', optimizer='adam') # checkpointer = ModelCheckpoint(filepath=ModelCheckpoint_file, verbose=2, save_best_only=True) hists = [] for i in range(10): hist = model.fit(trainX, trainY, nb_epoch=1, batch_size=batch_size, verbose=2, shuffle=False) model.reset_states() hists.append(hist.history['loss'][0]) print(hists, 'hists') # model.save_weights(ModelCheckpoint_file) # make predictions trainPredict = model.predict(trainX, batch_size=batch_size) model.reset_states() testPredict = model.predict(testX, batch_size=batch_size) # invert predictions trainPredict = prepare_dataset.reverse_normalize_intensity( trainPredict, intensity_mean, intensity_std) trainY = prepare_dataset.reverse_normalize_intensity( trainY, intensity_mean, intensity_std) testPredict = prepare_dataset.reverse_normalize_intensity( testPredict, intensity_mean, intensity_std) testY = prepare_dataset.reverse_normalize_intensity( testY, intensity_mean, intensity_std) # calculate root mean squared error # print (trainPredict[:,0], 'trainPredict') # print (trainPredict.shape,'len_train_predict') # print(trainY[0],'trainY') trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:, 0])) 
print('Train Score: %.2f RMSE' % (trainScore)) testScore = math.sqrt(mean_squared_error(testY, testPredict[:, 0])) print('Test Score: %.2f RMSE' % (testScore)) dataset = np.zeros((dataset_count, 1), dtype='float32') # trainPredictPlot = np.empty_like(dataset) # trainPredictPlot[:, :] = np.nan # trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict # # shift test predictions for plotting # testPredictPlot = np.empty_like(dataset) # testPredictPlot[:, :] = np.nan # testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict # # plt.plot(dataset)) fig = plt.figure() plt.title('train_predicts_look_back') plt.plot(list(trainPredict[:, 0]), 'r--', label='train_predict') plt.plot(list(trainY), 'g--', label='train') plt.legend(loc='upper left', shadow=True) plt.xlabel('typhoon_image') plt.ylabel('typhoon intensity') plt.savefig(train_predict_image) plt.close(fig) fig = plt.figure() plt.title('test_predicts_look_back') plt.plot(list(testPredict[:, 0]), 'r--', label='test_predict') plt.plot(list(testY), 'g--', label='test') plt.xlabel('typhoon_image') plt.ylabel('typhoon intensity') plt.legend(loc='upper left', shadow=True) plt.savefig(test_predict_image) plt.close(fig)
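# --- Sketch: shaping (samples, look_back) windows for a stateful Keras LSTM ---
# The intensity-only baseline above reshapes the sliding-window matrix to
# (samples, timesteps, features) and fixes the batch size through
# batch_input_shape, which stateful LSTMs require. The toy model below uses a
# single-output Dense(1) head for simplicity; it illustrates the input
# shaping, not the exact model defined above.
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense

look_back, batch_size = 3, 1
windows = np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]], dtype='float32')
targets = np.array([4, 5, 6], dtype='float32')
x = np.reshape(windows, (windows.shape[0], look_back, 1))  # (samples, steps, features)
model = Sequential()
model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x, targets, nb_epoch=1, batch_size=batch_size, verbose=0, shuffle=False)
model.reset_states()  # clear the LSTM state between epochs/sequences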
def main(): np.random.seed(7) # trackDictPath = config.track_dic_path # track_dict = load.load_json(trackDictPath) track_path = config.track_path suspicious_file_list_path = config.suspicious_file_list_path suspicious_file_list = load.load_json(suspicious_file_list_path) train_validation_test_subdirs_split = config.train_validation_test_subdirs_split intensity_mean, intensity_std = config.intensity_mean, config.intensity_std batch_size = config.batch_size ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_lookback_24.hdf5' print('ModelCheckpoint_file', ModelCheckpoint_file) #config.ModelCheckpoint_file look_back = 1 batch_size = 1 print(look_back, 'look_back') file_list = [] model = lstm_model_1(batch_size, look_back) # model.load_weights(ModelCheckpoint_file) for subdir, dirs, files in os.walk(track_path): for file in files: file_path = os.path.join(subdir, file) file_list.append(file_path) file_list = np.array(file_list) np.random.shuffle(file_list) file_list = list(file_list) # file_list = file_list[:10] # print (file_list) # for file in file_list: # if len(file) <=2: # print (file) # print (file_list.index(file)) file_list = file_list[:10] train_file_list = file_list[:int(0.9 * len(file_list))] # validation_file_list = file_list[int(0.85*len(file_list)):int(0.9*len(file_list))] test_file_list = file_list[int(0.9 * len(file_list)):] print(len(train_file_list)) # print (len(validation_file_list)) print(len(test_file_list)) testX = [] testY = [] # dataset_count = 0 train_histss = [] validation_histss = [] train_file_list_copy = train_file_list # trainXS=np.array([]).reshape(0,look_back) # print (trainXS.shape,'trainxs shape') # trainYS = np.array([]).reshape(0,1) trainXS = [] trainYS = [] for i in np.arange(0, len(train_file_list_copy), 12): #len(train_file_list_copy) trainX = [] trainY = [] train_hists = [] validation_hists = [] print(i, 'i') train_file_list = train_file_list_copy[i:i + 12] # print len(train_file_list) for file in train_file_list: # print file # try: data = prepare_dataset.dataset_1(file) data = prepare_dataset.normalize_intensity(data, intensity_mean, intensity_std) # data = list(data) trainXx, trainYy = prepare_dataset.create_dataset(data, look_back) trainX += trainXx trainY += trainYy # print (trainX,'trainX') # print (trainY,'trainY') # break # dataset_count += data.shape[0] # except: # print(file,'error') trainX = np.array(trainX, dtype='float32') trainY = np.array(trainY, dtype='float32') # print (trainX.shape) # print(trainY.shape,'trainY SHAPE') trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1)) # trainXS = np.vstack((trainXS, trainX)) # trainYS = np.vstack((trainYS, trainY)) # print (trainXS.shape,'trainxs shape') # break # return trainXS.append(trainX) trainYS.append(trainY) """ training """ for i in range(100): hist = model.fit(trainX, trainY, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1, shuffle=False) model.reset_states() train_hists.append(hist.history['loss'][0]) validation_hists.append(hist.history['val_loss'][0]) # print (hists,'hists') train_histss.append(train_hists) validation_histss.append(validation_hists) print(train_histss, 'train_histss') print(validation_histss, 'validation_histss') """
def main(): np.random.seed(7) t1 = time.time() """ get the config information for the model """ image_path = config.image_path # the image dataset root folder track_path = config.track_path # the track dataset root folder track_dic_path = config.track_dic_path # the track dict path which I configured from the track dataset track_dict = load.load_json(track_dic_path) intensity_mean, intensity_std = config.intensity_mean, config.intensity_std # get the intensity mean, std information batch_size = config.batch_size ModelCheckpoint_file = config.ModelCheckpoint_file # the path which saved the weights # ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_24_whole_equal_pretrain_epoch_1000_adadelta_0.0001_zero_prediction_initial_normalization_unequal_equal_whole.hdf5' ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_24_whole_equal_pretrain_epoch_1000_adadelta_0.0001_prediction_initial_normalization.hdf5' # ModelCheckpoint_file = 'test_file/orig_weights_lstm_1' look_back = config.look_back # the time difference which you set # look_back = 6 img_rows, img_cols = config.img_rows, config.img_cols # image dimension which you used 224,224 subdir_list = [] hist_path = config.hist_path # the hist path which you saved for keras model = pretrain_model(look_back, batch_size) # get the lstm model if os.path.exists(ModelCheckpoint_file): print('load load_weights', ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) print(model.summary()) # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512)) # train_y = np.random.uniform(0,1,(17,1)) # print (train_x) # train_x = np.array(train_x,dtype = 'float32') # train_y = np.array(train_y,dtype= 'float32') # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False) """ count the number of image in each typhoon sequence """ """ image_number_dictionary={} for subdirs, dirs, files in os.walk(image_path): # print (subdirs) subdir_list.append(subdirs) for subdir in subdir_list: count = 0 for subdirs, dirs, files in os.walk(subdir): for file in files: count += 1 key = subdir.split('/')[-1] image_number_dictionary[key] = count if count < 24: print (key,count) """ # print (image_number_dictionary) """ check the number of images equals the number of track data? 
""" # for subdir in subdir_list: # for subdirs, dirs, files in os.walk(subdir): # for file in files: # # print (file) # [k1, k2] = file.split("-")[:2] # key = "".join((k1,k2)) # try: # mark = track_dict[key] # except KeyError: # print (file +'do not have track value') # for k in track_dict.keys(): # k2 = k[-6:] # typhoon number # k1 = k[:-6] # file = k1 +'-' + k2 +'*' # file_path = image_path + k2 +'/' + file # if not os.path.isfile(file_path): # print (file_path not exists) """ get the equal_data folder in which the image data is not lost, and the uneuqal data folder in which the image data is lost """ track_dict_number = {} equal_track_image_list = [] not_equal_track_image_list = [] for subdir in subdir_list: key = subdir.split('/')[-1] if len(key) > 0 and key not in ['201620', '201621', '201622']: track_file_path = track_path + key + '.itk' with open(track_file_path, 'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') count = 0 for row in tsv_reader: count += 1 track_dict_number[key] = count if count != image_number_dictionary[key]: not_equal_track_image_list.append(key) # print (key,count,image_number_dictionary[key],'not equal') if count == image_number_dictionary[key]: # print (key,count,image_number_dictionary[key],' equal') equal_track_image_list.append(key) # print (not_equal_track_image_list,'not_equal_track_image_list') # print (equal_track_image_list,'equal_track_image_list') """ # check_intensities statistics data_folder = not_equal_track_image_list + equal_track_image_list intensities=[] for folder in data_folder: file_name = track_path + folder+'.itk' with open(file_name,'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') for row in tsv_reader: #print row.type intensity = float(row[-2]) intensities.append(intensity) intensities = np.array(intensities) print intensities print intensities.shape print np.mean(intensities,axis=0),'mean' print np.std(intensities,axis=0),'std' print np.min(intensities,axis=0),'min' print np.max(intensities,axis =0),'max' """ print(len(equal_track_image_list), 'lenth of eqaual track image list') # "check if track file difference is one hour, result is yes for both equal and not_eqaul_image_list " """ for key in not_equal_track_image_list: ts =[] track_file_path = track_path + key+'.itk' with open(track_file_path,'rb') as tsv_file: tsv_reader = csv.reader(tsv_file, delimiter='\t') for row in tsv_reader: yy = row[0] mm = row[1] dd = row[2] hh = row[3] t = datetime.datetime.strptime(yy +":" + mm +":" + dd +':' +hh, '%Y:%m:%d:%H') ts.append(t) tmp = ts[0] for i in range(1,len(ts)): dif = (ts[i] - tmp).total_seconds() # print (dif,'dif') if dif != 3600: print (dif,i,key) tmp = ts[i] # break """ data_folder_path = config.data_folder_path # train and test data folder # data_folder_path ='test_file/sorted_intensity_data_folder.json' if not os.path.exists(data_folder_path): equal_track_image_list = np.array(equal_track_image_list) np.random.shuffle(equal_track_image_list) equal_track_image_list = list(equal_track_image_list) # equal_track_image_list = equal_track_image_list[:2] train_folder = equal_track_image_list[:int(0.9 * len(equal_track_image_list) )] test_folder = equal_track_image_list[int(0.9 * len(equal_track_image_list)):] with open(data_folder_path, 'w') as f: json.dump( { 'train_folder': train_folder, 'test_folder': test_folder }, f) print('data_folder_path dumped to: ', data_folder_path) else: with open(data_folder_path, 'r') as f: data_folder = json.load(f) train_folder = data_folder['train_folder'] 
test_folder = data_folder['test_folder'] print('load data folder from: ', data_folder_path) dataset_image_dic = {} dataset_intensity_dic = {} dataset_image_path = 'test_file/dataset_imageset.hdf5' # the image dataset which I get from the last layer of convolutional neural network, key: typhoon number, value: a list of images dataset_intensity_path = 'test_file/dataset_intensity.hdf5' # the intensity dataset , key: typhoon number, value: a list of intensity # equal_track_image_list=equal_track_image_list[:2] # if not os.path.exists(dataset_image_path) : # vgg_model = VGG_16('vgg16_weights.h5') # sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) # vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy') # hf_image = h5py.File(dataset_image_path) # hf_intensity = h5py.File(dataset_intensity_path) # for key in equal_track_image_list: # print(key) # image_folder = image_path + key +'/' # track_file_path = track_path + key + '.itk' # dataset_image = prepare_dataset.dataset_2(image_folder) # dataset_input = get_fc2(vgg_model,dataset_image) # dataset_input = np.array(dataset_input) # dataset_intensity = prepare_dataset.dataset_1(track_file_path) # dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std) # print (dataset_input.shape,'dataset_image.shape') # print (dataset_intensity.shape,'dataset_intensity') # dataset_image_dic[key] = dataset_input # dataset_intensity_dic[key] = dataset_intensity # hf_image.create_dataset(key, data = dataset_input) # hf_intensity.create_dataset(key, data = dataset_intensity) # hf_image.close() # hf_intensity.close() # print ('dumped data into hf_image,intensity') # else: # print ('hf_image intensity exists') # for key in equal_track_image_list: # with h5py.File(dataset_image_path,'r') as hf_image: # dataset_image = np.array(hf_image.get(key)) # with h5py.File(dataset_intensity_path,'r') as hf_intensity: # dataset_intensity = np.array(hf_intensity.get(key)) # print (key, dataset_image.shape,dataset_intensity.shape) # train_selected_folder_index = random.sample(range(0,len(train_folder)),10) # test_selected_folder_index = random.sample(range(0,len(test_folder)),10) hf_image = h5py.File(dataset_image_path) hf_intensity = h5py.File(dataset_intensity_path) # for i in train_selected_folder_index: # key = train_folder[i] # train_folder=['201314'] # train_folder=['199406'] # key_already_list=[] # for subdirs, dirs, files in os.walk('test_file/prediction_output_24/'): # # print (subdirs) # for file in files: # print file # key_already = file.split('_')[0] # key_already_list.append(key_already) # train_folder = list(set(train_folder) - set(key_already_list)) # test_folder = list(set(test_folder) - set(key_already_list)) print(len(train_folder), len(test_folder), 'len_train_test_folder') for key in train_folder: print(key) if os.path.exists(ModelCheckpoint_file): print('load load_weights', ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) dataset_image = np.array(hf_image.get(key)) dataset_intensity = np.array(hf_intensity.get(key)) # print (dataset_image.shape,'dataset_image') if len(dataset_intensity) > look_back: train_x, train_y = prepare_dataset.extend_dataset_2( dataset_image, dataset_intensity, look_back=look_back) train_x = np.array(train_x, dtype='float32') train_y = np.array(train_y, dtype='float32') if train_x.shape[0] > 0: train_predict_image = 'test_file/prediction_output_24/' + str( key) + '_' + str(look_back) + '_train.png' trainPredict = model.predict(train_x, batch_size=batch_size) 
model.reset_states() trainPredict = prepare_dataset.reverse_normalize_intensity( trainPredict, intensity_mean, intensity_std) trainY = prepare_dataset.reverse_normalize_intensity( train_y, intensity_mean, intensity_std) # print (trainPredict,'train_predict') # print (trainY,'trainY') fig = plt.figure() plt.title('train_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key)) plt.plot(list(trainPredict[20:20000, 0]), 'r--', label='train_predict') plt.plot(list(trainY[20:20000]), 'g--', label='train_true') plt.xlabel('typhoon_image') plt.ylabel('typhoon intensity') plt.ylim([850, 1050]) plt.legend(loc='upper left', shadow=True) plt.savefig(train_predict_image) plt.close(fig) # for i in test_selected_folder_index: for key in test_folder: # key = test_folder[i] print(key) if os.path.exists(ModelCheckpoint_file): print('load load_weights', ModelCheckpoint_file) model.load_weights(ModelCheckpoint_file) dataset_image = np.array(hf_image.get(key)) dataset_intensity = np.array(hf_intensity.get(key)) if len(dataset_intensity) > look_back: test_x, test_y = prepare_dataset.extend_dataset_2( dataset_image, dataset_intensity, look_back=look_back) test_x = np.array(test_x, dtype='float32') test_y = np.array(test_y, dtype='float32') if test_x.shape[0] > 0: testPredict = model.predict(test_x, batch_size=batch_size) model.reset_states() # # # invert predictions testPredict = prepare_dataset.reverse_normalize_intensity( testPredict, intensity_mean, intensity_std) testY = prepare_dataset.reverse_normalize_intensity( test_y, intensity_mean, intensity_std) test_predict_image = 'test_file/prediction_output_24/' + str( key) + '_' + str(look_back) + '_test.png' fig = plt.figure() plt.title('test_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key)) plt.plot(list(testPredict[20:10000, 0]), 'r--', label='test_predict') plt.plot(list(testY[20:10000]), 'g--', label='test_true') plt.xlabel('typhoon_image') plt.ylabel('typhoon intensity') plt.ylim([850, 1050]) plt.legend(loc='upper left', shadow=True) plt.savefig(test_predict_image) plt.close(fig) hf_image.close() hf_intensity.close() t2 = time.time() print("using %s seconds" % (t2 - t1))
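# --- Sketch: the per-typhoon HDF5 feature store used throughout these scripts ---
# dataset_imageset.hdf5 / dataset_intensity.hdf5 hold one dataset per typhoon
# number (VGG fc2 features and normalised intensities respectively). The toy
# store below mirrors that layout so the read pattern is runnable on its own;
# the key, feature dimension and file name are placeholders.
import h5py
import numpy as np

with h5py.File('example_feature_store.hdf5', 'w') as hf:
    hf.create_dataset('199406', data=np.random.rand(5, 4096).astype('float32'))

# read it back the way the training and plotting loops above do
with h5py.File('example_feature_store.hdf5', 'r') as hf_image:
    key = '199406'
    dataset_image = np.array(hf_image.get(key))
    print(key, dataset_image.shape)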
# 20. Reading JSON data # Read the JSON file of Wikipedia articles and display the body of the article on 「イギリス」 (the United Kingdom). # For problems 21-29, run them on the article body extracted here. # coding: utf-8 import load print(load.load_json("イギリス"))
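# --- Sketch: one way load.load_json("イギリス") could be implemented for problem 20 ---
# Problem 20 of the NLP 100 exercises reads a JSON-lines dump of Wikipedia
# country articles and returns the body of the requested title. The file name
# jawiki-country.json.gz is the one usually distributed with the exercises and
# is an assumption here; this repository's load module may work differently.
import gzip
import json

def load_wikipedia_article(title, path='jawiki-country.json.gz'):
    with gzip.open(path, 'rt', encoding='utf-8') as f:
        for line in f:
            article = json.loads(line)
            if article.get('title') == title:
                return article.get('text')
    return None

print(load_wikipedia_article("イギリス"))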
def main(): t1 = time.time() train_test_file_list_path = config.train_test_file_path_divid image_path = config.image_path trackDictPath = config.track_dic_path track_dict = load.load_json(trackDictPath) suspicious_file_list_path = config.suspicious_file_list_path suspicious_file_list = load.load_json(suspicious_file_list_path) train_validation_test_subdirs_split = config.train_validation_test_subdirs_split yType = config.yType csv_path = config.csv_path confusion_matrix_path = config.confusion_matrix_path hist_path = config.hist_path nb_epoch = config.nb_epoch optimizer_choice = config.optimizer img_rows, img_cols = config.img_rows, config.img_cols model_check_pointer_file = config.ModelCheckpoint_file nb_worker = config.nb_worker num_labels = config.num_labels batch_size = config.batch_size mean_v, std_v = config.mean_v, config.std_v if not os.path.exists(train_validation_test_subdirs_split): print 'subdirs not split' subdirs_list = load.get_subdirs_list(image_path) train_subdirs_list, validation_subdirs_list, test_subdirs_list = load.split_subdirs( subdirs_list, train_validation_test_subdirs_split) else: print 'subdirs splitted' train_subdirs_list, validation_subdirs_list, test_subdirs_list = load.get_split_subdirs( train_validation_test_subdirs_split) optimizer = classification_model.optimizer_selection( optimizer_choice, nb_epoch) model = classification_model.vgg_19(img_rows, img_cols, num_labels, optimizer) model.summary() # file_list = subtract_suspicious_list(file_list,suspicious_file_list) # trackDictPath = config.track_dic_path # yType = config.yType train_file_list, test_file_list = load.get_train_test_file_split( train_subdirs_list, validation_subdirs_list, test_subdirs_list, track_dict, suspicious_file_list) validation_file_list = train_file_list[:int(len(train_file_list) * 0.05)] train_file_list = train_file_list[int(len(train_file_list) * 0.05):] # if not os.path.exists(train_test_file_list_path): # print 'file_list not splited' # train_file_list ,validation_file_list,test_file_list = load.get_train_validation_test_file_split(train_subdirs_list, validation_subdirs_list, test_subdirs_list,track_dict,suspicious_file_list,train_test_file_list_path) # else: # print 'file list splitted' # train_file_list, validation_file_list,test_file_list = load.load_train_validation_test_file_list(train_test_file_list_path) # print len(file_list) print len(train_file_list) print len(validation_file_list) print len(test_file_list) load.get_input_2(train_file_list, trackDictPath) y_train, y_valid, y_test = load.get_train_validation_test_y( train_file_list, validation_file_list, test_file_list, trackDictPath, yType) print('y_train', len(y_train)) print('y_valid', len(y_valid)) print('y_test', len(y_test)) print(type(y_train)) # print (y_train[0].shape,'train shape') # train_file_list = train_file_list[:2000] # validation_file_list = validation_file_list[-1000:] # test_file_list = test_file_list[:1000] # y_train = y_train[:2000] # y_valid = y_valid[-1000:] # y_test = y_test[:1000] print(get_category_reverse_back(y_train), 'set_y_train') print(get_category_reverse_back(y_valid), 'set_y_valid') print(get_category_reverse_back(y_test), 'set_y_test') print(y_train.shape) print(train_file_list, 'train_file_list') print(validation_file_list, 'validation_file_list') print(test_file_list, 'test_file_list') x_train = load.get_x(train_file_list, img_rows, img_cols, mean_v, std_v) x_valid = load.get_x(validation_file_list, img_rows, img_cols, mean_v, std_v) x_test = load.get_x(test_file_list, img_rows, 
img_cols, mean_v, std_v) print(x_train.shape) print(y_train.shape) train_generator = load.get_chunk(train_file_list, y_train, img_rows, img_cols, num_labels) validation_generator = load.get_chunk(validation_file_list, y_valid, img_rows, img_cols, num_labels) test_generator = load.get_test_chunk(test_file_list, img_rows, img_cols) print(model.layers[0].get_config()) print(model.layers[-1].get_config()) if os.path.exists(model_check_pointer_file): model.load_weights(model_check_pointer_file) # hist = training(model,train_generator,validation_generator,img_rows,img_cols,128,nb_epoch,len(train_file_list),100, nb_worker,model_check_pointer_file) # hist = model_training(model,train_generator,validation_generator,img_rows,img_cols,32,nb_epoch,len(train_file_list),model_check_pointer_file) hist = classification_model.model_training_whole(model, x_train, y_train, x_valid, y_valid, batch_size, nb_epoch, model_check_pointer_file) with open(hist_path, 'w') as f: json.dump(hist.history, f) model.load_weights(model_check_pointer_file) predictions = model_predicting(model, test_generator, len(y_test)) _predictions = np.argmax(predictions, 1) _labels = np.argmax(y_test, 1) write_to_csv(test_file_list, _predictions, _labels, csv_path) accuracy, cm = get_accuracy(_predictions, _labels, True) print(accuracy, 'test accuracy') print(optimizer_choice, 'optimizer_choice') print(cm, 'cm') cm = cm.tolist() with open(confusion_matrix_path, 'w') as f: json.dump(cm, f) t2 = time.time() print('using' + str(t2 - t1))
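# --- Sketch: accuracy and confusion matrix from one-hot predictions ---
# The classification pipeline above takes argmax over the softmax outputs and
# the one-hot labels before scoring and dumps the confusion matrix as JSON.
# get_accuracy is a repository helper; the scikit-learn stand-in below shows
# the same computation on placeholder data.
import json
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix

predictions = np.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])  # softmax outputs
y_test = np.array([[0, 1], [1, 0], [0, 1]])                   # one-hot labels
_predictions = np.argmax(predictions, 1)
_labels = np.argmax(y_test, 1)
accuracy = accuracy_score(_labels, _predictions)
cm = confusion_matrix(_labels, _predictions)
print(accuracy, 'test accuracy')
print(cm, 'cm')
with open('confusion_matrix_example.json', 'w') as f:
    json.dump(cm.tolist(), f)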