Example #1
def handler(event, context):
    """
    entry point for the Lambda function
    :param event: the Lambda event
    :param context: the Lambda context
    :return: None
    """

    print(f"'event': {event}")
    print(f"'context': {context}")

    # -----------------------------------------------------
    # EXTRACT

    # define ny_dataset
    ny_dataset = classes.Dataset("ny_dataset")
    ny_dataset.headers_all = ["date", "cases", "deaths"]
    ny_dataset.headers_key = ny_dataset.headers_all
    ny_dataset.match_field = "date"
    ny_dataset.source_url = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us.csv"

    # extract and print ny_dataset
    ny_dataset.df = extract.extract(ny_dataset.source_url)
    print(f"'ny_dataset.df':\n{ny_dataset.df}")

    # define jh_dataset
    jh_dataset = classes.Dataset("jh_dataset")
    jh_dataset.headers_all = [
        "Date", "Country/Region", "Province/State", "Lat", "Long", "Confirmed",
        "Recovered", "Deaths"
    ]
    jh_dataset.headers_key = ["Date", "Country/Region", "Recovered"]
    jh_dataset.match_field = "Date"
    jh_dataset.source_url = \
        "https://raw.githubusercontent.com/datasets/covid-19/master/data/time-series-19-covid-combined.csv"

    # extract and print jh_dataset
    jh_dataset.df = extract.extract(jh_dataset.source_url,
                                    jh_dataset.headers_key, "Country/Region",
                                    "US")
    print(f"'jh_dataset.df':\n{jh_dataset.df}")

    # -----------------------------------------------------
    # TRANSFORM

    # transform the datasets into CovidStat Instances
    covid_stats = transform.transform(ny_dataset, jh_dataset)

    # print CovidStats
    print(*covid_stats, sep="\n")

    # -----------------------------------------------------
    # LOAD

    # load CovidStat instances into the CovidStats DynamoDB table
    load.load_all(classes.CovidStat, covid_stats)
    load.load_json(covid_stats)
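
For a quick local smoke test of this handler, a minimal sketch; the event payload and the __main__ guard below are hypothetical, since in production the AWS Lambda runtime supplies the real event and context objects:

if __name__ == "__main__":
    fake_event = {"source": "local-test"}  # made-up payload, for illustration only
    handler(fake_event, None)              # the handler only prints context, so None suffices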
Example #2
def main():
	np.random.seed(7)
	t1 = time.time()
	image_path = config.image_path
	track_path = config.track_path
	track_dic_path = config.track_dic_path
	track_dict = load.load_json(track_dic_path)
	intensity_mean,intensity_std = config.intensity_mean, config.intensity_std
	batch_size = config.batch_size
	ModelCheckpoint_file = config.ModelCheckpoint_file
	look_back = config.look_back
	img_rows,img_cols = config.img_rows,config.img_cols
	subdir_list = []
	hist_path = config.hist_path

	# train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512))
	# train_y = np.random.uniform(0,1,(17,1))
	# print (train_x)
	# train_x = np.array(train_x,dtype = 'float32')
	# train_y = np.array(train_y,dtype= 'float32')
	# hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)

	"""
	count the number of images in each typhoon sequence
	"""
	image_number_dictionary={}
	for  subdirs, dirs, files in os.walk(image_path):
		# print (subdirs)
		subdir_list.append(subdirs)
	for subdir in subdir_list:
		count = 0
		for subdirs, dirs, files in os.walk(subdir):
			for file in files:
				count += 1
		key = subdir.split('/')[-1]
		image_number_dictionary[key] = count
		if count < 24:
			print (key,count)
	# print (image_number_dictionary)

	"""
	check whether the number of images equals the number of track records
	"""
	# for subdir in subdir_list:
	# 	for subdirs, dirs, files in os.walk(subdir):
	# 		for file in files:
	# 			# print (file)
	# 			[k1, k2] = file.split("-")[:2]
	# 			key = "".join((k1,k2))
	# 			try:
	# 				mark = track_dict[key]
	# 			except KeyError:
	# 				print (file +'do not have track value')
	

# for k in track_dict.keys():
# 	k2 = k[-6:] # typhoon number
# 	k1 = k[:-6]
# 	file = k1 +'-' + k2 +'*'
# 	file_path = image_path + k2 +'/' + file
# 	if not os.path.isfile(file_path):
# 		print (file_path not exists)
	track_dict_number ={}
	equal_track_image_list = []
	not_equal_track_image_list = []
	for subdir in subdir_list:
		key =subdir.split('/')[-1] 

		if len(key) > 0 and key not in ['201620','201621','201622']:
			track_file_path = track_path + key+'.itk'
			with open(track_file_path,'rb') as tsv_file:
				tsv_reader = csv.reader(tsv_file, delimiter='\t')
				count = 0
				for row in tsv_reader:
					count += 1
				track_dict_number[key] = count
				if count != image_number_dictionary[key]:
					not_equal_track_image_list.append(key)
					# print (key,count,image_number_dictionary[key],'not equal')
				if count == image_number_dictionary[key]:
					# print  (key,count,image_number_dictionary[key],' equal')
					equal_track_image_list.append(key)
	# print (not_equal_track_image_list,'not_equal_track_image_list')
	# print (equal_track_image_list,'equal_track_image_list')
		
	print (len(equal_track_image_list),'length of equal track image list')
	# check that consecutive track records are one hour apart; result: yes for both equal_track_image_list and not_equal_track_image_list

	for key in not_equal_track_image_list:
			ts =[]
			track_file_path = track_path + key+'.itk'
			with open(track_file_path,'rb') as tsv_file:
				tsv_reader = csv.reader(tsv_file, delimiter='\t')
				for row in tsv_reader:
					yy = row[0]
					mm = row[1]
					dd = row[2]
					hh = row[3]
					t = datetime.datetime.strptime(yy +":" + mm +":" + dd +':' +hh, '%Y:%m:%d:%H')
					ts.append(t)
			tmp = ts[0]
			for i in range(1,len(ts)):
				dif = (ts[i] - tmp).total_seconds()
				# print (dif,'dif')
				if dif != 3600:
					print (dif,i,key)
				tmp = ts[i]
			# break
	data_folder_path = config.data_folder_path
	if not os.path.exists(data_folder_path): 
		equal_track_image_list = np.array(equal_track_image_list)
		np.random.shuffle(equal_track_image_list)
		equal_track_image_list = list(equal_track_image_list)
		# equal_track_image_list = equal_track_image_list[:2]
		train_folder = equal_track_image_list[:int(0.9 * len(equal_track_image_list))]
		test_folder = equal_track_image_list[int(0.9* len(equal_track_image_list)):]
		with open(data_folder_path,'w') as f:
			json.dump({'train_folder':train_folder,'test_folder': test_folder},f)
			print ('data_folder_path dumped to: ',data_folder_path)
	else:
		with open(data_folder_path,'r') as f:
			data_folder = json.load(f)
			train_folder = data_folder['train_folder']
			test_folder = data_folder['test_folder']
			print ('load data folder from: ' , data_folder_path)




	"""
	data_path = config.data_path
	
	if not os.path.exists(data_path):
		train_x =[]
		train_y=[]
		test_x = []
		test_y = []
		vgg_model = VGG_16('vgg16_weights.h5')
		sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
	   	vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
		for key in test_folder:
			print(key)
			image_folder = image_path + key +'/'
			track_file_path = track_path + key + '.itk'
			dataset_image = prepare_dataset.dataset_2(image_folder)
			print (dataset_image.shape)
			dataset_input = get_fc2(vgg_model,dataset_image)
			dataset_intensity = prepare_dataset.dataset_1(track_file_path)
			dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std)
			print (dataset_image.shape,'dataset_image.shape')
			print (dataset_intensity.shape,'dataset_intensity')
			data_x,data_y = prepare_dataset.create_dataset_2(dataset_input, dataset_intensity,look_back = look_back)
			test_x += data_x
			test_y += data_y
		# print test_y.shape,test_y
		# train_histss =[]
		# validation_histss=[]
		for key in train_folder:
			print(key)
			image_folder = image_path + key +'/'
			track_file_path = track_path + key + '.itk'
			dataset_image = prepare_dataset.dataset_2(image_folder)
			dataset_input = get_fc2(vgg_model,dataset_image)
			dataset_intensity = prepare_dataset.dataset_1(track_file_path)
			dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std)
			print (dataset_image.shape,'dataset_image.shape')
			print (dataset_intensity.shape,'dataset_intensity')
			data_x,data_y = prepare_dataset.create_dataset_2(dataset_input, dataset_intensity,look_back = look_back)
			# print (len(data_x))
			train_x += data_x
			train_y += data_y
			data_x = np.array(data_x)
			data_y = np.array(data_y)
			# print (data_x.shape,data_y.shape,'data_x,data_y')
			# train_hists=[]
			# validation_hists=[]
			# for i in range(20):
			# 	print('start train')
			# 	hist = model.fit(data_x, data_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)
			# 	model.reset_states()
			# 	train_hists.append(hist.history['loss'][0])
			# 	validation_hists.append(hist.history['val_loss'][0])
			# # print (hists,'hists')
			# train_histss.append(train_hists)
			# validation_histss.append(validation_hists)

		# print (train_histss,'train_histss')
		# print (validation_histss, 'validation_histss')
		
			# print ((data_x.shape),data_y.shape)
		train_x = np.array(train_x,dtype = 'float32')
		train_y = np.array(train_y,dtype = 'float32')
		test_x = np.array(test_x,dtype = 'float32')
		test_y = np.array(test_y,dtype = 'float32')
		
		hf = h5py.File(data_path)
		hf.create_dataset('train_x',data = train_x)
		hf.create_dataset('train_y',data = train_y)
		hf.create_dataset('test_x', data= test_x)
		hf.create_dataset('test_y', data= test_y)
		hf.close()
		print ('dump train test data to' ,data_path)

	else:
		with h5py.File(data_path,'r') as hf:
			train_x = np.array(hf.get('train_x'))
			train_y = np.array(hf.get('train_y'))
			test_x = np.array(hf.get('test_x'))
			test_y = np.array(hf.get('test_y'))
		print ('loaded train test data from ', data_path)
	print (train_x.shape,train_y.shape)
	print (test_x.shape,test_y.shape)
	"""

	# get train test data from pre_built dataset
	dataset_image_path = 'test_file/dataset_imageset.hdf5'
	dataset_type_path = 'test_file/dataset_type.hdf5'

	hf_image = h5py.File(dataset_image_path)

	hf_type = h5py.File(dataset_type_path)
	train_x = []
	train_y = []
	test_x = []
	test_y = []

	vgg_fc2_mean = config.vgg_fc2_mean
	vgg_fc2_std = config.vgg_fc2_std
	"""
	dataset_imageset
	0.423964 mean data
	0.569374 std data
	0.0 min
	4.71836 max
	"""
	# train_folder =train_folder[:2]
	# test_folder = test_folder[:2]
	for key in train_folder:
		print(key)
		dataset_image = np.array(hf_image.get(key))
		dataset_image = prepare_dataset.normalize_intensity(dataset_image,vgg_fc2_mean,vgg_fc2_std)  # normalize image features (reuses the intensity normalization function)
		dataset_type = np.array(hf_type.get(key))

		if len(dataset_image) > look_back:
			data_x,data_y = prepare_dataset.extend_dataset_2(dataset_image, dataset_type,look_back = look_back)
			train_x += data_x
			train_y += data_y

	for key in test_folder:
		print (key)
		dataset_image = np.array(hf_image.get(key))
		dataset_image = prepare_dataset.normalize_intensity(dataset_image,vgg_fc2_mean,vgg_fc2_std)
		dataset_type = np.array(hf_type.get(key))
		if len(dataset_image) > look_back:
			data_x,data_y = prepare_dataset.extend_dataset_2(dataset_image, dataset_type,look_back = look_back)
			test_x += data_x
			test_y += data_y
	hf_type.close()
	hf_image.close()
	# train = train_x + test_x
	train_x = np.array(train_x,dtype = 'float32')
	train_y = np.array(train_y,dtype = 'float32')
	test_x = np.array(test_x,dtype = 'float32')
	test_y = np.array(test_y,dtype = 'float32')
	print (train_x.shape,train_y.shape)
	print (test_x.shape,test_y.shape)
	# nb_classes = max(len(set(train_y)), len(set(test_y)))
	# print set(train_y)
	# print set(test_y)
	# print nb_classes,'nb_classes'
	model = pretrain_model(look_back,batch_size)
	if os.path.exists(ModelCheckpoint_file):
		print ('load  load_weights',ModelCheckpoint_file)
		model.load_weights(ModelCheckpoint_file)
	print(model.summary())
	y_train = np_utils.to_categorical(train_y, None)
	y_test = np_utils.to_categorical(test_y, None)
	print (y_train.shape)

	train_loss_hists=[]
	validation_loss_hists=[]
	train_acc_hists=[]
	validation_acc_hists=[]
	val_acc =  sys.float_info.min

	for i in range(1000):
		print (i,'epoch')
		# ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_'+str(look_back)+str(i)+'_whole_equal.hdf5'
		# print('start train')
		hist = model.fit(train_x, y_train, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)
		print (hist.history)
		model.reset_states()
		train_loss_hists.append(hist.history['loss'][0])
		validation_loss_hists.append(hist.history['val_loss'][0])
		train_acc_hists.append(hist.history['acc'][0])
		validation_acc_hists.append(hist.history['val_acc'][0])
		if val_acc < hist.history['val_acc'][0]:
			model.save_weights(ModelCheckpoint_file)
			print(i,val_acc,'->',hist.history['val_acc'][0],'save_weights',ModelCheckpoint_file)
			val_acc = hist.history['val_acc'][0]
	# print (train_hists,'train_hists')
	# print (validation_hists, 'validation_hists')
	with open(hist_path,'w') as f:
		json.dump({'train_loss':train_loss_hists,'val_loss':validation_loss_hists,'train_acc':train_acc_hists,'val_acc':validation_acc_hists},f)
	# hist = model.fit(train_x, train_y, nb_epoch=2, batch_size=batch_size, verbose=2, validation_split = 0.1,shuffle=False)
		# break
	# with open(hist_path,'w') as j:
	# 	json.dump(hist.history,j)
	# validation_hists_least_index = validation_hists.index(min(validation_hists))
	# print ('ModelCheckpoint_file','test_file/orig_weights_lstm_1.0_image_lookback_'+str(look_back)+str(validation_hists_least_index)+'_whole_equal.hdf5')
	# model.load_weights('test_file/orig_weights_lstm_1.0_image_lookback_'+str(look_back)+str(validation_hists_least_index)+'_whole_equal.hdf5')
	
	print('load_weights',ModelCheckpoint_file)
	model.load_weights(ModelCheckpoint_file)
	trainPredict = model.predict(train_x, batch_size=batch_size)
	
	model.reset_states()

	
	testPredict = model.predict(test_x, batch_size=batch_size)
	# # invert predictions


	# # calculate root mean squared error
	
	train_predictions = np.argmax(trainPredict, 1)
	train_labels = np.argmax(y_train, 1)
	test_predictions = np.argmax(testPredict, 1)
	test_labels = np.argmax(y_test, 1)


	print(look_back,'look_back')
	train_accuracy, train_cm = get_accuracy(train_predictions, train_labels, True)
	test_accuracy, test_cm = get_accuracy(test_predictions, test_labels, True)

	print (train_accuracy,'train accuracy')
	print(train_cm,'train_cm')
	print (test_accuracy,'test accuracy')
	print(test_cm,'test_cm')

	train_cm = train_cm.tolist()
	train_confusion_matrix_path = 'test_file/confusion_matrix_train_extend_normalize_'+ str( look_back) +'.json'
	with open(train_confusion_matrix_path, 'w') as f:
		json.dump(train_cm,f)

	test_cm = test_cm.tolist()
	test_confusion_matrix_path = 'test_file/confusion_matrix_test_extend_normalize_'+ str(look_back) +'.json'
	with open(test_confusion_matrix_path, 'w') as f:
		json.dump(test_cm,f)

	t2 = time.time()
	print ("using  %s seconds" % (t2-t1))
Example #3
def main():
    t1 = time.time()
    train_test_file_list_path = config.train_test_file_path_divid
    image_path = config.image_path
    trackDictPath = config.track_dic_path
    track_dict = load.load_json(trackDictPath)
    suspicious_file_list_path = config.suspicious_file_list_path
    suspicious_file_list = load.load_json(suspicious_file_list_path)
    train_validation_test_subdirs_split = config.train_validation_test_subdirs_split
    yType = config.yType
    csv_path = config.csv_path
    confusion_matrix_path = config.confusion_matrix_path
    hist_path = config.hist_path
    nb_epoch = config.nb_epoch
    optimizer_choice = config.optimizer
    img_rows, img_cols = config.img_rows, config.img_cols
    model_check_pointer_file = config.ModelCheckpoint_file
    nb_worker = config.nb_worker
    num_labels = config.num_labels
    batch_size = config.batch_size
    mean_v, std_v = config.mean_v, config.std_v
    if not os.path.exists(train_validation_test_subdirs_split):
        print('subdirs not split')
        subdirs_list = load.get_subdirs_list(image_path)
        train_subdirs_list, validation_subdirs_list, test_subdirs_list = load.split_subdirs(
            subdirs_list, train_validation_test_subdirs_split)
    else:
        print('subdirs already split')

        train_subdirs_list, validation_subdirs_list, test_subdirs_list = load.get_split_subdirs(
            train_validation_test_subdirs_split)
    optimizer = classification_model.optimizer_selection(
        optimizer_choice, nb_epoch)
    # model = classification_model.vgg_19_with_l2_regularizer(img_rows,img_cols,num_labels,optimizer)
    model_1 = classification_model.vgg_16(img_rows, img_cols, num_labels,
                                          optimizer)
    model_2 = classification_model.model_2()
    model = classification_model.merge_model(model_1, model_2, optimizer,
                                             num_labels)
    model.summary()

    # file_list = subtract_suspicious_list(file_list,suspicious_file_list)
    # trackDictPath = config.track_dic_path
    # yType = config.yType
    # train_file_list, test_file_list =  load.get_train_test_file_split(train_subdirs_list,validation_subdirs_list,test_subdirs_list,track_dict,suspicious_file_list)
    # validation_file_list = train_file_list[:int(len(train_file_list) * 0.05)]
    # train_file_list = train_file_list[int(len(train_file_list) *0.05):]
    if not os.path.exists(train_test_file_list_path):
        print('file_list not split')
        train_file_list, validation_file_list, test_file_list = load.get_train_validation_test_file_split(
            train_subdirs_list, validation_subdirs_list, test_subdirs_list,
            track_dict, suspicious_file_list, train_test_file_list_path)
    else:
        print('file list already split')
        train_file_list, validation_file_list, test_file_list = load.load_train_validation_test_file_list(
            train_test_file_list_path)
    y_train, y_valid, y_test = load.get_train_validation_test_y(
        train_file_list, validation_file_list, test_file_list, trackDictPath,
        yType)

    # print len(file_list)
    # print len(train_file_list)
    # print len(validation_file_list)
    # print len(test_file_list)
    # print ('y_train',len(y_train))
    # print ('y_valid', len(y_valid))
    # print ('y_test',len(y_test))
    # print (type(y_train))
    print(y_train[0].shape, 'train shape')
    # train_file_list = train_file_list[:200]
    # validation_file_list = validation_file_list[-100:]
    # test_file_list = test_file_list[:100]
    # y_train = y_train[:200]
    # y_valid = y_valid[-100:]
    # y_test = y_test[:100]
    x_train = load.get_x(train_file_list)
    x_valid = load.get_x(validation_file_list)
    x_test = load.get_x(test_file_list)

    input_2_train = []
    input_2_valid = []
    input_2_test = []
    for file in train_file_list:
        input_2_train.append(load.get_data_2(track_dict, file))
    for file in validation_file_list:
        input_2_valid.append(load.get_data_2(track_dict, file))
    for file in test_file_list:
        input_2_test.append(load.get_data_2(track_dict, file))
    input_2_train = np.array(input_2_train)
    input_2_valid = np.array(input_2_valid)
    input_2_test = np.array(input_2_test)

    # print (x_train.shape)
    # print(y_train.shape)

    # print (get_category_reverse_back(y_train),'set_y_train')
    # print (get_category_reverse_back(y_valid),'set_y_valid')
    # print (get_category_reverse_back(y_test),'set_y_test')
    # print (y_train.shape)
    # print (train_file_list, 'train_file_list')
    # print (validation_file_list,'validation_file_list')
    # print (test_file_list,'test_file_list')
    random_sample_index = random.sample(xrange(len(train_file_list)),
                                        int(len(train_file_list)))
    x_train_2 = []
    y_train_2 = []
    input_2_train_2 = []
    for index in random_sample_index:
        file_path = train_file_list[index]
        x = load.rotate_image(file_path)
        y = y_train[index]
        # y = load.get_y_file(file_path,track_dict,yType)
        # print x.shape,x
        # print y
        x_train_2.append(x)
        y_train_2.append(y)
        input_2_train_2.append(load.get_data_2(track_dict, file_path))
    x_train_2 = np.array(x_train_2)
    x_train_2 = np.reshape(x_train_2, (-1, 1, img_rows, img_cols))
    input_2_train_2 = np.array(input_2_train_2)

    x_train = np.concatenate((x_train, x_train_2), axis=0)
    y_train = np.concatenate((y_train, y_train_2), axis=0)
    input_2_train = np.concatenate((input_2_train, input_2_train_2), axis=0)
    print(x_train.shape)
    print(y_train.shape)
    print(x_train[0])
    print(y_train[0])
    print('input_2_train', input_2_train.shape)
    print('input_2_valid', input_2_valid.shape)
    print('input_2_test', input_2_test.shape)
    r = random.random()
    random.shuffle(x_train, lambda: r)
    random.shuffle(y_train, lambda: r)
    # shuffle the concatenated array (not input_2_train_2) so the second input stays aligned with x_train and y_train
    random.shuffle(input_2_train, lambda: r)
    print(x_train.shape)
    print(y_train.shape)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_valid = x_valid.astype('float32')

    x_train /= 255
    x_valid /= 255
    x_test /= 255

    # 	# break

    if os.path.exists(model_check_pointer_file):
        model.load_weights(model_check_pointer_file)
    # hist = training(model,train_generator,validation_generator,img_rows,img_cols,128,nb_epoch,len(train_file_list),100, nb_worker,model_check_pointer_file)
    # hist = model_training(model,train_generator,validation_generator,img_rows,img_cols,32,nb_epoch,len(train_file_list),model_check_pointer_file)
    # hist = classification_model.model_training_whole(model,x_train,y_train,x_valid,y_valid, batch_size, nb_epoch,model_check_pointer_file)
    # # with open(hist_path, 'w') as f:
    # # 	json.dump(hist.history,f)
    checkpointer = ModelCheckpoint(filepath=model_check_pointer_file,
                                   verbose=1,
                                   save_best_only=True)
    # early_stop = EarlyStopping(monitor = 'val_loss', patience = 5, mode = 'min')
    early_stop = EarlyStopping(monitor='val_loss', patience=30, mode='min')
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.1,
                                  patience=5,
                                  min_lr=0.0001)
    hist = model.fit([x_train, input_2_train],
                     y_train,
                     batch_size=batch_size,
                     nb_epoch=nb_epoch,
                     validation_data=([x_valid, input_2_valid], y_valid),
                     callbacks=[checkpointer, early_stop, reduce_lr],
                     shuffle=True)

    print(hist.history, 'hist')
    if os.path.exists(model_check_pointer_file):
        model.load_weights(model_check_pointer_file)
    # model.load_weights(model_check_pointer_file)
    # predictions = model_predicting(model,test_generator,len(y_test))
    predictions = model.predict([x_test, input_2_test])
    _predictions = np.argmax(predictions, 1)
    _labels = np.argmax(y_test, 1)
    write_to_csv(test_file_list, _predictions, _labels, csv_path)
    accuracy, cm = get_accuracy(_predictions, _labels, True)
    print(accuracy, 'test accuracy')
    print(optimizer_choice, 'optimizer_choice')
    print(cm, 'cm')
    cm = cm.tolist()
    with open(confusion_matrix_path, 'w') as f:
        json.dump(cm, f)
    t2 = time.time()
    print('using %s seconds' % (t2 - t1))
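
Shuffling several aligned arrays with repeated random.shuffle calls is fragile: every call must consume an identical random sequence, and, as the alignment fix above shows, it is easy to shuffle the wrong array. A sketch of a single-permutation alternative; the helper name is made up and the arrays are only assumed to share their first dimension:

import numpy as np

def shuffle_aligned(arrays, seed=None):
    """Apply one shared permutation so rows stay aligned across all arrays."""
    n = len(arrays[0])
    assert all(len(a) == n for a in arrays), 'arrays must have equal length'
    perm = np.random.RandomState(seed).permutation(n)
    return [np.asarray(a)[perm] for a in arrays]

# usage with the names from the example above:
# x_train, y_train, input_2_train = shuffle_aligned([x_train, y_train, input_2_train], seed=7)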
Example #4
def main():
    np.random.seed(7)
    t1 = time.time()
    image_path = config.image_path
    track_path = config.track_path
    track_dic_path = config.track_dic_path
    track_dict = load.load_json(track_dic_path)
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = config.ModelCheckpoint_file
    look_back = config.look_back
    img_rows, img_cols = config.img_rows, config.img_cols
    subdir_list = []
    hist_path = config.hist_path
    model = pretrain_model(look_back, batch_size)
    if os.path.exists(ModelCheckpoint_file):
        print('load  load_weights', ModelCheckpoint_file)
        model.load_weights(ModelCheckpoint_file)
    print(model.summary())
    # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512))
    # train_y = np.random.uniform(0,1,(17,1))
    # print (train_x)
    # train_x = np.array(train_x,dtype = 'float32')
    # train_y = np.array(train_y,dtype= 'float32')
    # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)
    """
	count the number of images in each typhoon sequence
	"""
    image_number_dictionary = {}
    for subdirs, dirs, files in os.walk(image_path):
        # print (subdirs)
        subdir_list.append(subdirs)
    for subdir in subdir_list:
        count = 0
        for subdirs, dirs, files in os.walk(subdir):
            for file in files:
                count += 1
        key = subdir.split('/')[-1]
        image_number_dictionary[key] = count
        if count < 24:
            print(key, count)
    # print (image_number_dictionary)
    """
	check whether the number of images equals the number of track records
	"""
    # for subdir in subdir_list:
    # 	for subdirs, dirs, files in os.walk(subdir):
    # 		for file in files:
    # 			# print (file)
    # 			[k1, k2] = file.split("-")[:2]
    # 			key = "".join((k1,k2))
    # 			try:
    # 				mark = track_dict[key]
    # 			except KeyError:
    # 				print (file +'do not have track value')

    # for k in track_dict.keys():
    # 	k2 = k[-6:] # typhoon number
    # 	k1 = k[:-6]
    # 	file = k1 +'-' + k2 +'*'
    # 	file_path = image_path + k2 +'/' + file
    # 	if not os.path.isfile(file_path):
    # 		print (file_path not exists)
    track_dict_number = {}
    equal_track_image_list = []
    not_equal_track_image_list = []
    for subdir in subdir_list:
        key = subdir.split('/')[-1]

        if len(key) > 0 and key not in ['201620', '201621', '201622']:
            track_file_path = track_path + key + '.itk'
            with open(track_file_path, 'rb') as tsv_file:
                tsv_reader = csv.reader(tsv_file, delimiter='\t')
                count = 0
                for row in tsv_reader:
                    count += 1
                track_dict_number[key] = count
                if count != image_number_dictionary[key]:
                    not_equal_track_image_list.append(key)
                    # print (key,count,image_number_dictionary[key],'not equal')
                if count == image_number_dictionary[key]:
                    # print  (key,count,image_number_dictionary[key],' equal')
                    equal_track_image_list.append(key)
    # print (not_equal_track_image_list,'not_equal_track_image_list')
    # print (equal_track_image_list,'equal_track_image_list')
    """
	# check_intensities statistics


	data_folder = not_equal_track_image_list + equal_track_image_list
	intensities=[]
	for folder in data_folder:
		file_name = track_path + folder+'.itk'
		with open(file_name,'rb') as tsv_file:
			tsv_reader = csv.reader(tsv_file, delimiter='\t')
			for row in tsv_reader:
				#print row.type
				intensity = float(row[-2])
				intensities.append(intensity)

	intensities = np.array(intensities)
	print intensities
	print intensities.shape
	print np.mean(intensities,axis=0),'mean'
	print np.std(intensities,axis=0),'std'
	print np.min(intensities,axis=0),'min'
	print np.max(intensities,axis =0),'max'
	"""
    print(len(equal_track_image_list), 'length of equal track image list')
    # check that consecutive track records are one hour apart; result: yes for both equal_track_image_list and not_equal_track_image_list

    for key in not_equal_track_image_list:
        ts = []
        track_file_path = track_path + key + '.itk'
        with open(track_file_path, 'rb') as tsv_file:
            tsv_reader = csv.reader(tsv_file, delimiter='\t')
            for row in tsv_reader:
                yy = row[0]
                mm = row[1]
                dd = row[2]
                hh = row[3]
                t = datetime.datetime.strptime(
                    yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H')
                ts.append(t)
        tmp = ts[0]
        for i in range(1, len(ts)):
            dif = (ts[i] - tmp).total_seconds()
            # print (dif,'dif')
            if dif != 3600:
                print(dif, i, key)
            tmp = ts[i]
        # break
    data_folder_path = config.data_folder_path
    if not os.path.exists(data_folder_path):
        equal_track_image_list = np.array(equal_track_image_list)
        np.random.shuffle(equal_track_image_list)
        equal_track_image_list = list(equal_track_image_list)
        # equal_track_image_list = equal_track_image_list[:2]
        train_folder = equal_track_image_list[:int(0.9 *
                                                   len(equal_track_image_list)
                                                   )]
        test_folder = equal_track_image_list[int(0.9 *
                                                 len(equal_track_image_list)):]
        with open(data_folder_path, 'w') as f:
            json.dump(
                {
                    'train_folder': train_folder,
                    'test_folder': test_folder
                }, f)
            print('data_folder_path dumped to: ', data_folder_path)
    else:
        with open(data_folder_path, 'r') as f:
            data_folder = json.load(f)
            train_folder = data_folder['train_folder']
            test_folder = data_folder['test_folder']
            print('load data folder from: ', data_folder_path)
    dataset_image_dic = {}
    dataset_intensity_dic = {}
    dataset_type_dic = {}
    dataset_image_path = 'test_file/dataset_imageset.hdf5'
    dataset_intensity_path = 'test_file/dataset_intensity.hdf5'
    dataset_type_path = 'test_file/dataset_type.hdf5'

    # for key in equal_track_image_list:
    # 	print(key)
    # 	image_folder = image_path + key +'/'
    # 	track_file_path = track_path + key + '.itk'
    # 	dataset_type = prepare_dataset.dataset_1_type(track_file_path)
    # 	print (dataset_type.shape)
    # 	dataset_type_dic[key] = dataset_type
    # 	hf_type.create_dataset(key, data = dataset_type)

    # hf_type.close()

    # equal_track_image_list=equal_track_image_list[:2]
    # if not os.path.exists(dataset_image_path) :
    # 	vgg_model = VGG_16('vgg16_weights.h5')
    # 	sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    #    	vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
    # 	hf_image = h5py.File(dataset_image_path)
    # 	hf_intensity = h5py.File(dataset_intensity_path)

    #

    # 	print ('dumped data into hf_image,intensity')
    # else:
    # 	print ('hf_image intensity exists')
    # 	for key in equal_track_image_list:
    # 		with h5py.File(dataset_image_path,'r') as hf_image:

    # 			dataset_image = np.array(hf_image.get(key))
    # 		with h5py.File(dataset_intensity_path,'r') as hf_intensity:
    # 			dataset_intensity = np.array(hf_intensity.get(key))
    # 		print (key, dataset_image.shape,dataset_intensity.shape)
    # train_selected_folder_index = random.sample(range(0,len(train_folder)),10)
    # test_selected_folder_index = random.sample(range(0,len(test_folder)),10)

    hf_image = h5py.File(dataset_image_path)
    hf_intensity = h5py.File(dataset_intensity_path)
    hf_type = h5py.File(dataset_type_path)
    # for i in train_selected_folder_index:
    # 	key = train_folder[i]
    # train_folder=['201314']
    train_y_types = []
    train_layer_outputs = []
    # train_folder=train_folder[:2]
    # test_folder = test_folder[:2]
    train_folder = ['198811']
    #scatter points
    for key in train_folder:
        print(key)
        if os.path.exists(ModelCheckpoint_file):
            print('load  load_weights', ModelCheckpoint_file)
            model.load_weights(ModelCheckpoint_file)
        dataset_image = np.array(hf_image.get(key))
        dataset_intensity = np.array(hf_intensity.get(key))
        dataset_type = np.array(hf_type.get(key))
        # print (dataset_image.shape,'dataset_image')
        train_x, train_y = prepare_dataset.create_dataset_2_zero(
            dataset_image, dataset_intensity, look_back=look_back)
        train_y_type = prepare_dataset.create_dataset_y_zero(
            dataset_type, look_back=look_back)
        train_x = np.array(train_x, dtype='float32')
        train_y = np.array(train_y, dtype='float32')
        train_y_types += train_y_type
        print(len(train_y_type), key)
        if train_x.shape[0] > 0:
            train_predict_image = 'test_file/tsne_visualization_12_zero_arrow/' + str(
                key) + '_' + str(look_back) + '_train.png'
            # test_sample = np.array(train_x[0],
            train_outputs = []
            for sample in train_x:
                sample = np.reshape(sample, (-1, look_back, 2048))
                train_output_layer = get_lstm_intermidiate_layer_output(
                    model, sample, layer=-2)
                model.reset_states()

                train_outputs.append(train_output_layer[0])
            # print train_output_layer.shape
            # print train_output_layer
            train_layer_outputs += train_outputs
            train_outputs = np.array(train_outputs)
            print(train_outputs.shape)
            Y = tsne(train_outputs, 2, 50, 20.0)
            colors = plt.cm.rainbow(np.linspace(0, 1, 8))
            labels_sets = set(train_y_type)
            scatter_dic = {}
            fig = plt.figure()
            for i in labels_sets:
                ii = np.where(train_y_type == i)[0]
                x = [Y[index, 0] for index in ii]
                y = [Y[index, 1] for index in ii]
                scatter_dic[i] = plt.scatter(x, y, color=colors[int(i)])
            line = plt.plot(Y[:, 0], Y[:, 1], 'k')[0]
            add_arrow(line)
            plt.legend(scatter_dic.values(),
                       scatter_dic.keys(),
                       scatterpoints=1,
                       loc='lower left',
                       ncol=6,
                       fontsize=8)
            plt.xlabel(' x')
            plt.ylabel('y')
            plt.title('tsne of lstm feature,' + 'train_predicts_look_back ' +
                      str(look_back) + ', typhoon number ' + str(key))
            # plt.savefig('test_tsne_2.png')
            plt.savefig(train_predict_image)
            plt.close(fig)
            # fig = plt.figure()
            # ax = fig.add_subplot(111, projection='3d')
            # for i in labels_sets:
            # 	ii = np.where(train_y_type == i)[0]
            # 	x = [Y[index,0] for index in ii]
            # 	y = [Y[index,1] for index in ii]
            # 	z = [train_y[index] for index in ii]
            # 	scatter_dic[i] = ax.scatter(x,y,z,color = colors[int(i)])
            # plt.legend(scatter_dic.values(),scatter_dic.keys(),scatterpoints=1,loc='lower left',ncol = 6,fontsize=8)
            # plt.xlabel(' x')
            # plt.ylabel('y')
            # plt.title('3d tsne of lstm feature,' +'train_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key))
            # plt.savefig('test_tsne_3d_2.png')
            # # plt.savefig(train_predict_image)
            # plt.close(fig)
            # fig = plt.figure()
            #
            # ax.scatter(Y[:,0], Y[:,1], train_y)
            # ax.set_xlabel('X Label')
            # ax.set_ylabel('Y Label')
            # ax.set_zlabel('intensity Label')

            # plt.savefig('tsne_test.png')
            # break
    """
	test_y_types=[]
	test_layer_outputs=[]
	for key in test_folder:
		# key = test_folder[i]
		print (key)
		if os.path.exists(ModelCheckpoint_file):
			print ('load  load_weights',ModelCheckpoint_file)
		model.load_weights(ModelCheckpoint_file)
		dataset_image = np.array(hf_image.get(key))
		dataset_intensity = np.array(hf_intensity.get(key))
		dataset_type = np.array(hf_type.get(key))
		test_x,test_y = prepare_dataset.create_dataset_2_zero(dataset_image, dataset_intensity,look_back = look_back)
		test_x = np.array(test_x,dtype = 'float32')
		test_y = np.array(test_y,dtype = 'float32')
		test_y_type = prepare_dataset.create_dataset_y_zero(dataset_type,look_back=look_back)
		test_y_types += test_y_type
		print len(test_y_type),key
		if test_x.shape[0] > 0:
			test_predict_image = 'test_file/tsne_visualization_12_zero_arrow/' + str(key)+'_'+str(look_back)+'_test.png' 
			# test_sample = np.array(train_x[0],
			test_outputs=[]
			for sample in test_x:
				sample = np.reshape(sample,(-1,look_back,2048))
				test_output_layer = get_lstm_intermidiate_layer_output(model,sample,layer=-2)
				model.reset_states()


				test_outputs.append(test_output_layer[0])
			# print train_output_layer.shape
			# print train_output_layer
			test_layer_outputs += test_outputs
			test_outputs=np.array(test_outputs)
			print test_outputs.shape
			Y = tsne(test_outputs, 2, 50, 20.0);
			colors = plt.cm.rainbow(np.linspace(0, 1, 8))
			labels_sets = set(test_y_type)
			scatter_dic ={}
			fig = plt.figure()
			for i in labels_sets:
				ii = np.where(test_y_type == i)[0]
				x = [Y[index,0] for index in ii]
				y = [Y[index,1] for index in ii]
				scatter_dic[i] = plt.scatter(x,y,color = colors[int(i)])
			line=plt.plot(Y[:,0],Y[:,1],'k')[0]
			add_arrow(line)
			plt.legend(scatter_dic.values(),scatter_dic.keys(),scatterpoints=1,loc='lower left',ncol = 6,fontsize=8)
			plt.xlabel(' x')
			plt.ylabel('y')
			plt.title('tsne of lstm feature,' +'test_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key))
			# plt.savefig('test_tsne_2.png')
			plt.savefig(test_predict_image)
			plt.close(fig)
			# fig = plt.figure()
			# ax = fig.add_subplot(111, projection='3d')
			# for i in labels_sets:
			# 	ii = np.where(test_y_type == i)[0]
			# 	x = [Y[index,0] for index in ii]
			# 	y = [Y[index,1] for index in ii]
			# 	z = [test_y[index] for index in ii]
			# 	scatter_dic[i] = ax.scatter(x,y,z,color = colors[int(i)])
			# plt.legend(scatter_dic.values(),scatter_dic.keys(),scatterpoints=1,loc='lower left',ncol = 6,fontsize=8)
			# plt.xlabel(' x')
			# plt.ylabel('y')
			# plt.title('3d tsne of lstm feature,' +'test_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key))
			# plt.savefig('test_tsne_3d_2_test.png')
			# # plt.savefig(train_predict_image)
			# plt.close(fig)
			# fig = plt.figure()
			#
			# ax.scatter(Y[:,0], Y[:,1], train_y)
			# ax.set_xlabel('X Label')
			# ax.set_ylabel('Y Label')
			# ax.set_zlabel('intensity Label')

			# plt.savefig('tsne_test.png')
		# break
	"""
    hf_image.close()
    hf_intensity.close()
    hf_type.close()

    # train_y_types = np.array(train_y_types)
    # train_layer_outputs = np.array(train_layer_outputs)
    # test_y_types = np.array(test_y_types)
    # test_layer_outputs = np.array(test_layer_outputs)
    # print train_y_types.shape
    # print test_y_types.shape
    # Y = tsne(train_layer_outputs, 2, 50, 20.0);
    # colors = plt.cm.rainbow(np.linspace(0, 1, 8))
    # labels_sets = set(train_y_types)
    # scatter_dic ={}
    # fig = plt.figure()
    # train_predict_image = 'test_file/tsne_visualization_12_zero/' +str(look_back)+'_whole_train.png'
    # for i in labels_sets:
    # 	ii = np.where(train_y_types == i)[0]
    # 	x = [Y[index,0] for index in ii]
    # 	y = [Y[index,1] for index in ii]
    # 	scatter_dic[i] = plt.scatter(x,y,color = colors[int(i)])
    # plt.legend(scatter_dic.values(),scatter_dic.keys(),scatterpoints=1,loc='lower left',ncol = 6,fontsize=8)
    # plt.xlabel(' x')
    # plt.ylabel('y')
    # plt.title('tsne of lstm feature,' +'whole train_predicts_look_back ' + str(look_back))
    # # plt.savefig('test_tsne_2.png')
    # plt.savefig(train_predict_image)
    # plt.close(fig)

    # Y = tsne(test_layer_outputs, 2, 50, 20.0);
    # colors = plt.cm.rainbow(np.linspace(0, 1, 8))
    # labels_sets = set(test_y_types)
    # scatter_dic ={}
    # fig = plt.figure()
    # test_predict_image = 'test_file/tsne_visualization_12_zero/' +str(look_back)+'_whole_test.png'
    # for i in labels_sets:
    # 	ii = np.where(test_y_types == i)[0]
    # 	x = [Y[index,0] for index in ii]
    # 	y = [Y[index,1] for index in ii]
    # 	scatter_dic[i] = plt.scatter(x,y,color = colors[int(i)])
    # plt.legend(scatter_dic.values(),scatter_dic.keys(),scatterpoints=1,loc='lower left',ncol = 6,fontsize=8)
    # plt.xlabel(' x')
    # plt.ylabel('y')
    # plt.title('tsne of lstm feature,' +'whole_test_predicts_look_back ' + str(look_back) )
    # # plt.savefig('test_tsne_2.png')
    # plt.savefig(test_predict_image)
    # plt.close(fig)
    """
	train_folder = ['199307']
	for key in train_folder:
		print(key)
		if os.path.exists(ModelCheckpoint_file):
			print ('load  load_weights',ModelCheckpoint_file)
		model.load_weights(ModelCheckpoint_file)
		dataset_image = np.array(hf_image.get(key))
		dataset_intensity = np.array(hf_intensity.get(key))
		dataset_type = np.array(hf_type.get(key))
		# print (dataset_image.shape,'dataset_image')
		train_x,train_y = prepare_dataset.create_dataset_2_zero(dataset_image, dataset_intensity,look_back = look_back)
		train_y_type = prepare_dataset.create_dataset_y_zero(dataset_type,look_back=look_back)
		train_x = np.array(train_x,dtype = 'float32')
		train_y = np.array(train_y,dtype = 'float32')
		train_y_types += train_y_type
		print len(train_y_type),key
		if train_x.shape[0] >0:
			train_predict_image = 'test_file/tsne_visualization_24_zero/' + str(key)+'_'+str(look_back)+'arrow_train.png' 
			# test_sample = np.array(train_x[0],
			train_outputs=[]
			for sample in train_x:
				sample = np.reshape(sample,(-1,look_back,2048))
				train_output_layer = get_lstm_intermidiate_layer_output(model,sample,layer=-2)
				model.reset_states()

				train_outputs.append(train_output_layer[0])
			# print train_output_layer.shape
			# print train_output_layer
			train_layer_outputs += train_outputs
			train_outputs=np.array(train_outputs)
			print train_outputs.shape

			Y = tsne(train_outputs, 2, 50, 20.0);
			# tsne = TSNE(n_components=2, init='pca',random_state = 0)
			# Y = tsne.fit_transform(train_outputs)
			colors = plt.cm.rainbow(np.linspace(0, 1, 8))
			labels_sets = set(train_y_type)
			scatter_dic ={}
			fig = plt.figure()
			for i in labels_sets:
				ii = np.where(train_y_type == i)[0]
				x = [Y[index,0] for index in ii]
				y = [Y[index,1] for index in ii]
				scatter_dic[i] = plt.scatter(x,y,color = colors[int(i)])
			# x = Y[:0]
			# y = Y[:1]
			# print Y.shape
			line=plt.plot(Y[:,0],Y[:,1],'k')[0]
			add_arrow(line)
			# plt.quiver(x[:-1], y[:-1], x[1:]-x[:-1], y[1:]-y[:-1], scale_units='xy', angles='xy', scale=1)
			plt.legend(scatter_dic.values(),scatter_dic.keys(),scatterpoints=1,loc='lower left',ncol = 6,fontsize=8)
			plt.xlabel(' x')
			plt.ylabel('y')
			plt.title('tsne of lstm feature,' +'train_predicts_look_back ' + str(look_back) + ', typhoon number ' + str(key))
			# plt.savefig('test_tsne_2.png')
			plt.savefig(train_predict_image)
			plt.close(fig)
			"""

    t2 = time.time()
    print("using  %s seconds" % (t2 - t1))
Example #5
def main():
    np.random.seed(7)
    t1 = time.time()
    image_path = config.image_path
    track_path = config.track_path
    track_dic_path = config.track_dic_path
    track_dict = load.load_json(track_dic_path)
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = config.ModelCheckpoint_file
    look_back = config.look_back
    img_rows, img_cols = config.img_rows, config.img_cols
    subdir_list = []
    hist_path = config.hist_path
    model = pretrain_model(look_back, batch_size)
    if os.path.exists(ModelCheckpoint_file):
        print('load  load_weights', ModelCheckpoint_file)
        model.load_weights(ModelCheckpoint_file)
    print(model.summary())
    # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512))
    # train_y = np.random.uniform(0,1,(17,1))
    # print (train_x)
    # train_x = np.array(train_x,dtype = 'float32')
    # train_y = np.array(train_y,dtype= 'float32')
    # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)
    """
	count the number of images in each typhoon sequence
	"""
    image_number_dictionary = {}
    for subdirs, dirs, files in os.walk(image_path):
        # print (subdirs)
        subdir_list.append(subdirs)
    for subdir in subdir_list:
        count = 0
        for subdirs, dirs, files in os.walk(subdir):
            for file in files:
                count += 1
        key = subdir.split('/')[-1]
        image_number_dictionary[key] = count
        if count < 24:
            print(key, count)
    # print (image_number_dictionary)
    """
	check whether the number of images equals the number of track records
	"""
    # for subdir in subdir_list:
    # 	for subdirs, dirs, files in os.walk(subdir):
    # 		for file in files:
    # 			# print (file)
    # 			[k1, k2] = file.split("-")[:2]
    # 			key = "".join((k1,k2))
    # 			try:
    # 				mark = track_dict[key]
    # 			except KeyError:
    # 				print (file +'do not have track value')

    # for k in track_dict.keys():
    # 	k2 = k[-6:] # typhoon number
    # 	k1 = k[:-6]
    # 	file = k1 +'-' + k2 +'*'
    # 	file_path = image_path + k2 +'/' + file
    # 	if not os.path.isfile(file_path):
    # 		print (file_path not exists)
    track_dict_number = {}
    equal_track_image_list = []
    not_equal_track_image_list = []
    for subdir in subdir_list:
        key = subdir.split('/')[-1]

        if len(key) > 0 and key not in ['201620', '201621', '201622']:
            track_file_path = track_path + key + '.itk'
            with open(track_file_path, 'rb') as tsv_file:
                tsv_reader = csv.reader(tsv_file, delimiter='\t')
                count = 0
                for row in tsv_reader:
                    count += 1
                track_dict_number[key] = count
                if count != image_number_dictionary[key]:
                    not_equal_track_image_list.append(key)
                    # print (key,count,image_number_dictionary[key],'not equal')
                if count == image_number_dictionary[key]:
                    # print  (key,count,image_number_dictionary[key],' equal')
                    equal_track_image_list.append(key)
    # print (not_equal_track_image_list,'not_equal_track_image_list')
    # print (equal_track_image_list,'equal_track_image_list')

    print(len(equal_track_image_list), 'length of equal track image list')
    # check that consecutive track records are one hour apart; result: yes for both equal_track_image_list and not_equal_track_image_list

    for key in not_equal_track_image_list:
        ts = []
        track_file_path = track_path + key + '.itk'
        with open(track_file_path, 'rb') as tsv_file:
            tsv_reader = csv.reader(tsv_file, delimiter='\t')
            for row in tsv_reader:
                yy = row[0]
                mm = row[1]
                dd = row[2]
                hh = row[3]
                t = datetime.datetime.strptime(
                    yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H')
                ts.append(t)
        tmp = ts[0]
        for i in range(1, len(ts)):
            dif = (ts[i] - tmp).total_seconds()
            # print (dif,'dif')
            if dif != 3600:
                print(dif, i, key)
            tmp = ts[i]
        # break
    data_folder_path = config.data_folder_path
    if not os.path.exists(data_folder_path):
        equal_track_image_list = np.array(equal_track_image_list)
        np.random.shuffle(equal_track_image_list)
        equal_track_image_list = list(equal_track_image_list)
        # equal_track_image_list = equal_track_image_list[:2]
        train_folder = equal_track_image_list[:int(0.9 *
                                                   len(equal_track_image_list)
                                                   )]
        test_folder = equal_track_image_list[int(0.9 *
                                                 len(equal_track_image_list)):]
        with open(data_folder_path, 'w') as f:
            json.dump(
                {
                    'train_folder': train_folder,
                    'test_folder': test_folder
                }, f)
            print('data_folder_path dumped to: ', data_folder_path)
    else:
        with open(data_folder_path, 'r') as f:
            data_folder = json.load(f)
            train_folder = data_folder['train_folder']
            test_folder = data_folder['test_folder']
            print('load data folder from: ', data_folder_path)
    dataset_image_dic = {}
    dataset_intensity_dic = {}
    dataset_image_path = 'test_file/dataset_imageset.hdf5'
    dataset_intensity_path = 'test_file/dataset_intensity.hdf5'
    # equal_track_image_list=equal_track_image_list[:2]
    # if not os.path.exists(dataset_image_path) :
    # 	vgg_model = VGG_16('vgg16_weights.h5')
    # 	sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    #    	vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
    # 	hf_image = h5py.File(dataset_image_path)
    # 	hf_intensity = h5py.File(dataset_intensity_path)

    # 	for key in equal_track_image_list:
    # 		print(key)
    # 		image_folder = image_path + key +'/'
    # 		track_file_path = track_path + key + '.itk'
    # 		dataset_image = prepare_dataset.dataset_2(image_folder)
    # 		dataset_input = get_fc2(vgg_model,dataset_image)
    # 		dataset_input = np.array(dataset_input)
    # 		dataset_intensity = prepare_dataset.dataset_1(track_file_path)
    # 		dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std)
    # 		print (dataset_input.shape,'dataset_image.shape')
    # 		print (dataset_intensity.shape,'dataset_intensity')
    # 		dataset_image_dic[key] = dataset_input
    # 		dataset_intensity_dic[key] = dataset_intensity
    # 		hf_image.create_dataset(key, data = dataset_input)
    # 		hf_intensity.create_dataset(key, data = dataset_intensity)
    # 	hf_image.close()
    # 	hf_intensity.close()
    # 	print ('dumped data into hf_image,intensity')
    # else:
    # 	print ('hf_image intensity exists')
    # 	for key in equal_track_image_list:
    # 		with h5py.File(dataset_image_path,'r') as hf_image:

    # 			dataset_image = np.array(hf_image.get(key))
    # 		with h5py.File(dataset_intensity_path,'r') as hf_intensity:
    # 			dataset_intensity = np.array(hf_intensity.get(key))
    # 		print (key, dataset_image.shape,dataset_intensity.shape)
    # train_selected_folder_index = random.sample(range(0,len(train_folder)),10)
    # test_selected_folder_index = random.sample(range(0,len(test_folder)),10)

    hf_image = h5py.File(dataset_image_path)
    hf_intensity = h5py.File(dataset_intensity_path)
    # for i in train_selected_folder_index:
    # 	key = train_folder[i]
    for key in train_folder:
        print(key)
        if os.path.exists(ModelCheckpoint_file):
            print('load  load_weights', ModelCheckpoint_file)
            model.load_weights(ModelCheckpoint_file)
        dataset_image = np.array(hf_image.get(key))
        dataset_intensity = np.array(hf_intensity.get(key))
        train_x, train_y = prepare_dataset.create_dataset_2(
            dataset_image, dataset_intensity, look_back=look_back)
        train_x = np.array(train_x, dtype='float32')
        train_y = np.array(train_y, dtype='float32')
        train_predict_image = 'test_file/prediction_output_6_zero_r2/' + str(
            key) + '_' + str(look_back) + '_train.png'
        trainPredict = model.predict(train_x, batch_size=batch_size)
        model.reset_states()
        trainPredict = prepare_dataset.reverse_normalize_intensity(
            trainPredict, intensity_mean, intensity_std)
        trainY = prepare_dataset.reverse_normalize_intensity(
            train_y, intensity_mean, intensity_std)
        fig = plt.figure()
        plt.title('train_predicts_look_back ' + str(look_back) +
                  ', typhoon number ' + str(key))
        z = np.polyfit(np.ravel(trainPredict), trainY, 1)  # np.polyfit expects 1-D input
        p = np.poly1d(z)
        xp = np.linspace(np.min(trainPredict), np.max(trainPredict), 100)
        plt.plot(trainPredict, trainY, '.', xp, p(xp), '-')
        plt.xlabel('prediction value')
        plt.ylabel('true value')
        # plt.legend(loc = 'upper left', shadow =True)
        plt.savefig(train_predict_image)
        plt.close(fig)
        break
    # for i in test_selected_folder_index:
    for key in test_folder:
        # key = test_folder[i]
        print(key)
        if os.path.exists(ModelCheckpoint_file):
            print('load  load_weights', ModelCheckpoint_file)
            model.load_weights(ModelCheckpoint_file)
        dataset_image = np.array(hf_image.get(key))
        dataset_intensity = np.array(hf_intensity.get(key))
        test_x, test_y = prepare_dataset.create_dataset_2(dataset_image,
                                                          dataset_intensity,
                                                          look_back=look_back)
        test_x = np.array(test_x, dtype='float32')
        test_y = np.array(test_y, dtype='float32')
        testPredict = model.predict(test_x, batch_size=batch_size)
        model.reset_states()
        # # # invert predictions
        testPredict = prepare_dataset.reverse_normalize_intensity(
            testPredict, intensity_mean, intensity_std)
        testY = prepare_dataset.reverse_normalize_intensity(
            test_y, intensity_mean, intensity_std)
        test_predict_image = 'test_file/prediction_output_6_zero_r2/' + str(
            key) + '_' + str(look_back) + '_test.png'
        fig = plt.figure()
        z = np.polyfit(np.ravel(testPredict), testY, 1)  # np.polyfit expects 1-D input
        p = np.poly1d(z)
        xp = np.linspace(np.min(testPredict), np.max(testPredict), 100)
        plt.plot(testPredict, testY, '.', xp, p(xp), '-')
        plt.title('test_predicts_look_back ' + str(look_back) +
                  ', typhoon number ' + str(key))
        plt.xlabel('prediction value')
        plt.ylabel('true value')
        # plt.legend(loc = 'upper left', shadow =True)
        plt.savefig(test_predict_image)
        plt.close(fig)
        break
    hf_image.close()
    hf_intensity.close()

    # trainY = prepare_dataset.reverse_normalize_intensity(train_y,intensity_mean,intensity_std)
    # print('load_weights',ModelCheckpoint_file)
    # model.load_weights(ModelCheckpoint_file)
    # trainPredict = model.predict(train_x, batch_size=batch_size)
    # trainPredict = prepare_dataset.reverse_normalize_intensity(trainPredict,intensity_mean,intensity_std)

    # trainY = prepare_dataset.reverse_normalize_intensity(train_y,intensity_mean,intensity_std)
    # trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:,0]))
    # model.reset_states()

    # print('Train Score: %.2f RMSE' % (trainScore))
    # testPredict = model.predict(test_x, batch_size=batch_size)
    # # # invert predictions
    # testPredict = prepare_dataset.reverse_normalize_intensity(testPredict,intensity_mean,intensity_std)
    # testY = prepare_dataset.reverse_normalize_intensity(test_y,intensity_mean,intensity_std)
    # # # calculate root mean squared error

    # testScore = math.sqrt(mean_squared_error(testY, testPredict[:,0]))
    # print('Test Score: %.2f RMSE' % (testScore))
    # print(look_back,'look_back')
    """
	train_predict_image = config.train_predict_image
	test_predict_image = config.test_predict_image
	fig = plt.figure()
	plt.title('train_predicts_look_back')
	plt.plot(list(trainPredict[:20000,0]),'r--',label= 'train_predict')
	plt.plot(list(trainY[:20000]), 'g--',label = 'train')
	plt.xlabel('typhoon_image')
	plt.ylabel('typhoon intensity')
	plt.legend(loc = 'upper left', shadow =True)
	plt.savefig(train_predict_image)
	plt.close(fig)
	fig = plt.figure()
	plt.title('test_predicts_look_back')
	plt.plot(list(testPredict[:10000,0]),'r--',label= 'test_predict')
	plt.plot(list(testY[:10000]), 'g--',label = 'test')
	plt.xlabel('typhoon_image')
	plt.ylabel('typhoon intensity')
	plt.legend(loc = 'upper left', shadow =True)
	plt.savefig(test_predict_image)
	plt.close(fig)
	"""
    t2 = time.time()
    print("using  %s seconds" % (t2 - t1))
Example #6
# 23. Section structure
# Display the section names contained in the article and their levels (e.g., the level of "== section name ==" is 1).

# coding: utf-8

import re
import load

pattern = re.compile(r"""^(={2,})\s*(.+?)\s*\1.*$""", re.MULTILINE)

# Extract
result = pattern.findall(load.load_json("イギリス"))

# Display
# '==' => 1
for category in result:
    level = len(category[0]) - 1
    indent = "\t" * (level - 1)
    print(f"{indent}{category[1]}({level})")
Example #7
def main():
    np.random.seed(7)
    t1 = time.time()
    image_path = config.image_path
    track_path = config.track_path
    track_dic_path = config.track_dic_path
    track_dict = load.load_json(track_dic_path)
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = config.ModelCheckpoint_file
    look_back = config.look_back
    img_rows, img_cols = config.img_rows, config.img_cols
    subdir_list = []
    hist_path = config.hist_path
    model = pretrain_model(look_back, batch_size)
    if os.path.exists(ModelCheckpoint_file):
        print('load  load_weights', ModelCheckpoint_file)
        model.load_weights(ModelCheckpoint_file)
    print(model.summary())
    # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512))
    # train_y = np.random.uniform(0,1,(17,1))
    # print (train_x)
    # train_x = np.array(train_x,dtype = 'float32')
    # train_y = np.array(train_y,dtype= 'float32')
    # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)
    """
	count the number of image in each typhoon sequence
	"""
    image_number_dictionary = {}
    for subdirs, dirs, files in os.walk(image_path):
        # print (subdirs)
        subdir_list.append(subdirs)
    for subdir in subdir_list:
        count = 0
        for subdirs, dirs, files in os.walk(subdir):
            for file in files:
                count += 1
        key = subdir.split('/')[-1]
        image_number_dictionary[key] = count
        if count < 24:
            print(key, count)
    # print (image_number_dictionary)
    """
	check the number of images equals the number of track data?
	"""
    # for subdir in subdir_list:
    # 	for subdirs, dirs, files in os.walk(subdir):
    # 		for file in files:
    # 			# print (file)
    # 			[k1, k2] = file.split("-")[:2]
    # 			key = "".join((k1,k2))
    # 			try:
    # 				mark = track_dict[key]
    # 			except KeyError:
    # 				print (file +'do not have track value')

    # for k in track_dict.keys():
    # 	k2 = k[-6:] # typhoon number
    # 	k1 = k[:-6]
    # 	file = k1 +'-' + k2 +'*'
    # 	file_path = image_path + k2 +'/' + file
    # 	if not os.path.isfile(file_path):
    # 		print (file_path not exists)
    track_dict_number = {}
    equal_track_image_list = []
    not_equal_track_image_list = []
    for subdir in subdir_list:
        key = subdir.split('/')[-1]

        if len(key) > 0 and key not in ['201620', '201621', '201622']:
            track_file_path = track_path + key + '.itk'
            with open(track_file_path, 'rb') as tsv_file:
                tsv_reader = csv.reader(tsv_file, delimiter='\t')
                count = 0
                for row in tsv_reader:
                    count += 1
                track_dict_number[key] = count
                if count != image_number_dictionary[key]:
                    not_equal_track_image_list.append(key)
                    # print (key,count,image_number_dictionary[key],'not equal')
                if count == image_number_dictionary[key]:
                    # print  (key,count,image_number_dictionary[key],' equal')
                    equal_track_image_list.append(key)
    # print (not_equal_track_image_list,'not_equal_track_image_list')
    # print (equal_track_image_list,'equal_track_image_list')

    print(len(equal_track_image_list), 'length of equal track image list')
    # check if the track file time step is one hour; result is yes for both the equal and not_equal image lists

    for key in not_equal_track_image_list:
        ts = []
        track_file_path = track_path + key + '.itk'
        with open(track_file_path, 'rb') as tsv_file:
            tsv_reader = csv.reader(tsv_file, delimiter='\t')
            for row in tsv_reader:
                yy = row[0]
                mm = row[1]
                dd = row[2]
                hh = row[3]
                t = datetime.datetime.strptime(
                    yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H')
                ts.append(t)
        tmp = ts[0]
        for i in range(1, len(ts)):
            dif = (ts[i] - tmp).total_seconds()
            # print (dif,'dif')
            if dif != 3600:
                print(dif, i, key)
            tmp = ts[i]
        # break
    data_folder = 'test_file/sorted_intensity_data_folder.json'
    # intensity_min = {}
    # for key in equal_track_image_list:
    # 	track_file_path = track_path + key + '.itk'
    # 	dataset_intensity = prepare_dataset.dataset_1(track_file_path)
    # 	intensity_min[key] = min(dataset_intensity)
    # sorted_intensity_min = sorted(intensity_min.iteritems(), key=lambda (k,v): (v,k))
    # sorted_intensity_min =list(sorted_intensity_min)
    # sorted_intensity_min = np.array(sorted_intensity_min)
    # sorted_data_folder = sorted_intensity_min[:,0]
    # train_folder=[]
    # test_folder=[]
    # for i in range(0,len(sorted_data_folder),10):
    # 	if (i+10) <= len(sorted_data_folder):
    # 		j = i+10
    # 	else:
    # 		j = len(sorted_data_folder) +1
    # 	small_list = sorted_data_folder[i:j]
    # 	small_list = np.array(small_list)
    # 	np.random.shuffle(small_list)
    # 	small_list = list(small_list)
    # 	train_folder += small_list[:int(0.9*len(small_list))]
    # 	test_folder += small_list[int(0.9*len(small_list)):]
    # print train_folder
    # print test_folder
    # print len(train_folder)
    # print len(test_folder)
    # with open(data_folder,'w') as f:
    # 	json.dump({'train_folder':train_folder,'test_folder':test_folder},f)
    """

	# get train test data from pre_built dataset
	
	"""
    # dataset_imageset
    # 0.423964 mean data
    # 0.569374 std data
    # 0.0 min
    # 4.71836 max
    dataset_image_path = 'test_file/dataset_imageset.hdf5'
    dataset_intensity_path = 'test_file/dataset_intensity.hdf5'

    hf_image = h5py.File(dataset_image_path)
    hf_intensity = h5py.File(dataset_intensity_path)

    train_x = []
    train_y = []
    test_x = []
    test_y = []

    vgg_fc2_mean = config.vgg_fc2_mean
    vgg_fc2_std = config.vgg_fc2_std
    with open(data_folder, 'r') as f:
        data_folder = json.load(f)
        train_folder = data_folder['train_folder']
        test_folder = data_folder['test_folder']
    train_folder = np.array(train_folder)
    test_folder = np.array(test_folder)
    np.random.shuffle(train_folder)
    np.random.shuffle(test_folder)
    train_folder = list(train_folder)
    test_folder = list(test_folder)
    for key in train_folder:
        print(key)
        dataset_image = np.array(hf_image.get(key))
        # dataset_image = prepare_dataset.normalize_intensity(dataset_image,vgg_fc2_mean,vgg_fc2_std) #normalize image (the same function of normalize intensity)
        dataset_intensity = np.array(hf_intensity.get(key))
        if len(dataset_intensity) > look_back:
            data_x, data_y = prepare_dataset.extend_dataset_2_zero(
                dataset_image, dataset_intensity, look_back=look_back)
            train_x += data_x
            train_y += data_y

    for key in test_folder:
        print(key)
        dataset_image = np.array(hf_image.get(key))
        # dataset_image = prepare_dataset.normalize_intensity(dataset_image,vgg_fc2_mean,vgg_fc2_std)
        dataset_intensity = np.array(hf_intensity.get(key))
        if len(dataset_intensity) > look_back:
            data_x, data_y = prepare_dataset.extend_dataset_2_zero(
                dataset_image, dataset_intensity, look_back=look_back)
            test_x += data_x
            test_y += data_y
    # train = train_x + test_x
    train_x = np.array(train_x, dtype='float32')
    train_y = np.array(train_y, dtype='float32')
    test_x = np.array(test_x, dtype='float32')
    test_y = np.array(test_y, dtype='float32')
    print(train_x.shape, train_y.shape)
    print(test_x.shape, test_y.shape)

    train_hists = []
    validation_hists = []
    val_loss = sys.float_info.max

    for i in range(1000):
        print(i, 'epoch')
        # ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_'+str(look_back)+str(i)+'_whole_equal.hdf5'
        # print('start train')
        hist = model.fit(train_x,
                         train_y,
                         nb_epoch=1,
                         batch_size=batch_size,
                         verbose=2,
                         validation_split=0.1,
                         shuffle=False)
        model.reset_states()
        train_hists.append(hist.history['loss'][0])
        validation_hists.append(hist.history['val_loss'][0])
        if val_loss > hist.history['val_loss'][0]:
            model.save_weights(ModelCheckpoint_file)
            print(i, val_loss, '->', hist.history['val_loss'][0],
                  'save_weights', ModelCheckpoint_file)
            val_loss = hist.history['val_loss'][0]
    print(train_hists, 'train_hists')
    print(validation_hists, 'validation_hists')
    with open(hist_path, 'w') as f:
        json.dump({'train_loss': train_hists, 'val_loss': validation_hists}, f)
    # hist = model.fit(train_x, train_y, nb_epoch=2, batch_size=batch_size, verbose=2, validation_split = 0.1,shuffle=False)
    # break
    # with open(hist_path,'w') as j:
    # 	json.dump(hist.history,j)
    # validation_hists_least_index = validation_hists.index(min(validation_hists))
    # print ('ModelCheckpoint_file','test_file/orig_weights_lstm_1.0_image_lookback_'+str(look_back)+str(validation_hists_least_index)+'_whole_equal.hdf5')
    # model.load_weights('test_file/orig_weights_lstm_1.0_image_lookback_'+str(look_back)+str(validation_hists_least_index)+'_whole_equal.hdf5')

    print('load_weights', ModelCheckpoint_file)
    model.load_weights(ModelCheckpoint_file)
    trainPredict = model.predict(train_x, batch_size=batch_size)
    trainPredict = prepare_dataset.reverse_normalize_intensity(
        trainPredict, intensity_mean, intensity_std)

    trainY = prepare_dataset.reverse_normalize_intensity(
        train_y, intensity_mean, intensity_std)
    trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:, 0]))
    model.reset_states()

    print('Train Score: %.2f RMSE' % (trainScore))
    testPredict = model.predict(test_x, batch_size=batch_size)
    # # invert predictions
    testPredict = prepare_dataset.reverse_normalize_intensity(
        testPredict, intensity_mean, intensity_std)
    testY = prepare_dataset.reverse_normalize_intensity(
        test_y, intensity_mean, intensity_std)
    # # calculate root mean squared error

    testScore = math.sqrt(mean_squared_error(testY, testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))
    print(look_back, 'look_back')

    t2 = time.time()
    print("using  %s seconds" % (t2 - t1))
def main():
	np.random.seed(7)
	t1 = time.time()
	image_path = config.image_path
	track_path = config.track_path
	track_dic_path = config.track_dic_path
	track_dict = load.load_json(track_dic_path)
	intensity_mean,intensity_std = config.intensity_mean, config.intensity_std
	batch_size = config.batch_size
	# ModelCheckpoint_file = config.ModelCheckpoint_file
	# ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_24_whole_equal_pretrain_epoch_1000_adadelta_0.0001_prediction_zero.hdf5'
	# ModelCheckpoint_file='test_file/orig_weights_lstm_1.0_image_lookback_6_whole_equal_pretrain_epoch_1000_adadelta_0.0001_server_2.hdf5'
	# look_back = config.look_back
	look_back = 24
	img_rows,img_cols = config.img_rows,config.img_cols
	subdir_list = []
	hist_path = config.hist_path
	model = pretrain_model(look_back,batch_size)
	# ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_6_whole_equal_pretrain_epoch_1000_adadelta_0.0001_04_13.hdf5'
	# ModelCheckpoint_file ='test_file/orig_weights_lstm_1.0_image_lookback_12_whole_equal_pretrain_epoch_1000_adadelta_0.0001_prediction_zero.hdf5'
	# ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_24_whole_equal_pretrain_epoch_1000_adadelta_0.0001_prediction_zero.hdf5'
	# ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_6_whole_equal_pretrain_epoch_1000_adadelta_0.0001_04_13.hdf5'
	# ModelCheckpoint_file = 'test_file/orig_hist_lstm_1.0_image_lookback_12_whole_equal_pretrain__epoch_1000_adadelta_0.0001_prediction.hdf5'
	ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_24_whole_equal_pretrain_epoch_1000_adadelta_0.0001_04_13.hdf5'
	if os.path.exists(ModelCheckpoint_file):
		print ('load  load_weights',ModelCheckpoint_file)
		model.load_weights(ModelCheckpoint_file)
	print(model.summary())
	# train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512))
	# train_y = np.random.uniform(0,1,(17,1))
	# print (train_x)
	# train_x = np.array(train_x,dtype = 'float32')
	# train_y = np.array(train_y,dtype= 'float32')
	# hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)

	"""
	count the number of image in each typhoon sequence
	"""
	image_number_dictionary={}
	for  subdirs, dirs, files in os.walk(image_path):
		# print (subdirs)
		subdir_list.append(subdirs)
	for subdir in subdir_list:
		count = 0
		for subdirs, dirs, files in os.walk(subdir):
			for file in files:
				count += 1
		key = subdir.split('/')[-1]
		image_number_dictionary[key] = count
		if count < 24:
			print (key,count)
	# print (image_number_dictionary)

	"""
	check the number of images equals the number of track data?
	"""
	# for subdir in subdir_list:
	# 	for subdirs, dirs, files in os.walk(subdir):
	# 		for file in files:
	# 			# print (file)
	# 			[k1, k2] = file.split("-")[:2]
	# 			key = "".join((k1,k2))
	# 			try:
	# 				mark = track_dict[key]
	# 			except KeyError:
	# 				print (file +'do not have track value')
	

	# for k in track_dict.keys():
	# 	k2 = k[-6:] # typhoon number
	# 	k1 = k[:-6]
	# 	file = k1 +'-' + k2 +'*'
	# 	file_path = image_path + k2 +'/' + file
	# 	if not os.path.isfile(file_path):
	# 		print (file_path not exists)
	track_dict_number ={}
	equal_track_image_list = []
	not_equal_track_image_list = []
	for subdir in subdir_list:
		key =subdir.split('/')[-1] 

		if len(key) > 0 and key not in ['201620','201621','201622']:
			track_file_path = track_path + key+'.itk'
			with open(track_file_path,'rb') as tsv_file:
				tsv_reader = csv.reader(tsv_file, delimiter='\t')
				count = 0
				for row in tsv_reader:
					count += 1
				track_dict_number[key] = count
				if count != image_number_dictionary[key]:
					not_equal_track_image_list.append(key)
					# print (key,count,image_number_dictionary[key],'not equal')
				if count == image_number_dictionary[key]:
					# print  (key,count,image_number_dictionary[key],' equal')
					equal_track_image_list.append(key)
	# print (not_equal_track_image_list,'not_equal_track_image_list')
	# print (equal_track_image_list,'equal_track_image_list')
	"""
	# check_intensities statistics


	data_folder = not_equal_track_image_list + equal_track_image_list
	intensities=[]
	for folder in data_folder:
		file_name = track_path + folder+'.itk'
		with open(file_name,'rb') as tsv_file:
			tsv_reader = csv.reader(tsv_file, delimiter='\t')
			for row in tsv_reader:
				#print row.type
				intensity = float(row[-2])
				intensities.append(intensity)

	intensities = np.array(intensities)
	print intensities
	print intensities.shape
	print np.mean(intensities,axis=0),'mean'
	print np.std(intensities,axis=0),'std'
	print np.min(intensities,axis=0),'min'
	print np.max(intensities,axis =0),'max'
	"""
	print (len(equal_track_image_list),'length of equal track image list')
	# check if the track file time step is one hour; result is yes for both the equal and not_equal image lists

	for key in not_equal_track_image_list:
			ts =[]
			track_file_path = track_path + key+'.itk'
			with open(track_file_path,'rb') as tsv_file:
				tsv_reader = csv.reader(tsv_file, delimiter='\t')
				for row in tsv_reader:
					yy = row[0]
					mm = row[1]
					dd = row[2]
					hh = row[3]
					t = datetime.datetime.strptime(yy +":" + mm +":" + dd +':' +hh, '%Y:%m:%d:%H')
					ts.append(t)
			tmp = ts[0]
			for i in range(1,len(ts)):
				dif = (ts[i] - tmp).total_seconds()
				# print (dif,'dif')
				if dif != 3600:
					print (dif,i,key)
				tmp = ts[i]
			# break
	data_folder_path = config.data_folder_path
	# data_folder_path ='test_file/sorted_intensity_data_folder.json'
	if not os.path.exists(data_folder_path): 
		equal_track_image_list = np.array(equal_track_image_list)
		np.random.shuffle(equal_track_image_list)
		equal_track_image_list = list(equal_track_image_list)
		# equal_track_image_list = equal_track_image_list[:2]
		train_folder = equal_track_image_list[:int(0.9 * len(equal_track_image_list))]
		test_folder = equal_track_image_list[int(0.9* len(equal_track_image_list)):]
		with open(data_folder_path,'w') as f:
			json.dump({'train_folder':train_folder,'test_folder': test_folder},f)
			print ('data_folder_path dumped to: ',data_folder_path)
	else:
		with open(data_folder_path,'r') as f:
			data_folder = json.load(f)
			train_folder = data_folder['train_folder']
			test_folder = data_folder['test_folder']
			print ('load data folder from: ' , data_folder_path)
	dataset_image_dic = {}
	dataset_intensity_dic ={}
	dataset_image_path = 'test_file/dataset_imageset.hdf5'
	dataset_intensity_path = 'test_file/dataset_intensity.hdf5'
	# equal_track_image_list=equal_track_image_list[:2]
	# if not os.path.exists(dataset_image_path) :
	# 	vgg_model = VGG_16('vgg16_weights.h5')
	# 	sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
	#    	vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
	# 	hf_image = h5py.File(dataset_image_path)
	# 	hf_intensity = h5py.File(dataset_intensity_path)
		
	# 	for key in equal_track_image_list:
	# 		print(key)
	# 		image_folder = image_path + key +'/'
	# 		track_file_path = track_path + key + '.itk'
	# 		dataset_image = prepare_dataset.dataset_2(image_folder)
	# 		dataset_input = get_fc2(vgg_model,dataset_image)
	# 		dataset_input = np.array(dataset_input)
	# 		dataset_intensity = prepare_dataset.dataset_1(track_file_path)
	# 		dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std)
	# 		print (dataset_input.shape,'dataset_image.shape')
	# 		print (dataset_intensity.shape,'dataset_intensity')
	# 		dataset_image_dic[key] = dataset_input
	# 		dataset_intensity_dic[key] = dataset_intensity
	# 		hf_image.create_dataset(key, data = dataset_input)
	# 		hf_intensity.create_dataset(key, data = dataset_intensity)
	# 	hf_image.close()
	# 	hf_intensity.close()
	# 	print ('dumped data into hf_image,intensity')
	# else:
	# 	print ('hf_image intensity exists')
	# 	for key in equal_track_image_list:
	# 		with h5py.File(dataset_image_path,'r') as hf_image:
			
	# 			dataset_image = np.array(hf_image.get(key))
	# 		with h5py.File(dataset_intensity_path,'r') as hf_intensity:
	# 			dataset_intensity = np.array(hf_intensity.get(key))
	# 		print (key, dataset_image.shape,dataset_intensity.shape)
	# train_selected_folder_index = random.sample(range(0,len(train_folder)),10)
	# test_selected_folder_index = random.sample(range(0,len(test_folder)),10)

	hf_image = h5py.File(dataset_image_path)
	hf_intensity = h5py.File(dataset_intensity_path)
	# for i in train_selected_folder_index:
	# 	key = train_folder[i]
	# train_folder=['201314']
	# train_folder=['199406']
	# train_folder = train_folder[:2]
	# test_folder = test_folder[:2]
	csv_file = 'test_file/24_prediction_error_each_typhoon.csv'

	with open(csv_file, 'wb') as csvfile:
		writer = csv.writer(csvfile, delimiter=' ')
		for key in train_folder:
			print(key)
			if os.path.exists(ModelCheckpoint_file):
				print ('load  load_weights',ModelCheckpoint_file)
			model.load_weights(ModelCheckpoint_file)
			dataset_image = np.array(hf_image.get(key))
			dataset_intensity = np.array(hf_intensity.get(key))
			# print (dataset_image.shape,'dataset_image')
			if len(dataset_intensity) > look_back:
				train_x,train_y = prepare_dataset.create_dataset_2(dataset_image, dataset_intensity,look_back = look_back)
				train_x = np.array(train_x,dtype = 'float32')
				train_y = np.array(train_y,dtype = 'float32')
				if train_x.shape[0] >0:
					# train_predict_image = 'test_file/prediction_output_6_04_13_old_version/' + str(key)+'_'+str(look_back)+'_train.png' 
					trainPredict = model.predict(train_x, batch_size=batch_size)
					model.reset_states()
					trainPredict = prepare_dataset.reverse_normalize_intensity(trainPredict,intensity_mean,intensity_std)
					trainY = prepare_dataset.reverse_normalize_intensity(train_y,intensity_mean,intensity_std)
					# print (trainPredict,'train_predict')
					# print (trainY,'trainY')
					trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:,0]))
					print('Train Score: %.2f RMSE' % (trainScore))
					writer.writerow([key,'train',trainScore])
		# for i in test_selected_folder_index:
		for key in test_folder:
			# key = test_folder[i]
			print (key)
			if os.path.exists(ModelCheckpoint_file):
				print ('load  load_weights',ModelCheckpoint_file)
			model.load_weights(ModelCheckpoint_file)
			dataset_image = np.array(hf_image.get(key))
			dataset_intensity = np.array(hf_intensity.get(key))
			if len(dataset_intensity) > look_back:
				test_x,test_y = prepare_dataset.create_dataset_2(dataset_image, dataset_intensity,look_back = look_back)
				test_x = np.array(test_x,dtype = 'float32')
				test_y = np.array(test_y,dtype = 'float32')
				if test_x.shape[0] > 0:
					testPredict = model.predict(test_x, batch_size=batch_size)
					model.reset_states()
				# # # invert predictions
					testPredict = prepare_dataset.reverse_normalize_intensity(testPredict,intensity_mean,intensity_std)
					testY = prepare_dataset.reverse_normalize_intensity(test_y,intensity_mean,intensity_std)
					testScore = math.sqrt(mean_squared_error(testY, testPredict[:,0]))
					writer.writerow([key,'test',testScore])
	hf_image.close()
	hf_intensity.close()

	t2 = time.time()
	print ("using  %s seconds" % (t2-t1))
def main():
    np.random.seed(7)
    t1 = time.time()
    image_path = config.image_path
    track_path = config.track_path
    track_dic_path = config.track_dic_path
    track_dict = load.load_json(track_dic_path)
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = config.ModelCheckpoint_file
    look_back = config.look_back
    img_rows, img_cols = config.img_rows, config.img_cols
    subdir_list = []
    hist_path = config.hist_path
    mean_v, std_v = config.mean_v, config.std_v
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    model = pretrain_model(look_back, batch_size)
    if os.path.exists(ModelCheckpoint_file):
        print('load  load_weights', ModelCheckpoint_file)
        model.load_weights(ModelCheckpoint_file)
    print(model.summary())
    # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512))
    # train_y = np.random.uniform(0,1,(17,1))
    # print (train_x)
    # train_x = np.array(train_x,dtype = 'float32')
    # train_y = np.array(train_y,dtype= 'float32')
    # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)
    """
	count the number of image in each typhoon sequence
	"""
    image_number_dictionary = {}
    for subdirs, dirs, files in os.walk(image_path):
        # print (subdirs)
        subdir_list.append(subdirs)
    for subdir in subdir_list:
        count = 0
        for subdirs, dirs, files in os.walk(subdir):
            for file in files:
                count += 1
        key = subdir.split('/')[-1]
        image_number_dictionary[key] = count
        if count < 24:
            print(key, count)
    # print (image_number_dictionary)
    """
	check the number of images equals the number of track data?
	"""
    # for subdir in subdir_list:
    # 	for subdirs, dirs, files in os.walk(subdir):
    # 		for file in files:
    # 			# print (file)
    # 			[k1, k2] = file.split("-")[:2]
    # 			key = "".join((k1,k2))
    # 			try:
    # 				mark = track_dict[key]
    # 			except KeyError:
    # 				print (file +'do not have track value')

    # for k in track_dict.keys():
    # 	k2 = k[-6:] # typhoon number
    # 	k1 = k[:-6]
    # 	file = k1 +'-' + k2 +'*'
    # 	file_path = image_path + k2 +'/' + file
    # 	if not os.path.isfile(file_path):
    # 		print (file_path not exists)
    track_dict_number = {}
    equal_track_image_list = []
    not_equal_track_image_list = []
    for subdir in subdir_list:
        key = subdir.split('/')[-1]

        if len(key) > 0 and key not in ['201620', '201621', '201622']:
            track_file_path = track_path + key + '.itk'
            with open(track_file_path, 'rb') as tsv_file:
                tsv_reader = csv.reader(tsv_file, delimiter='\t')
                count = 0
                for row in tsv_reader:
                    count += 1
                track_dict_number[key] = count
                if count != image_number_dictionary[key]:
                    not_equal_track_image_list.append(key)
                    # print (key,count,image_number_dictionary[key],'not equal')
                if count == image_number_dictionary[key]:
                    # print  (key,count,image_number_dictionary[key],' equal')
                    equal_track_image_list.append(key)
    # print (not_equal_track_image_list,'not_equal_track_image_list')
    # print (equal_track_image_list,'equal_track_image_list')

    print(len(equal_track_image_list), 'length of equal track image list')
    # check if the track file time step is one hour; result is yes for both the equal and not_equal image lists

    for key in not_equal_track_image_list:
        ts = []
        track_file_path = track_path + key + '.itk'
        with open(track_file_path, 'rb') as tsv_file:
            tsv_reader = csv.reader(tsv_file, delimiter='\t')
            for row in tsv_reader:
                yy = row[0]
                mm = row[1]
                dd = row[2]
                hh = row[3]
                t = datetime.datetime.strptime(
                    yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H')
                ts.append(t)
        tmp = ts[0]
        for i in range(1, len(ts)):
            dif = (ts[i] - tmp).total_seconds()
            # print (dif,'dif')
            if dif != 3600:
                print(dif, i, key)
            tmp = ts[i]
        # break
    dataset_imageset_path = 'test_file/dataset_image_unequal.hdf5'
    dataset_intensity_path = 'test_file/dataset_intensity_unequal.hdf5'
    # hf_image = h5py.File(dataset_imageset_path)
    # hf_intensity = h5py.File(dataset_intensity_path)
    vgg_model = VGG_16('vgg16_weights.h5')
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
    for key in not_equal_track_image_list:
        # # for key in equal_track_image_list:
        image_folder = image_path + key + '/'
        # 	dataset_x,dataset_y = prepare_dataset.dataset_1_2(image_folder,track_dict)
        # 	print dataset_x.shape
        # 	print dataset_y.shape
        # 	break
        file_path_list = []
        # print key
        dataset_image = []
        dataset_intensity = []
        for subdirs, dirs, files in os.walk(image_folder):
            for file in files:
                file_path = os.path.join(subdirs, file)
                file_path_list.append(file_path)
        sorted_file_list = sorted(
            file_path_list, key=lambda x: int(x.split('/')[-1].split('-')[-4]))
        # print (len(sorted_file_list),'len of sorted_file_list')
        ts = []
        intensities = []
        for file_path in sorted_file_list:
            yymmddhh = file_path.split('/')[-1].split('-')[-4]
            track_key = yymmddhh + key
            intensities.append(float(track_dict[track_key][-2]))
            t = datetime.datetime.strptime(yymmddhh, '%Y%m%d%H')
            ts.append(t)
        # print len(ts),'len ts'
        tmp = ts[0]
        orig_image = load.get_x(sorted_file_list, img_rows, img_cols, mean_v,
                                std_v)
        tmp_image = orig_image[0]
        # 		dataset_input = get_fc2(vgg_model,dataset_image)
        # 		dataset_input = np.array(dataset_input)

        dataset_image.append(orig_image[0])
        dataset_intensity.append(intensities[0])
        for i in range(1, len(ts)):
            dif = (ts[i] - tmp).total_seconds()
            # print (dif,'dif')
            if dif != 3600:
                print(dif / 3600.0, i, key, ts[i])
                for j in range(1, int(dif / 3600.0)):
                    t2 = tmp + datetime.timedelta(seconds=3600 * j)  # timestamp of the j-th missing hour
                    yy = t2.year
                    mm = str(t2.month).zfill(2)
                    dd = str(t2.day).zfill(2)
                    hh = str(t2.hour).zfill(2)
                    yymmddhh = str(yy) + mm + dd + hh
                    track_key = yymmddhh + key
                    intensity = float(track_dict[track_key][-2])
                    image = (1 - (float(j) / (dif / 3600.0))) * tmp_image + (
                        float(j) / (dif / 3600.0)) * orig_image[i]
                    dataset_image.append(image)
                    dataset_intensity.append(intensity)
            dataset_image.append(orig_image[i])
            dataset_intensity.append(intensities[i])

            tmp = ts[i]
            tmp_image = orig_image[i]
        # dataset_image = np.array(dataset_image)
        for i in range(len(dataset_image)):
            show_image(
                dataset_image[i][0], 'test_file/unequal_image_generate_test/' +
                str(key) + '_' + str(i) + '.jpg')

        # dataset_input = get_fc2(vgg_model,dataset_image)
        # dataset_intensity = np.array(dataset_intensity)
        # dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity, intensity_mean,intensity_std)
        # hf_image.create_dataset(key, data = dataset_input)
        # hf_intensity.create_dataset(key, data = dataset_intensity)
        # break
    # hf_image.close()
    # hf_intensity.close()

    t2 = time.time()
    print("using  %s seconds" % (t2 - t1))
def main():
    np.random.seed(7)
    t1 = time.time()
    image_path = config.image_path
    track_path = config.track_path
    track_dic_path = config.track_dic_path
    track_dict = load.load_json(track_dic_path)
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = config.ModelCheckpoint_file
    look_back = config.look_back
    img_rows, img_cols = config.img_rows, config.img_cols
    subdir_list = []
    hist_path = config.hist_path

    # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512))
    # train_y = np.random.uniform(0,1,(17,1))
    # print (train_x)
    # train_x = np.array(train_x,dtype = 'float32')
    # train_y = np.array(train_y,dtype= 'float32')
    # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)
    """
	count the number of image in each typhoon sequence
	"""
    image_number_dictionary = {}
    for subdirs, dirs, files in os.walk(image_path):
        # print (subdirs)
        subdir_list.append(subdirs)
    for subdir in subdir_list:
        count = 0
        for subdirs, dirs, files in os.walk(subdir):
            for file in files:
                count += 1
        key = subdir.split('/')[-1]
        image_number_dictionary[key] = count
        if count < 24:
            print(key, count)
    # print (image_number_dictionary)
    """
	check the number of images equals the number of track data?
	"""
    # for subdir in subdir_list:
    # 	for subdirs, dirs, files in os.walk(subdir):
    # 		for file in files:
    # 			# print (file)
    # 			[k1, k2] = file.split("-")[:2]
    # 			key = "".join((k1,k2))
    # 			try:
    # 				mark = track_dict[key]
    # 			except KeyError:
    # 				print (file +'do not have track value')

    # for k in track_dict.keys():
    # 	k2 = k[-6:] # typhoon number
    # 	k1 = k[:-6]
    # 	file = k1 +'-' + k2 +'*'
    # 	file_path = image_path + k2 +'/' + file
    # 	if not os.path.isfile(file_path):
    # 		print (file_path not exists)
    track_dict_number = {}
    equal_track_image_list = []
    not_equal_track_image_list = []
    for subdir in subdir_list:
        key = subdir.split('/')[-1]

        if len(key) > 0 and key not in ['201620', '201621', '201622']:
            track_file_path = track_path + key + '.itk'
            with open(track_file_path, 'rb') as tsv_file:
                tsv_reader = csv.reader(tsv_file, delimiter='\t')
                count = 0
                for row in tsv_reader:
                    count += 1
                track_dict_number[key] = count
                if count != image_number_dictionary[key]:
                    not_equal_track_image_list.append(key)
                    # print (key,count,image_number_dictionary[key],'not equal')
                if count == image_number_dictionary[key]:
                    # print  (key,count,image_number_dictionary[key],' equal')
                    equal_track_image_list.append(key)
    # print (not_equal_track_image_list,'not_equal_track_image_list')
    # print (equal_track_image_list,'equal_track_image_list')
    """
	# check_intensities statistics


	data_folder = not_equal_track_image_list + equal_track_image_list
	intensities=[]
	for folder in data_folder:
		file_name = track_path + folder+'.itk'
		with open(file_name,'rb') as tsv_file:
			tsv_reader = csv.reader(tsv_file, delimiter='\t')
			for row in tsv_reader:
				#print row.type
				intensity = float(row[-2])
				intensities.append(intensity)

	intensities = np.array(intensities)
	print intensities
	print intensities.shape
	print np.mean(intensities,axis=0),'mean'
	print np.std(intensities,axis=0),'std'
	print np.min(intensities,axis=0),'min'
	print np.max(intensities,axis =0),'max'
	"""
    print(len(equal_track_image_list), 'length of equal track image list')
    # check if the track file time step is one hour; result is yes for both the equal and not_equal image lists

    for key in not_equal_track_image_list:
        ts = []
        track_file_path = track_path + key + '.itk'
        with open(track_file_path, 'rb') as tsv_file:
            tsv_reader = csv.reader(tsv_file, delimiter='\t')
            for row in tsv_reader:
                yy = row[0]
                mm = row[1]
                dd = row[2]
                hh = row[3]
                t = datetime.datetime.strptime(
                    yy + ":" + mm + ":" + dd + ':' + hh, '%Y:%m:%d:%H')
                ts.append(t)
        tmp = ts[0]
        for i in range(1, len(ts)):
            dif = (ts[i] - tmp).total_seconds()
            # print (dif,'dif')
            if dif != 3600:
                print(dif, i, key)
            tmp = ts[i]
        # break
    data_folder_path = config.data_folder_path
    if not os.path.exists(data_folder_path):
        equal_track_image_list = np.array(equal_track_image_list)
        np.random.shuffle(equal_track_image_list)
        equal_track_image_list = list(equal_track_image_list)
        # equal_track_image_list = equal_track_image_list[:2]
        train_folder = equal_track_image_list[:int(0.9 *
                                                   len(equal_track_image_list)
                                                   )]
        test_folder = equal_track_image_list[int(0.9 *
                                                 len(equal_track_image_list)):]
        with open(data_folder_path, 'w') as f:
            json.dump(
                {
                    'train_folder': train_folder,
                    'test_folder': test_folder
                }, f)
            print('data_folder_path dumped to: ', data_folder_path)
    else:
        with open(data_folder_path, 'r') as f:
            data_folder = json.load(f)
            train_folder = data_folder['train_folder']
            test_folder = data_folder['test_folder']
            print('load data folder from: ', data_folder_path)
    dataset_image_dic = {}
    dataset_intensity_dic = {}
    dataset_image_path = 'test_file/dataset_imageset.hdf5'
    dataset_intensity_path = 'test_file/dataset_intensity.hdf5'
    dataset_type_path = 'test_file/dataset_type.hdf5'
    # equal_track_image_list=equal_track_image_list[:2]
    # if not os.path.exists(dataset_image_path) :
    # 	vgg_model = VGG_16('vgg16_weights.h5')
    # 	sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    #    	vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
    # 	hf_image = h5py.File(dataset_image_path)
    # 	hf_intensity = h5py.File(dataset_intensity_path)

    # 	for key in equal_track_image_list:
    # 		print(key)
    # 		image_folder = image_path + key +'/'
    # 		track_file_path = track_path + key + '.itk'
    # 		dataset_image = prepare_dataset.dataset_2(image_folder)
    # 		dataset_input = get_fc2(vgg_model,dataset_image)
    # 		dataset_input = np.array(dataset_input)
    # 		dataset_intensity = prepare_dataset.dataset_1(track_file_path)
    # 		dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std)
    # 		print (dataset_input.shape,'dataset_image.shape')
    # 		print (dataset_intensity.shape,'dataset_intensity')
    # 		dataset_image_dic[key] = dataset_input
    # 		dataset_intensity_dic[key] = dataset_intensity
    # 		hf_image.create_dataset(key, data = dataset_input)
    # 		hf_intensity.create_dataset(key, data = dataset_intensity)
    # 	hf_image.close()
    # 	hf_intensity.close()
    # 	print ('dumped data into hf_image,intensity')
    # else:
    # 	print ('hf_image intensity exists')
    # 	for key in equal_track_image_list:
    # 		with h5py.File(dataset_image_path,'r') as hf_image:

    # 			dataset_image = np.array(hf_image.get(key))
    # 		with h5py.File(dataset_intensity_path,'r') as hf_intensity:
    # 			dataset_intensity = np.array(hf_intensity.get(key))
    # 		print (key, dataset_image.shape,dataset_intensity.shape)
    # train_selected_folder_index = random.sample(range(0,len(train_folder)),10)
    # test_selected_folder_index = random.sample(range(0,len(test_folder)),10)

    hf_image = h5py.File(dataset_image_path)
    hf_intensity = h5py.File(dataset_intensity_path)
    hf_type = h5py.File(dataset_type_path)
    # for i in train_selected_folder_index:
    # 	key = train_folder[i]
    # train_folder=['201314']
    model = pretrain_model(look_back, batch_size)
    csv_path_train = 'test_file/train_prediction_compare_initilization_or_no_look_back_6/'
    csv_path_test = 'test_file/test_prediction_compare_initilization_or_no_look_back_6/'
    ModelCheckpoint_file_2 = config.ModelCheckpoint_file_2
    ModelCheckpoint_file = config.ModelCheckpoint_file
    # train_folder=train_folder[:3]
    train_error_10 = 0.0
    train_error_10_2 = 0.0
    train_error_10_p = 0.0
    train_error_10_2_p = 0.0
    count = 0.0
    train_error_extra = 0.0
    train_error_tropical = 0.0
    train_error_tropical_p = 0.0
    train_error_extra_p = 0.0
    count_extra = 0.0
    count_trop = 0.0
    for key in train_folder:
        print(key)

        dataset_image = np.array(hf_image.get(key))
        dataset_intensity = np.array(hf_intensity.get(key))
        dataset_type = np.array(hf_type.get((key)))
        # print (dataset_image.shape,'dataset_image')
        train_x, train_y = prepare_dataset.create_dataset_2_zero(
            dataset_image, dataset_intensity, look_back=look_back)
        train_x_2, train_y_2 = prepare_dataset.extend_dataset_2_zero(
            dataset_image, dataset_intensity, look_back=look_back)
        train_type = prepare_dataset.create_dataset_y_zero(dataset_type,
                                                           look_back=look_back)
        train_x = np.array(train_x, dtype='float32')
        train_y = np.array(train_y, dtype='float32')
        train_x_2 = np.array(train_x_2, dtype='float32')
        train_type = np.array(train_type)

        if train_x.shape[0] > 0:
            if os.path.exists(ModelCheckpoint_file):
                print('load  load_weights', ModelCheckpoint_file)
                model.load_weights(ModelCheckpoint_file)

            trainPredict = model.predict(train_x, batch_size=batch_size)
            model.reset_states()
            trainPredict = prepare_dataset.reverse_normalize_intensity(
                trainPredict, intensity_mean, intensity_std)
            trainY = prepare_dataset.reverse_normalize_intensity(
                train_y, intensity_mean, intensity_std)
            # print (trainPredict,'train_predict')
            # print (trainY,'trainY')
            if train_x_2.shape[0] > 0:
                if os.path.exists(ModelCheckpoint_file_2):
                    print('load  load_weights', ModelCheckpoint_file_2)
                    model.load_weights(ModelCheckpoint_file_2)

                trainPredict_2 = model.predict(train_x_2,
                                               batch_size=batch_size)
                model.reset_states()
                trainPredict_2 = prepare_dataset.reverse_normalize_intensity(
                    trainPredict_2, intensity_mean, intensity_std)
            if len(trainPredict) == len(trainPredict_2[20:]):
                csv_path_1 = csv_path_train + str(key) + '.csv'
                if not os.path.exists(csv_path_train):
                    os.mkdir(csv_path_train)
                print('writing to csv ' + key)
                if int(len(trainPredict)) >= 10:
                    count += 1
                    train_error_10 += math.sqrt(
                        mean_squared_error(trainPredict[:10], trainY[:10]))
                    train_error_10_2 += math.sqrt(
                        mean_squared_error(trainPredict_2[20:30], trainY[:10]))
                    train_error_10_p += np.sum(
                        np.power(
                            (trainPredict[:10] - trainY[:10]), 2)) / np.sum(
                                np.power((trainPredict - trainY), 2))
                    train_error_10_2_p += np.sum(
                        np.power((trainPredict_2[20:30] - trainY[:10]),
                                 2)) / np.sum(
                                     np.power(
                                         (trainPredict_2[20:] - trainY), 2))
                    if 6 in train_type:
                        train_error_extra += math.sqrt(
                            mean_squared_error(trainPredict_2[20:], trainY))
                        # train_error_extra_p += np.sum(np.power((trainPredict_2[20:0.1*len(trainPredict) +20]-trainY[:int(0.1*len(trainPredict))]),2))/np.sum(np.power((trainPredict_2[20:]-trainY),2))
                        count_extra += 1
                    if 6 not in train_type:
                        train_error_tropical += math.sqrt(
                            mean_squared_error(trainPredict_2[20:], trainY))
                        # train_error_tropical_p += np.sum(np.power((trainPredict_2[20:0.1*len(trainPredict) +20]-trainY[:int(0.1*len(trainPredict))]),2))/np.sum(np.power((trainPredict_2[20:]-trainY),2))
                        count_trop += 1
                trainPredict = np.reshape(trainPredict, (len(trainPredict), 1))
                trainPredict_2 = np.reshape(np.array(trainPredict_2[20:]),
                                            (len(trainY), 1))
                trainY = np.reshape(np.array(trainY), (len(trainY), 1))
                train_type = np.reshape(np.array(train_type), (len(trainY), 1))
                zz = np.concatenate(
                    (trainPredict, trainPredict_2, trainY, train_type), 1)
                with open(csv_path_1, 'wb') as f:
                    writer = csv.writer(f, delimiter=',')
                    writer.writerow([
                        'predictions_no_initialization',
                        'predictions_with_initializations', 'intensity_true',
                        'type_true'
                    ])
                    writer.writerows(zz)
    print(train_error_10, 'train_error_10')
    print(train_error_10_p, 'train_error_10_p')
    print(train_error_10_2, 'train_error_10_2')
    print(train_error_10_2_p, 'train_error_10_2_p')
    print('divide by len(train_folder)', count)
    print(train_error_10 / count, 'train_error_10')
    print(train_error_10_p / count, 'train_error_10_p')
    print(train_error_10_2 / count, 'train_error_10_2')
    print(train_error_10_2_p / count, 'train_error_10_2_p')
    print(train_error_tropical, 'train_error_tropical')
    print(train_error_extra, 'train_error_extra')
    print(count_trop, 'count_trop')
    print(count_extra, 'count_extra')
    print(train_error_tropical / count_trop, 'tropical div')
    print(train_error_extra / count_extra, 'extra div')
    # for i in test_selected_folder_index:
    # test_folder = test_folder[:3]
    test_error_10 = 0.0
    test_error_10_2 = 0.0
    test_error_10_p = 0.0
    test_error_10_2_p = 0.0
    test_error_extra = 0.0
    test_error_tropical = 0.0
    test_error_extra_p = 0.0
    test_error_tropical_p = 0.0
    count = 0.0
    count_extra = 0.0
    count_trop = 0.0
    for key in test_folder:
        # key = test_folder[i]
        print(key)

        model.load_weights(ModelCheckpoint_file)
        dataset_image = np.array(hf_image.get(key))
        dataset_intensity = np.array(hf_intensity.get(key))
        dataset_type = np.array(hf_type.get((key)))
        test_x, test_y = prepare_dataset.create_dataset_2_zero(
            dataset_image, dataset_intensity, look_back=look_back)
        test_x_2, test_y_2 = prepare_dataset.extend_dataset_2_zero(
            dataset_image, dataset_intensity, look_back=look_back)
        test_type = prepare_dataset.create_dataset_y_zero(dataset_type,
                                                          look_back=look_back)
        test_x = np.array(test_x, dtype='float32')
        test_y = np.array(test_y, dtype='float32')
        test_x_2 = np.array(test_x_2, dtype='float32')
        test_type = np.array(test_type)
        if test_x.shape[0] > 0:
            if os.path.exists(ModelCheckpoint_file):
                print('load  load_weights', ModelCheckpoint_file)
                model.load_weights(ModelCheckpoint_file)
            testPredict = model.predict(test_x, batch_size=batch_size)
            model.reset_states()
            # # # invert predictions
            testPredict = prepare_dataset.reverse_normalize_intensity(
                testPredict, intensity_mean, intensity_std)
            testY = prepare_dataset.reverse_normalize_intensity(
                test_y, intensity_mean, intensity_std)
            if test_x_2.shape[0] > 0:
                if os.path.exists(ModelCheckpoint_file_2):
                    print('load  load_weights', ModelCheckpoint_file_2)
                    model.load_weights(ModelCheckpoint_file_2)

                testPredict_2 = model.predict(test_x_2, batch_size=batch_size)
                model.reset_states()
                testPredict_2 = prepare_dataset.reverse_normalize_intensity(
                    testPredict_2, intensity_mean, intensity_std)
            if len(testPredict) == len(testPredict_2[20:]):
                csv_path_1 = csv_path_test + str(key) + '.csv'
                if not os.path.exists(csv_path_test):
                    os.mkdir(csv_path_test)
                print('writing to csv ' + key)
                if int(len(testPredict)) >= 10:
                    count += 1
                    test_error_10 += math.sqrt(
                        mean_squared_error(testPredict[:10], testY[:10]))
                    test_error_10_2 += math.sqrt(
                        mean_squared_error(testPredict_2[20:30], testY[:10]))
                    test_error_10_p += np.sum(
                        np.power((testPredict[:10] - testY[:10]), 2)) / np.sum(
                            np.power((testPredict - testY), 2))
                    test_error_10_2_p += np.sum(
                        np.power(
                            (testPredict_2[20:30] - testY[:10]), 2)) / np.sum(
                                np.power((testPredict_2[20:] - testY), 2))
                    if 6 in test_type:
                        test_error_extra += math.sqrt(
                            mean_squared_error(testPredict_2[20:], testY))
                        # test_error_extra_p += np.sum(np.power((testPredict_2[20:0.1*len(testPredict) +20]-testY[:int(0.1*len(testPredict))]),2))/np.sum(np.power((testPredict_2[20:]-testY),2))
                        count_extra += 1
                    if 6 not in test_type:
                        test_error_tropical += math.sqrt(
                            mean_squared_error(testPredict_2[20:], testY))
                        # test_error_tropical_p += np.sum(np.power((testPredict_2[20:0.1*len(testPredict) +20]-testY[:int(0.1*len(testPredict))]),2))/np.sum(np.power((testPredict_2[20:]-testY),2))
                        count_trop += 1
                testPredict = np.reshape(testPredict, (len(testPredict), 1))
                testPredict_2 = np.reshape(np.array(testPredict_2[20:]),
                                           (len(testY), 1))
                testY = np.reshape(np.array(testY), (len(testY), 1))
                test_type = np.reshape(np.array(test_type), (len(testY), 1))
                zz = np.concatenate(
                    (testPredict, testPredict_2, testY, test_type), 1)
                with open(csv_path_1, 'wb') as f:
                    writer = csv.writer(f, delimiter=',')
                    writer.writerow([
                        'predictions_no_initialization',
                        'predictions_with_initializations', 'intensity_true',
                        'type_true'
                    ])
                    writer.writerows(zz)

    print(test_error_10, 'test_error_10')
    print(test_error_10_2, 'test_error_10_2')
    print(test_error_10_p, 'test_error_10_p')
    print(test_error_10_2_p, 'test_error_10_2_p')
    print('divide by len(test_folder)', count)
    print(test_error_10 / count, 'test_error_10')
    print(test_error_10_2 / count, 'test_error_10_2')
    print(test_error_10_p / count, 'test_error_10_p')
    print(test_error_10_2_p / count, 'test_error_10_2_p')
    print(test_error_extra, 'test_error_extra')
    print(test_error_tropical, 'test_error_tropical')
    # print test_error_extra_p,'test_error_extra_p'
    # print test_error_tropical_p,'test_error_tropical_p'
    print('number of extra tropical', count_extra)
    print('number of tropical ', count_trop)
    print(test_error_extra / count_extra, 'test_error_extra/count_extra')
    print(test_error_tropical / count_trop, 'test_error_tropical/count_trop')
    # print test_error_extra_p/count_extra,'test_error_extra_p/count_extra'
    # print test_error_tropical_p/count_trop,'test_error_tropical_p/count_trop'
    hf_image.close()
    hf_intensity.close()
    hf_type.close()
    t2 = time.time()
    print("using  %s seconds" % (t2 - t1))
def main():
    np.random.seed(7)
    # trackDictPath = config.track_dic_path
    # track_dict = load.load_json(trackDictPath)
    track_path = config.track_path
    suspicious_file_list_path = config.suspicious_file_list_path
    suspicious_file_list = load.load_json(suspicious_file_list_path)
    train_validation_test_subdirs_split = config.train_validation_test_subdirs_split
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = config.ModelCheckpoint_file
    train_predict_image = config.train_predict_image
    test_predict_image = config.test_predict_image
    look_back = 3
    file_list = []
    for subdir, dirs, files in os.walk(track_path):
        for file in files:
            file_path = os.path.join(subdir, file)
            file_list.append(file_path)
    file_list = np.array(file_list)
    np.random.shuffle(file_list)
    file_list = list(file_list)
    file_list = file_list[:10]
    # print (file_list)
    # for file in file_list:
    # 	if len(file) <=2:
    # 		print (file)
    # 		print (file_list.index(file))
    # file_list = file_list[:10]
    train_file_list = file_list[:int(0.9 * len(file_list))]
    test_file_list = file_list[int(0.9 * len(file_list)):]
    # print(train_file_list)
    trainX = []
    trainY = []
    testX = []
    testY = []
    dataset_count = 0
    for file in train_file_list:
        try:
            data = prepare_dataset.dataset_1(file)
            data = prepare_dataset.normalize_intensity(data, intensity_mean,
                                                       intensity_std)
            # data = list(data)
            trainXx, trainYy = prepare_dataset.create_dataset(data, look_back)
            trainX += trainXx
            trainY += trainYy
            dataset_count += data.shape[0]
        except:
            print(file)

    for file in test_file_list:
        try:
            data = prepare_dataset.dataset_1(file)
            data = prepare_dataset.normalize_intensity(data, intensity_mean,
                                                       intensity_std)
            # data = list(data)
            testXx, testYy = prepare_dataset.create_dataset(data, look_back)
            testX += testXx
            testY += testYy
            dataset_count += data.shape[0]
        except:
            print(file)

    trainX = np.array(trainX, dtype='float32')
    trainY = np.array(trainY, dtype='float32')
    testX = np.array(testX, dtype='float32')
    testY = np.array(testY, dtype='float32')

    print(trainX.shape)
    print(testX.shape)

    trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
    testX = np.reshape(testX, (testX.shape[0], testX.shape[1], 1))
    batch_size = 1
    model = Sequential()
    model.add(
        LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
    model.add(Dense(3))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # checkpointer = ModelCheckpoint(filepath=ModelCheckpoint_file, verbose=2, save_best_only=True)
    hists = []
    for i in range(10):
        hist = model.fit(trainX,
                         trainY,
                         nb_epoch=1,
                         batch_size=batch_size,
                         verbose=2,
                         shuffle=False)
        model.reset_states()
        hists.append(hist.history['loss'][0])
    print(hists, 'hists')
    # model.save_weights(ModelCheckpoint_file)
    # make predictions

    trainPredict = model.predict(trainX, batch_size=batch_size)
    model.reset_states()
    testPredict = model.predict(testX, batch_size=batch_size)
    # invert predictions
    trainPredict = prepare_dataset.reverse_normalize_intensity(
        trainPredict, intensity_mean, intensity_std)
    trainY = prepare_dataset.reverse_normalize_intensity(
        trainY, intensity_mean, intensity_std)
    testPredict = prepare_dataset.reverse_normalize_intensity(
        testPredict, intensity_mean, intensity_std)
    testY = prepare_dataset.reverse_normalize_intensity(
        testY, intensity_mean, intensity_std)
    # calculate root mean squared error
    # print (trainPredict[:,0], 'trainPredict')
    # print (trainPredict.shape,'len_train_predict')
    # print(trainY[0],'trainY')
    trainScore = math.sqrt(mean_squared_error(trainY, trainPredict[:, 0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY, testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))
    dataset = np.zeros((dataset_count, 1), dtype='float32')

    # trainPredictPlot = np.empty_like(dataset)
    # trainPredictPlot[:, :] = np.nan
    # trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
    # # shift test predictions for plotting
    # testPredictPlot = np.empty_like(dataset)
    # testPredictPlot[:, :] = np.nan
    # testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
    # # plt.plot(dataset))
    fig = plt.figure()
    plt.title('train_predicts_look_back')
    plt.plot(list(trainPredict[:, 0]), 'r--', label='train_predict')
    plt.plot(list(trainY), 'g--', label='train')
    plt.legend(loc='upper left', shadow=True)
    plt.xlabel('typhoon_image')
    plt.ylabel('typhoon intensity')
    plt.savefig(train_predict_image)
    plt.close(fig)
    fig = plt.figure()
    plt.title('test_predicts_look_back')
    plt.plot(list(testPredict[:, 0]), 'r--', label='test_predict')
    plt.plot(list(testY), 'g--', label='test')
    plt.xlabel('typhoon_image')
    plt.ylabel('typhoon intensity')
    plt.legend(loc='upper left', shadow=True)
    plt.savefig(test_predict_image)
    plt.close(fig)
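# Hedged sketch of the stateful-LSTM pattern used in this snippet (old Keras 1.x
# API, matching the code above): the batch size is fixed in batch_input_shape,
# epochs are driven manually, and reset_states() clears the carried cell state
# between passes. Dense(1) is used here for a single intensity target; the snippet
# above uses Dense(3), which would expect a 3-dimensional target vector.
def build_stateful_lstm_sketch(batch_size, look_back):
    model = Sequential()
    model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model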
Ejemplo n.º 12
0
def main():
    np.random.seed(7)
    # trackDictPath = config.track_dic_path
    # track_dict = load.load_json(trackDictPath)
    track_path = config.track_path
    suspicious_file_list_path = config.suspicious_file_list_path
    suspicious_file_list = load.load_json(suspicious_file_list_path)
    train_validation_test_subdirs_split = config.train_validation_test_subdirs_split
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std
    batch_size = config.batch_size
    ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_lookback_24.hdf5'
    print('ModelCheckpoint_file',
          ModelCheckpoint_file)  #config.ModelCheckpoint_file
    look_back = 1
    batch_size = 1
    print(look_back, 'look_back')
    file_list = []
    model = lstm_model_1(batch_size, look_back)
    # model.load_weights(ModelCheckpoint_file)
    for subdir, dirs, files in os.walk(track_path):
        for file in files:
            file_path = os.path.join(subdir, file)
            file_list.append(file_path)
    file_list = np.array(file_list)
    np.random.shuffle(file_list)
    file_list = list(file_list)
    # file_list = file_list[:10]
    # print (file_list)
    # for file in file_list:
    # 	if len(file) <=2:
    # 		print (file)
    # 		print (file_list.index(file))
    file_list = file_list[:10]
    train_file_list = file_list[:int(0.9 * len(file_list))]
    # validation_file_list = file_list[int(0.85*len(file_list)):int(0.9*len(file_list))]
    test_file_list = file_list[int(0.9 * len(file_list)):]
    print(len(train_file_list))
    # print (len(validation_file_list))
    print(len(test_file_list))

    testX = []
    testY = []
    # dataset_count = 0
    train_histss = []
    validation_histss = []
    train_file_list_copy = train_file_list
    # trainXS=np.array([]).reshape(0,look_back)
    # print (trainXS.shape,'trainxs shape')
    # trainYS = np.array([]).reshape(0,1)
    trainXS = []
    trainYS = []
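    # walk the training files in chunks of 12: each chunk is converted into
    # look-back windows and the model is fitted on it for 100 single-epoch
    # passes before moving on to the next chunk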
    for i in np.arange(0, len(train_file_list_copy),
                       12):  #len(train_file_list_copy)
        trainX = []
        trainY = []
        train_hists = []
        validation_hists = []
        print(i, 'i')
        train_file_list = train_file_list_copy[i:i + 12]
        # print len(train_file_list)
        for file in train_file_list:
            # print file
            # try:
            data = prepare_dataset.dataset_1(file)
            data = prepare_dataset.normalize_intensity(data, intensity_mean,
                                                       intensity_std)
            # data = list(data)
            trainXx, trainYy = prepare_dataset.create_dataset(data, look_back)
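            # create_dataset presumably builds sliding windows over the series:
            # X[i] = data[i:i+look_back], y[i] = data[i+look_back]
            # (assumption, consistent with the reshape to (samples, look_back, 1) below)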
            trainX += trainXx
            trainY += trainYy
            # print (trainX,'trainX')
            # print (trainY,'trainY')
            # break
            # dataset_count += data.shape[0]
            # except:
            # 	print(file,'error')
        trainX = np.array(trainX, dtype='float32')
        trainY = np.array(trainY, dtype='float32')
        # print (trainX.shape)
        # print(trainY.shape,'trainY SHAPE')
        trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
        # trainXS = np.vstack((trainXS, trainX))
        # trainYS = np.vstack((trainYS, trainY))
        # print (trainXS.shape,'trainxs shape')
        # break
        # return

        trainXS.append(trainX)
        trainYS.append(trainY)
        """
		training
		"""

        for epoch in range(100):
            hist = model.fit(trainX,
                             trainY,
                             nb_epoch=1,
                             batch_size=batch_size,
                             verbose=2,
                             validation_split=0.1,
                             shuffle=False)
            model.reset_states()
            train_hists.append(hist.history['loss'][0])
            validation_hists.append(hist.history['val_loss'][0])
        # print (hists,'hists')
        train_histss.append(train_hists)
        validation_histss.append(validation_hists)
    print(train_histss, 'train_histss')
    print(validation_histss, 'validation_histss')

def main():

    np.random.seed(7)
    t1 = time.time()
    """
	get the config information for the model
	"""
    image_path = config.image_path  # the image dataset root folder
    track_path = config.track_path  # the track dataset root folder
    track_dic_path = config.track_dic_path  # path to the track dict built from the track dataset
    track_dict = load.load_json(track_dic_path)
    intensity_mean, intensity_std = config.intensity_mean, config.intensity_std  # get the intensity mean, std information
    batch_size = config.batch_size
    ModelCheckpoint_file = config.ModelCheckpoint_file  # the path where the model weights are saved
    # ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_24_whole_equal_pretrain_epoch_1000_adadelta_0.0001_zero_prediction_initial_normalization_unequal_equal_whole.hdf5'
    ModelCheckpoint_file = 'test_file/orig_weights_lstm_1.0_image_lookback_24_whole_equal_pretrain_epoch_1000_adadelta_0.0001_prediction_initial_normalization.hdf5'
    # ModelCheckpoint_file = 'test_file/orig_weights_lstm_1'
    look_back = config.look_back  # the look-back window (number of past time steps)
    # look_back = 6
    img_rows, img_cols = config.img_rows, config.img_cols  # image dimensions, e.g. 224 x 224
    subdir_list = []
    hist_path = config.hist_path  # the path where the Keras training history is saved
    model = pretrain_model(look_back, batch_size)  # get the lstm model
    if os.path.exists(ModelCheckpoint_file):
        print('load  load_weights', ModelCheckpoint_file)
        model.load_weights(ModelCheckpoint_file)
    print(model.summary())
    # train_x = np.random.uniform(0,1,(17, 3, 1, 512, 512))
    # train_y = np.random.uniform(0,1,(17,1))
    # print (train_x)
    # train_x = np.array(train_x,dtype = 'float32')
    # train_y = np.array(train_y,dtype= 'float32')
    # hist = model.fit(train_x, train_y, nb_epoch=1, batch_size=batch_size, verbose=2, validation_split=0.1,shuffle=False)
    """
	count the number of image in each typhoon sequence
	"""
    """
	image_number_dictionary={}
	for  subdirs, dirs, files in os.walk(image_path):
		# print (subdirs)
		subdir_list.append(subdirs)
	for subdir in subdir_list:
		count = 0
		for subdirs, dirs, files in os.walk(subdir):
			for file in files:
				count += 1
		key = subdir.split('/')[-1]
		image_number_dictionary[key] = count
		if count < 24:
			print (key,count)
	"""
    # print (image_number_dictionary)
    """
	check the number of images equals the number of track data?
	"""
    # for subdir in subdir_list:
    # 	for subdirs, dirs, files in os.walk(subdir):
    # 		for file in files:
    # 			# print (file)
    # 			[k1, k2] = file.split("-")[:2]
    # 			key = "".join((k1,k2))
    # 			try:
    # 				mark = track_dict[key]
    # 			except KeyError:
    # 				print (file +'do not have track value')

    # for k in track_dict.keys():
    # 	k2 = k[-6:] # typhoon number
    # 	k1 = k[:-6]
    # 	file = k1 +'-' + k2 +'*'
    # 	file_path = image_path + k2 +'/' + file
    # 	if not os.path.isfile(file_path):
    # 		print (file_path not exists)
    """
	get the equal_data folders, in which no image data is missing, and the unequal data folders, in which some image data is missing
	"""
    track_dict_number = {}
    equal_track_image_list = []
    not_equal_track_image_list = []
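    # sequences whose track-record count matches their image count go into
    # equal_track_image_list, the rest into not_equal_track_image_list; note
    # that subdir_list and image_number_dictionary are only populated by the
    # os.walk block quoted out above, so with that block disabled this loop is
    # a no-op and the train/test folders are expected to come from the
    # data_folder_path JSON handled below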
    for subdir in subdir_list:
        key = subdir.split('/')[-1]

        if len(key) > 0 and key not in ['201620', '201621', '201622']:
            track_file_path = track_path + key + '.itk'
            with open(track_file_path, 'rb') as tsv_file:
                tsv_reader = csv.reader(tsv_file, delimiter='\t')
                count = 0
                for row in tsv_reader:
                    count += 1
                track_dict_number[key] = count
                if count != image_number_dictionary[key]:
                    not_equal_track_image_list.append(key)
                    # print (key,count,image_number_dictionary[key],'not equal')
                if count == image_number_dictionary[key]:
                    # print  (key,count,image_number_dictionary[key],' equal')
                    equal_track_image_list.append(key)
    # print (not_equal_track_image_list,'not_equal_track_image_list')
    # print (equal_track_image_list,'equal_track_image_list')
    """
	# check_intensities statistics


	data_folder = not_equal_track_image_list + equal_track_image_list
	intensities=[]
	for folder in data_folder:
		file_name = track_path + folder+'.itk'
		with open(file_name,'rb') as tsv_file:
			tsv_reader = csv.reader(tsv_file, delimiter='\t')
			for row in tsv_reader:
				#print row.type
				intensity = float(row[-2])
				intensities.append(intensity)

	intensities = np.array(intensities)
	print intensities
	print intensities.shape
	print np.mean(intensities,axis=0),'mean'
	print np.std(intensities,axis=0),'std'
	print np.min(intensities,axis=0),'min'
	print np.max(intensities,axis =0),'max'
	"""
    print(len(equal_track_image_list), 'length of equal track image list')
    # check that consecutive track records are one hour apart; result: yes for both the equal and not_equal image lists
    """
	for key in not_equal_track_image_list:
			ts =[]
			track_file_path = track_path + key+'.itk'
			with open(track_file_path,'rb') as tsv_file:
				tsv_reader = csv.reader(tsv_file, delimiter='\t')
				for row in tsv_reader:
					yy = row[0]
					mm = row[1]
					dd = row[2]
					hh = row[3]
					t = datetime.datetime.strptime(yy +":" + mm +":" + dd +':' +hh, '%Y:%m:%d:%H')
					ts.append(t)
			tmp = ts[0]
			for i in range(1,len(ts)):
				dif = (ts[i] - tmp).total_seconds()
				# print (dif,'dif')
				if dif != 3600:
					print (dif,i,key)
				tmp = ts[i]
			# break
	"""

    data_folder_path = config.data_folder_path  # train and test data folder
    # data_folder_path ='test_file/sorted_intensity_data_folder.json'
    if not os.path.exists(data_folder_path):
        equal_track_image_list = np.array(equal_track_image_list)
        np.random.shuffle(equal_track_image_list)
        equal_track_image_list = list(equal_track_image_list)
        # equal_track_image_list = equal_track_image_list[:2]
        train_folder = equal_track_image_list[:int(0.9 *
                                                   len(equal_track_image_list)
                                                   )]
        test_folder = equal_track_image_list[int(0.9 *
                                                 len(equal_track_image_list)):]
        with open(data_folder_path, 'w') as f:
            json.dump(
                {
                    'train_folder': train_folder,
                    'test_folder': test_folder
                }, f)
            print('data_folder_path dumped to: ', data_folder_path)
    else:
        with open(data_folder_path, 'r') as f:
            data_folder = json.load(f)
            train_folder = data_folder['train_folder']
            test_folder = data_folder['test_folder']
            print('load data folder from: ', data_folder_path)
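    # persisting the 90/10 folder split to JSON keeps the train/test membership
    # fixed across runs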
    dataset_image_dic = {}
    dataset_intensity_dic = {}
    dataset_image_path = 'test_file/dataset_imageset.hdf5'  # the image dataset; key: typhoon number, value: the per-image features from the last layer of the convolutional neural network
    dataset_intensity_path = 'test_file/dataset_intensity.hdf5'  # the intensity dataset; key: typhoon number, value: a list of intensities
    # equal_track_image_list=equal_track_image_list[:2]
    # if not os.path.exists(dataset_image_path) :
    # 	vgg_model = VGG_16('vgg16_weights.h5')
    # 	sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    #    	vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy')
    # 	hf_image = h5py.File(dataset_image_path)
    # 	hf_intensity = h5py.File(dataset_intensity_path)

    # 	for key in equal_track_image_list:
    # 		print(key)
    # 		image_folder = image_path + key +'/'
    # 		track_file_path = track_path + key + '.itk'
    # 		dataset_image = prepare_dataset.dataset_2(image_folder)
    # 		dataset_input = get_fc2(vgg_model,dataset_image)
    # 		dataset_input = np.array(dataset_input)
    # 		dataset_intensity = prepare_dataset.dataset_1(track_file_path)
    # 		dataset_intensity = prepare_dataset.normalize_intensity(dataset_intensity,intensity_mean,intensity_std)
    # 		print (dataset_input.shape,'dataset_image.shape')
    # 		print (dataset_intensity.shape,'dataset_intensity')
    # 		dataset_image_dic[key] = dataset_input
    # 		dataset_intensity_dic[key] = dataset_intensity
    # 		hf_image.create_dataset(key, data = dataset_input)
    # 		hf_intensity.create_dataset(key, data = dataset_intensity)
    # 	hf_image.close()
    # 	hf_intensity.close()
    # 	print ('dumped data into hf_image,intensity')
    # else:
    # 	print ('hf_image intensity exists')
    # 	for key in equal_track_image_list:
    # 		with h5py.File(dataset_image_path,'r') as hf_image:

    # 			dataset_image = np.array(hf_image.get(key))
    # 		with h5py.File(dataset_intensity_path,'r') as hf_intensity:
    # 			dataset_intensity = np.array(hf_intensity.get(key))
    # 		print (key, dataset_image.shape,dataset_intensity.shape)
    # train_selected_folder_index = random.sample(range(0,len(train_folder)),10)
    # test_selected_folder_index = random.sample(range(0,len(test_folder)),10)

    hf_image = h5py.File(dataset_image_path, 'r')  # opened read-only; only read via get() below
    hf_intensity = h5py.File(dataset_intensity_path, 'r')
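    # these HDF5 files cache, per typhoon number, the CNN features
    # (dataset_image) and the normalized intensities (dataset_intensity) that
    # the commented-out block above would generate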
    # for i in train_selected_folder_index:
    # 	key = train_folder[i]
    # train_folder=['201314']
    # train_folder=['199406']
    # key_already_list=[]
    # for  subdirs, dirs, files in os.walk('test_file/prediction_output_24/'):
    # 	# print (subdirs)
    # 		for file in files:
    # 			print file
    # 			key_already = file.split('_')[0]
    # 			key_already_list.append(key_already)
    # train_folder = list(set(train_folder) - set(key_already_list))
    # test_folder = list(set(test_folder) - set(key_already_list))
    print(len(train_folder), len(test_folder), 'len_train_test_folder')

    for key in train_folder:
        print(key)
        if os.path.exists(ModelCheckpoint_file):
            print('load  load_weights', ModelCheckpoint_file)
            model.load_weights(ModelCheckpoint_file)
        dataset_image = np.array(hf_image.get(key))
        dataset_intensity = np.array(hf_intensity.get(key))
        # print (dataset_image.shape,'dataset_image')
        if len(dataset_intensity) > look_back:
            train_x, train_y = prepare_dataset.extend_dataset_2(
                dataset_image, dataset_intensity, look_back=look_back)

            train_x = np.array(train_x, dtype='float32')
            train_y = np.array(train_y, dtype='float32')
            if train_x.shape[0] > 0:
                train_predict_image = 'test_file/prediction_output_24/' + str(
                    key) + '_' + str(look_back) + '_train.png'
                trainPredict = model.predict(train_x, batch_size=batch_size)
                model.reset_states()
                trainPredict = prepare_dataset.reverse_normalize_intensity(
                    trainPredict, intensity_mean, intensity_std)
                trainY = prepare_dataset.reverse_normalize_intensity(
                    train_y, intensity_mean, intensity_std)
                # print (trainPredict,'train_predict')
                # print (trainY,'trainY')
                fig = plt.figure()
                plt.title('train_predicts_look_back ' + str(look_back) +
                          ', typhoon number ' + str(key))
                plt.plot(list(trainPredict[20:20000, 0]),
                         'r--',
                         label='train_predict')
                plt.plot(list(trainY[20:20000]), 'g--', label='train_true')
                plt.xlabel('typhoon_image')
                plt.ylabel('typhoon intensity')
                plt.ylim([850, 1050])
                plt.legend(loc='upper left', shadow=True)
                plt.savefig(train_predict_image)
                plt.close(fig)

    # for i in test_selected_folder_index:
    for key in test_folder:
        # key = test_folder[i]
        print(key)
        if os.path.exists(ModelCheckpoint_file):
            print('load  load_weights', ModelCheckpoint_file)
            model.load_weights(ModelCheckpoint_file)
        dataset_image = np.array(hf_image.get(key))
        dataset_intensity = np.array(hf_intensity.get(key))
        if len(dataset_intensity) > look_back:
            test_x, test_y = prepare_dataset.extend_dataset_2(
                dataset_image, dataset_intensity, look_back=look_back)
            test_x = np.array(test_x, dtype='float32')
            test_y = np.array(test_y, dtype='float32')
            if test_x.shape[0] > 0:
                testPredict = model.predict(test_x, batch_size=batch_size)
                model.reset_states()
                # # # invert predictions
                testPredict = prepare_dataset.reverse_normalize_intensity(
                    testPredict, intensity_mean, intensity_std)
                testY = prepare_dataset.reverse_normalize_intensity(
                    test_y, intensity_mean, intensity_std)
                test_predict_image = 'test_file/prediction_output_24/' + str(
                    key) + '_' + str(look_back) + '_test.png'
                fig = plt.figure()
                plt.title('test_predicts_look_back ' + str(look_back) +
                          ', typhoon number ' + str(key))
                plt.plot(list(testPredict[20:10000, 0]),
                         'r--',
                         label='test_predict')
                plt.plot(list(testY[20:10000]), 'g--', label='test_true')
                plt.xlabel('typhoon_image')
                plt.ylabel('typhoon intensity')
                plt.ylim([850, 1050])
                plt.legend(loc='upper left', shadow=True)
                plt.savefig(test_predict_image)
                plt.close(fig)
    hf_image.close()
    hf_intensity.close()

    t2 = time.time()
    print("using  %s seconds" % (t2 - t1))
Ejemplo n.º 14
0
# 20. Reading JSON data
# Read the JSON file of Wikipedia articles and display the body of the article about "イギリス" (the United Kingdom).
# For problems 21-29, run them against the article body extracted here.

# coding: utf-8

import load

print(load.load_json("イギリス"))
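
# An illustrative sketch only (load.load_json itself is not shown in this
# listing). For this task it presumably scans a JSON-Lines dump of Wikipedia
# articles and returns the body whose title matches; the file name
# 'jawiki-country.json.gz' and the 'title'/'text' keys below are assumptions.
import gzip
import json


def load_json_sketch(title, path='jawiki-country.json.gz'):
    """Return the article body for the given title, or None if not found."""
    with gzip.open(path, 'rt', encoding='utf-8') as f:
        for line in f:
            article = json.loads(line)  # one JSON object per line
            if article.get('title') == title:
                return article.get('text')
    return None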
Ejemplo n.º 15
0
def main():
    t1 = time.time()
    train_test_file_list_path = config.train_test_file_path_divid
    image_path = config.image_path
    trackDictPath = config.track_dic_path
    track_dict = load.load_json(trackDictPath)
    suspicious_file_list_path = config.suspicious_file_list_path
    suspicious_file_list = load.load_json(suspicious_file_list_path)
    train_validation_test_subdirs_split = config.train_validation_test_subdirs_split
    yType = config.yType
    csv_path = config.csv_path
    confusion_matrix_path = config.confusion_matrix_path
    hist_path = config.hist_path
    nb_epoch = config.nb_epoch
    optimizer_choice = config.optimizer
    img_rows, img_cols = config.img_rows, config.img_cols
    model_check_pointer_file = config.ModelCheckpoint_file
    nb_worker = config.nb_worker
    num_labels = config.num_labels
    batch_size = config.batch_size
    mean_v, std_v = config.mean_v, config.std_v
    if not os.path.exists(train_validation_test_subdirs_split):
        print('subdirs not split')
        subdirs_list = load.get_subdirs_list(image_path)
        train_subdirs_list, validation_subdirs_list, test_subdirs_list = load.split_subdirs(
            subdirs_list, train_validation_test_subdirs_split)
    else:
        print('subdirs already split')

        train_subdirs_list, validation_subdirs_list, test_subdirs_list = load.get_split_subdirs(
            train_validation_test_subdirs_split)
    optimizer = classification_model.optimizer_selection(
        optimizer_choice, nb_epoch)
    model = classification_model.vgg_19(img_rows, img_cols, num_labels,
                                        optimizer)
    model.summary()

    # file_list = subtract_suspicious_list(file_list,suspicious_file_list)
    # trackDictPath = config.track_dic_path
    # yType = config.yType
    train_file_list, test_file_list = load.get_train_test_file_split(
        train_subdirs_list, validation_subdirs_list, test_subdirs_list,
        track_dict, suspicious_file_list)
    validation_file_list = train_file_list[:int(len(train_file_list) * 0.05)]
    train_file_list = train_file_list[int(len(train_file_list) * 0.05):]
    # if not os.path.exists(train_test_file_list_path):
    # 	print 'file_list not splited'
    # 	train_file_list ,validation_file_list,test_file_list =  load.get_train_validation_test_file_split(train_subdirs_list, validation_subdirs_list, test_subdirs_list,track_dict,suspicious_file_list,train_test_file_list_path)
    # else:
    # 	print 'file list splitted'
    # 	train_file_list, validation_file_list,test_file_list = load.load_train_validation_test_file_list(train_test_file_list_path)
    # print len(file_list)
    print(len(train_file_list))
    print(len(validation_file_list))
    print(len(test_file_list))
    load.get_input_2(train_file_list, trackDictPath)
    y_train, y_valid, y_test = load.get_train_validation_test_y(
        train_file_list, validation_file_list, test_file_list, trackDictPath,
        yType)
    print('y_train', len(y_train))
    print('y_valid', len(y_valid))
    print('y_test', len(y_test))
    print(type(y_train))
    # print (y_train[0].shape,'train shape')
    # train_file_list = train_file_list[:2000]
    # validation_file_list = validation_file_list[-1000:]
    # test_file_list = test_file_list[:1000]
    # y_train = y_train[:2000]
    # y_valid = y_valid[-1000:]
    # y_test = y_test[:1000]
    print(get_category_reverse_back(y_train), 'set_y_train')
    print(get_category_reverse_back(y_valid), 'set_y_valid')
    print(get_category_reverse_back(y_test), 'set_y_test')
    print(y_train.shape)
    print(train_file_list, 'train_file_list')
    print(validation_file_list, 'validation_file_list')
    print(test_file_list, 'test_file_list')
    x_train = load.get_x(train_file_list, img_rows, img_cols, mean_v, std_v)
    x_valid = load.get_x(validation_file_list, img_rows, img_cols, mean_v,
                         std_v)
    x_test = load.get_x(test_file_list, img_rows, img_cols, mean_v, std_v)
    print(x_train.shape)
    print(y_train.shape)
    train_generator = load.get_chunk(train_file_list, y_train, img_rows,
                                     img_cols, num_labels)
    validation_generator = load.get_chunk(validation_file_list, y_valid,
                                          img_rows, img_cols, num_labels)
    test_generator = load.get_test_chunk(test_file_list, img_rows, img_cols)
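    # note: training below uses the in-memory arrays (x_train/y_train and
    # x_valid/y_valid) via model_training_whole, while prediction uses the test
    # generator; the generator-based training calls are left commented out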
    print(model.layers[0].get_config())
    print(model.layers[-1].get_config())
    if os.path.exists(model_check_pointer_file):
        model.load_weights(model_check_pointer_file)
    # hist = training(model,train_generator,validation_generator,img_rows,img_cols,128,nb_epoch,len(train_file_list),100, nb_worker,model_check_pointer_file)
    # hist = model_training(model,train_generator,validation_generator,img_rows,img_cols,32,nb_epoch,len(train_file_list),model_check_pointer_file)
    hist = classification_model.model_training_whole(model, x_train, y_train,
                                                     x_valid, y_valid,
                                                     batch_size, nb_epoch,
                                                     model_check_pointer_file)
    with open(hist_path, 'w') as f:
        json.dump(hist.history, f)
    model.load_weights(model_check_pointer_file)
    predictions = model_predicting(model, test_generator, len(y_test))
    _predictions = np.argmax(predictions, 1)
    _labels = np.argmax(y_test, 1)
    write_to_csv(test_file_list, _predictions, _labels, csv_path)
    accuracy, cm = get_accuracy(_predictions, _labels, True)
    print(accuracy, 'test accuracy')
    print(optimizer_choice, 'optimizer_choice')
    print(cm, 'cm')
    cm = cm.tolist()
    with open(confusion_matrix_path, 'w') as f:
        json.dump(cm, f)
    t2 = time.time()
    print('using' + str(t2 - t1))