# Report how many test sentiment files were collected in `test_sentiment_files`.
# NOTE(review): this physical line is a whitespace-collapsed notebook fragment.
# Only the leading print(...) call is executable; everything after the first
# `#` is parsed as a comment, even though it contains the text of the train
# "Images / Metadata / Sentiment" statements, and it stops inside an
# unfinished `.apply(` call. Confirm against the original notebook before
# relying on it.
print(f'num of test sentiment files: {len(test_sentiment_files)}') # ### Train # In[ ]: # Images: train_df_ids = train[['PetID']] print(train_df_ids.shape) # Metadata: train_df_ids = train[['PetID']] train_df_metadata = pd.DataFrame(train_metadata_files) train_df_metadata.columns = ['metadata_filename'] train_metadata_pets = train_df_metadata['metadata_filename'].apply( lambda x: x.split(split_char)[-1].split('-')[0]) train_df_metadata = train_df_metadata.assign(PetID=train_metadata_pets) print(len(train_metadata_pets.unique())) pets_with_metadatas = len( np.intersect1d(train_metadata_pets.unique(), train_df_ids['PetID'].unique())) print( f'fraction of pets with metadata: {pets_with_metadatas / train_df_ids.shape[0]:.3f}' ) # Sentiment: train_df_ids = train[['PetID']] train_df_sentiment = pd.DataFrame(train_sentiment_files) train_df_sentiment.columns = ['sentiment_filename'] train_sentiment_pets = train_df_sentiment['sentiment_filename'].apply(
# ---------------------------------------------------------------------------
# Test-set files
# ---------------------------------------------------------------------------
# glob returns paths in arbitrary order; sorting keeps the lists stable
# between runs.
test_image_files = sorted(
    glob.glob('../input/petfinder-adoption-prediction/test_images/*.jpg'))
test_metadata_files = sorted(
    glob.glob('../input/petfinder-adoption-prediction/test_metadata/*.json'))
test_sentiment_files = sorted(
    glob.glob('../input/petfinder-adoption-prediction/test_sentiment/*.json'))

print(f'num of test images files: {len(test_image_files)}')
print(f'num of test metadata files: {len(test_metadata_files)}')
print(f'num of test sentiment files: {len(test_sentiment_files)}')

# ---------------------------------------------------------------------------
# Train: how many pets have metadata / sentiment files
# ---------------------------------------------------------------------------
# Images:
# Single-column frame holding the PetID of every row in `train`. It is the
# reference set for the coverage checks below, so it is built once here
# instead of being re-assigned before each section.
train_df_ids = train[['PetID']]
print(train_df_ids.shape)

# Metadata:
# Passing `columns=` to the constructor (rather than assigning `.columns`
# afterwards) also works when the file list is empty.
train_df_metadata = pd.DataFrame(
    train_metadata_files, columns=['metadata_filename'])
# PetID = last `split_char`-separated component of the path, up to the first
# '-'. (`split_char` is defined elsewhere; presumably the path separator.)
train_metadata_pets = train_df_metadata['metadata_filename'].apply(
    lambda x: x.split(split_char)[-1].split('-')[0])
train_df_metadata = train_df_metadata.assign(PetID=train_metadata_pets)
print(len(train_metadata_pets.unique()))

# Number of distinct PetIDs that appear both in the metadata files and in
# the training table.
pets_with_metadatas = len(
    np.intersect1d(train_metadata_pets.unique(),
                   train_df_ids['PetID'].unique()))
# The denominator is the number of rows in `train_df_ids`.
print(
    f'fraction of pets with metadata: '
    f'{pets_with_metadatas / train_df_ids.shape[0]:.3f}'
)

# Sentiment:
train_df_sentiment = pd.DataFrame(
    train_sentiment_files, columns=['sentiment_filename'])
# Here the PetID is the last path component up to the first '.', whereas
# the metadata file names are cut at the first '-'.
train_sentiment_pets = train_df_sentiment['sentiment_filename'].apply(
    lambda x: x.split(split_char)[-1].split('.')[0])
train_df_sentiment = train_df_sentiment.assign(PetID=train_sentiment_pets)
print(len(train_sentiment_pets.unique()))

# Number of distinct PetIDs that appear both in the sentiment files and in
# the training table.
pets_with_sentiments = len(
    np.intersect1d(train_sentiment_pets.unique(),
                   train_df_ids['PetID'].unique()))