def __update_joined_datasets(dataset, update):
    """Propagate an update on `dataset` to the datasets joined with it.

    Iterates over `dataset.joined_datasets`, whose entries unpack as
    `(direction, other_dataset, on, j_dataset)`, and applies the update to
    each `j_dataset`.

    :param dataset: The dataset that was updated.
    :param update: A dict describing the update. The first key present, in
        the order 'add', 'delete', 'edit', selects the action: 'add' holds
        the dframe of new rows, 'delete' the argument passed on to
        `delete_observation`, and 'edit' the positional arguments passed on
        to `update_observation`.
    """
    # The kind of update does not change while looping, so check it once.
    is_add = 'add' in update
    added_dframe = update['add'] if is_add else None

    for direction, other_dataset, on, j_dataset in dataset.joined_datasets:
        if is_add:
            if direction == 'left':
                # Only proceed if the join column is in the new rows.
                if on not in added_dframe.columns:
                    continue

                left_dframe = other_dataset.dframe(padded=True)

                # Only proceed if a new `on` value occurs in the `on`
                # column of the left-hand side.
                if set(added_dframe[on]) & set(left_dframe[on]):
                    merged_dframe = join_dataset(left_dframe, dataset, on)
                    j_dataset.replace_observations(merged_dframe)

                    # TODO is it OK not to propagate the join here?
            else:
                # Join into a per-iteration local: rebinding the shared
                # dframe would hand every later joined dataset rows that
                # were already merged with this `other_dataset`.
                joined_dframe = added_dframe

                # If `on` is in the new data, join with the existing data.
                if on in added_dframe:
                    joined_dframe = join_dataset(
                        added_dframe, other_dataset, on)

                calculate_updates(j_dataset, df_to_jsondict(joined_dframe),
                                  parent_dataset_id=dataset.dataset_id)
        elif 'delete' in update:
            j_dataset.delete_observation(update['delete'])
        elif 'edit' in update:
            j_dataset.update_observation(*update['edit'])
def join(self, other, on):
    """Create a new dataset by joining this dataset with `other`.

    The joined dataset is registered on both inputs through
    `add_joined_dataset`: as a 'right' link on this dataset and as a 'left'
    link on `other`.

    :param other: The other dataset to join.
    :param on: The column in this and the `other` dataset to join on.

    :returns: The newly created merged dataset.
    """
    base_dframe = self.dframe()

    if len(base_dframe.columns) == 0:
        # No columns means an empty dataset; use a place holder dframe to
        # simulate columns.
        base_dframe = self.place_holder_dframe()

    joined_dframe = join_dataset(base_dframe, other, on)
    result = self.create()

    if not (self.num_rows and other.num_rows):
        # At least one side has no rows: only build the schema and mark the
        # new dataset ready.
        result.build_schema(joined_dframe, set_num_columns=True)
        result.ready()
    else:
        result.save_observations(joined_dframe)

    right_link = ('right', other.dataset_id, on, result.dataset_id)
    self.add_joined_dataset(right_link)

    left_link = ('left', self.dataset_id, on, result.dataset_id)
    other.add_joined_dataset(left_link)

    return result