def main():
    # parties config
    guest = 9999
    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    tag_data = {"name": "tag_value_1", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # original csv file path
    pipeline_upload.add_upload_data(file=os.path.join(DATA_BASE, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],      # table name
                                    namespace=dense_data["namespace"],  # namespace
                                    head=1, partition=partition)        # data info
    pipeline_upload.add_upload_data(file=os.path.join(DATA_BASE, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0, partition=partition)
    # upload all data
    pipeline_upload.upload(drop=1)
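These snippets are function bodies excerpted from FATE pipeline upload examples; the module-level header they assume is not shown. A minimal sketch for the DATA_BASE-based variants, assuming the FATE 1.x pipeline client is installed (the DATA_BASE value is an illustrative placeholder, not taken from the source):

# Minimal assumed header for the DATA_BASE-based snippets; the import path
# matches the FATE 1.x pipeline client, DATA_BASE itself is a placeholder.
import os

from pipeline.backend.pipeline import PipeLine

DATA_BASE = "/data/projects/fate"  # hypothetical FATE installation root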
def main():
    # parties config
    guest = 9999
    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    tag_data = {"name": "tag_value_1", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # csv file name from python path & file name
    pipeline_upload.add_upload_data(file=os.path.join(DATA_BASE, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],      # table name
                                    namespace=dense_data["namespace"],  # namespace
                                    head=1, partition=partition,        # data info
                                    id_delimiter=",")                   # id delimiter, needed for spark
    pipeline_upload.add_upload_data(file=os.path.join(DATA_BASE, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0, partition=partition,
                                    id_delimiter=",")
    # upload all data
    pipeline_upload.upload(drop=1)

    import json
    print(json.dumps(pipeline_upload._upload_conf(), indent=4))
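Two details in the variant above are easy to miss: the snippet's own comment flags id_delimiter as required when the deployment runs on the Spark backend, and _upload_conf() is a private PipeLine helper (note the leading underscore), used here only to print the generated upload configuration for inspection.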
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    data_base = config.data_base_dir

    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    tag_data = {"name": "tag_value_1", "namespace": f"experiment{namespace}"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # path to csv file(s) to be uploaded
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],      # table name
                                    namespace=dense_data["namespace"],  # namespace
                                    head=1, partition=partition,        # data info
                                    id_delimiter=",",
                                    extend_sid=True)
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0, partition=partition,
                                    id_delimiter=",",
                                    extend_sid=True)
    # upload both tables
    pipeline_upload.upload(drop=1)
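This config-driven variant adds extend_sid=True, which asks FATE to generate an extra sample-id column during upload; this is useful when the raw rows carry no unique identifier of their own. The namespace argument is folded into the table namespace (f"experiment{namespace}"), letting callers isolate repeated runs under distinct namespaces.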
def main(data_base=DATA_BASE):
    # parties config
    guest = 9999
    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    tag_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # path to csv file(s) to be uploaded; modify to upload designated data.
    # This example targets the standalone version; for the cluster version,
    # upload data on each party separately.
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],      # table name
                                    namespace=dense_data["namespace"],  # namespace
                                    head=1, partition=partition)        # data info
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/breast_hetero_host.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=1, partition=partition)
    # upload data
    pipeline_upload.upload(drop=1)
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    backend = config.backend
    work_mode = config.work_mode
    data_base = config.data_base

    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    tag_data = {"name": "tag_value_1", "namespace": f"experiment{namespace}"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # csv file name from python path & file name
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],      # table name
                                    namespace=dense_data["namespace"],  # namespace
                                    head=1, partition=partition,        # data info
                                    id_delimiter=",")                   # needed for spark backend
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0, partition=partition,
                                    id_delimiter=",")
    # upload all data
    pipeline_upload.upload(work_mode=work_mode, backend=backend, drop=1)
def test_upload(self):
    upload_pipeline = PipeLine()
    upload_pipeline.add_upload_data(file=self.file,
                                    table_name=self.table_name,
                                    namespace=self.job_id)
    upload_pipeline.upload()

    upload_count = session.get_data_table(self.table_name, self.job_id).count()
    return upload_count == self.data_count
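This test is the only snippet that verifies an upload after the fact: it reads the table back and passes only when the stored row count equals the expected self.data_count. The session.get_data_table call appears to come from the older FATE session interface rather than the pipeline client itself.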
def main():
    # parties config
    guest = 9999
    # 0 for eggroll, 1 for spark
    backend = Backend.EGGROLL
    # 0 for standalone, 1 for cluster
    work_mode = WorkMode.STANDALONE
    # use the work mode below for cluster deployment
    # work_mode = WorkMode.CLUSTER

    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    tag_data = {"name": "tag_value_1", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # original csv file path
    pipeline_upload.add_upload_data(file=os.path.join(DATA_BASE, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],      # table name
                                    namespace=dense_data["namespace"],  # namespace
                                    head=1, partition=partition)        # data info
    pipeline_upload.add_upload_data(file=os.path.join(DATA_BASE, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0, partition=partition)
    # upload all data
    pipeline_upload.upload(work_mode=work_mode, backend=backend, drop=1)
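This variant and the last one hard-code the Backend and WorkMode enums, which belong to older FATE 1.x releases where upload() still accepted work_mode and backend arguments. The extra import they assume, sketched under that assumption:

# Extra import assumed by the Backend/WorkMode variants (older FATE 1.x
# releases, where upload() still took work_mode and backend arguments).
from pipeline.backend.config import Backend, WorkMode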
def main(data_base=DATA_BASE):
    # parties config
    guest = 9999
    # 0 for eggroll, 1 for spark
    backend = Backend.EGGROLL
    # 0 for standalone, 1 for cluster
    work_mode = WorkMode.STANDALONE
    # use the work mode below for cluster deployment
    # work_mode = WorkMode.CLUSTER

    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    tag_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # path to csv file(s) to be uploaded; modify to upload designated data.
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],      # table name
                                    namespace=dense_data["namespace"],  # namespace
                                    head=1, partition=partition)        # data info
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/breast_hetero_host.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=1, partition=partition)
    # upload data
    pipeline_upload.upload(work_mode=work_mode, backend=backend, drop=1)
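None of the variants show how main() is invoked. FATE's example scripts conventionally wrap it in a small argparse entry point; a minimal sketch following that convention (the -config flag name is an assumption, not taken from these snippets):

# Hypothetical entry point following the common FATE example convention;
# the -config flag and parser name are assumptions.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, help="config file")
    args = parser.parse_args()
    if args.config is not None:
        main(args.config)
    else:
        main()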