# Query offers and launch the run for the experiment bound to EXP.
#
# NOTE(review): both candidate assignments below are commented out in this
# snippet, so EXP has to be bound already by the time this code runs.
# EXP = merge.Merge_BertBase_SquadDonor_4096
# EXP = merge.Merge_BertBase_HighResource_SquadDonor_4096

execution_items = EXP.create_all_execution_items()
print(f"Number of execution items to process: {len(execution_items)}")

# Offer filter clauses, joined with single spaces into one query string.
# The "dlperf >= 16" clause is currently disabled.
_offer_filter = " ".join(
    (
        "reliability > 0.95",
        "num_gpus=1",
        "dph < 2.25",
        "inet_down > 100",
        "inet_up > 75",
        "cuda_vers >= 11.0 has_avx = true",
    )
)
_offer_query = vastai.OfferQuery(
    queries_str=_offer_filter,
    order_str="dlperf_usd-",
)

# Alternative worker count kept from the original as a comment: num_workers=10.
vast_params = vastai.create_supervisor_params(
    EXP,
    execution_items=execution_items,
    num_workers=1,
    offer_query=_offer_query,
    disk_gb=13,
    image="tensorflow/tensorflow:2.4.0-gpu",
)

# Report how many offers satisfy the query before launching anything.
offers = api_wrapper.query_offers(vast_params)
print(f"Number of acceptable offers: {len(offers)}")

# Hand the execution items and both parameter objects to gce.launch.
launch_params = gce.GceParams()
node, deploy = gce.launch(execution_items, vast_params, launch_params)
# Query offers and launch the run for fisher.FisherComputation_Base_MnliRte.
EXP = fisher.FisherComputation_Base_MnliRte

execution_items = EXP.create_all_execution_items()
print(f"Number of execution items to process: {len(execution_items)}")

# Offer filter clauses, joined with single spaces into one query string.
# The "dlperf >= 16" clause is currently disabled.
_offer_filter = " ".join(
    (
        "reliability > 0.95",
        "num_gpus=1",
        "dph < 0.5",
        "inet_down > 100",
        "inet_up > 50",
        "gpu_ram >= 10",
        "cuda_vers >= 11.0 has_avx = true",
    )
)
_offer_query = vastai.OfferQuery(
    queries_str=_offer_filter,
    order_str="dlperf_usd-",
)

vast_params = vastai.create_supervisor_params(
    EXP,
    execution_items=execution_items,
    num_workers=2,
    offer_query=_offer_query,
    disk_gb=16,
)

# Report how many offers satisfy the query before launching anything.
offers = api_wrapper.query_offers(vast_params)
print(f"Number of acceptable offers: {len(offers)}")

# Hand the execution items and both parameter objects to gce.launch.
launch_params = gce.GceParams()
node, deploy = gce.launch(execution_items, vast_params, launch_params)
# Build supervisor parameters for fisher.FisherComputation, count the offers
# matching the query, then enumerate the experiment's execution items.
EXP = fisher.FisherComputation

# NOTE(review): launch_params is not used again within this snippet;
# presumably it is consumed by later code -- confirm.
launch_params = gce.GceParams()

# Offer filter clauses, joined with single spaces into one query string.
_offer_filter = " ".join(
    (
        "reliability > 0.95",
        "num_gpus=1",
        "dph < 0.5",
        "inet_down > 75",
        "inet_up > 75",
        "dlperf >= 16",
        "cuda_vers >= 11.0 has_avx = true",
    )
)
_offer_query = vastai.OfferQuery(
    queries_str=_offer_filter,
    order_str="dlperf_usd-",
)

# Unlike a call that passes execution_items, this one supplies only the
# experiment, worker count, offer query and disk size.
vast_params = vastai.create_supervisor_params(
    EXP,
    num_workers=6,
    offer_query=_offer_query,
    disk_gb=12,
)

offers = api_wrapper.query_offers(vast_params)
print(f"Number of acceptable offers: {len(offers)}")

execution_items = EXP.create_all_execution_items()
print(f"Number of execution items to process: {len(execution_items)}")