        else [opts.data_distribution]
}

for problem, distributions in problems.items():
    for distribution in distributions or [None]:
        for graph_size in opts.graph_sizes:

            datadir = os.path.join(opts.data_dir, problem)
            os.makedirs(datadir, exist_ok=True)

            if opts.filename is None:
                filename = os.path.join(datadir, "{}{}{}_{}_seed{}.pkl".format(
                    problem,
                    "_{}".format(distribution) if distribution is not None else "",
                    graph_size, opts.name, opts.seed))
            else:
                filename = check_extension(opts.filename)

            assert opts.f or not os.path.isfile(check_extension(filename)), \
                "File already exists! Try running with -f option to overwrite."

            np.random.seed(opts.seed)
            if problem == 'vrp':
                dataset = generate_vrp_data(opts.dataset_size, graph_size)
            elif problem == 'vrptw':
                dataset = generate_vrptw_data(opts.dataset_size, graph_size)
            else:
                assert False, "Unknown problem: {}".format(problem)

            save_dataset(dataset, filename)
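# -- Illustrative sketch (not part of this file): generate_vrp_data and
# generate_vrptw_data are defined elsewhere in the repo. As a rough guide only,
# a minimal VRP generator could look like the version below, assuming uniform
# coordinates in the unit square, integer demands in 1..9 and a fixed capacity
# per graph size (the capacity table is an assumed value, and the VRPTW variant
# would additionally need time-window fields):

import numpy as np

def generate_vrp_data(dataset_size, vrp_size):
    capacities = {20: 30., 50: 40., 100: 50.}  # assumed capacity per graph size
    return list(zip(
        np.random.uniform(size=(dataset_size, 2)).tolist(),                 # depot location
        np.random.uniform(size=(dataset_size, vrp_size, 2)).tolist(),       # node locations
        np.random.randint(1, 10, size=(dataset_size, vrp_size)).tolist(),   # integer demands
        np.full(dataset_size, capacities[vrp_size]).tolist()                # vehicle capacity
    ))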
parser.add_argument('--dataset_size', type=int, default=10000,
                    help="Size of the dataset")
parser.add_argument('--graph_size', type=int, default=20,
                    help="Size of problem instances (default 20)")
parser.add_argument("-f", action='store_true', help="Set true to overwrite")
parser.add_argument('--seed', type=int, default=4321, help="Random seed")

opts = parser.parse_args()

datadir = os.path.join(opts.data_dir, opts.problem)
os.makedirs(datadir, exist_ok=True)

if opts.filename is None:
    filename = os.path.join(datadir, "{}{}_{}_seed{}.pkl".format(
        opts.problem, opts.graph_size, opts.name, opts.seed))
else:
    filename = check_extension(opts.filename)

assert opts.f or not os.path.isfile(check_extension(filename)), \
    "File already exists! Try running with -f option to overwrite."

np.random.seed(opts.seed)
dataset = generate_tsp_data(opts.dataset_size, opts.graph_size)
print(dataset[0])  # Sanity check: print the first generated instance
save_dataset(dataset, filename)
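# -- Illustrative sketch (not part of this file): generate_tsp_data,
# check_extension and save_dataset are imported from the repo's data utilities.
# Plausible minimal versions, assuming a TSP instance is just a list of uniform
# 2D coordinates and datasets are stored as pickle files:

import os
import pickle

import numpy as np

def generate_tsp_data(dataset_size, tsp_size):
    # One instance = tsp_size node coordinates drawn uniformly from the unit square
    return np.random.uniform(size=(dataset_size, tsp_size, 2)).tolist()

def check_extension(filename):
    # Ensure the filename ends in .pkl
    if os.path.splitext(filename)[1] != ".pkl":
        return filename + ".pkl"
    return filename

def save_dataset(dataset, filename):
    # Create the target directory if needed, then pickle the dataset
    filedir = os.path.split(filename)[0]
    if filedir and not os.path.isdir(filedir):
        os.makedirs(filedir)
    with open(check_extension(filename), 'wb') as f:
        pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)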
parser.add_argument("-f", action='store_true', help="Set true to overwrite") parser.add_argument("-o", default=None, help="Name of the results file to write") parser.add_argument("--cpus", type=int, help="Number of CPUs to use, defaults to all cores") parser.add_argument('--disable_cache', action='store_true', help='Disable caching') parser.add_argument('--progress_bar_mininterval', type=float, default=0.1, help='Minimum interval') parser.add_argument('-n', type=int, help="Number of instances to process") parser.add_argument('--offset', type=int, help="Offset where to start processing") parser.add_argument('--results_dir', default='results', help="Name of results directory") opts = parser.parse_args() assert opts.o is None or len(opts.datasets) == 1, "Cannot specify result filename with more than one dataset" for dataset_path in opts.datasets: assert os.path.isfile(check_extension(dataset_path)), "File does not exist!" dataset_basename, ext = os.path.splitext(os.path.split(dataset_path)[-1]) if opts.o is None: results_dir = os.path.join(opts.results_dir, "pctsp", dataset_basename) os.makedirs(results_dir, exist_ok=True) out_file = os.path.join(results_dir, "{}{}{}-{}{}".format( dataset_basename, "offs{}".format(opts.offset) if opts.offset is not None else "", "n{}".format(opts.n) if opts.n is not None else "", opts.method, ext )) else: out_file = opts.o
def run(opts):

    # Disable wandb syncing (run offline)
    os.environ['WANDB_MODE'] = 'dryrun'

    # Initialize wandb
    wandb.init(project='Network1')

    # Load all arguments into the config so they are saved as hyperparameters
    wandb.config.update(opts)

    # Pretty print the run args
    pp.pprint(vars(opts))

    # Set the random seed
    torch.manual_seed(opts.seed)

    # Optionally configure tensorboard
    tb_logger = None
    if not opts.no_tensorboard:
        tb_logger = TbLogger(os.path.join(
            opts.log_dir, "{}_{}".format(opts.problem, opts.graph_size), opts.run_name))

    os.makedirs(opts.save_dir)

    # Save arguments so the exact configuration can always be found
    with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
        json.dump(vars(opts), f, indent=True)

    # Set the device
    opts.device = torch.device("cuda" if opts.use_cuda else "cpu")

    # Load the previously created datasets from their paths
    train_dataset = load_dataset(filename=opts.train_dataset)
    # For now, the val and train datasets are the same
    val_dataset = load_dataset(filename=opts.val_dataset)

    # Initialize the model (needs to be modified for the regression case)
    model = Net1(n_features=train_dataset[0].num_features,
                 embed_dim=opts.embed_dim,
                 out_features=opts.num_output).to(opts.device)

    # Multi-GPU model (disabled for now);
    # enable once the model runs successfully on a single GPU
    # if opts.use_cuda and torch.cuda.device_count() > 1:
    #     print("No. of GPUs:", torch.cuda.device_count())
    #     model = torch.nn.DataParallel(model)

    # Initialize the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

    # Save the pytorch model and track all gradients and optionally parameters
    wandb.watch(model, log='all')  # "gradients", "parameters", "all", or None

    # Start training
    training_status = train(model, optimizer, train_dataset, val_dataset, tb_logger, opts)
    print(training_status)

    # Compute the embeddings and save them to create the dataset for Network2
    scores, embeddings = compute_embeddings(model, opts, data=train_dataset)

    filename = os.path.join(opts.data_dir, "op{}{}_{}N{}_seed{}_label_{}_embeddings.pkl".format(
        "_{}".format(opts.data_distribution) if opts.data_distribution is not None else "",
        opts.graph_size, opts.name, opts.dataset_size, opts.dataset_seed, opts.label_type))
    assert opts.f or not os.path.isfile(check_extension(filename)), \
        "File already exists! Try running with -f option to overwrite."
    print(filename)
    save_dataset([embeddings, scores], filename)
    print("Embeddings computed, shape: {}".format(embeddings.shape))
    print("Scores computed, shape: {}".format(scores.shape))
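# -- Illustrative sketch (not part of this file): compute_embeddings is defined
# elsewhere. Assuming Net1 is a PyTorch Geometric model whose forward pass
# returns a (score, embedding) pair per batch, and an assumed
# opts.eval_batch_size attribute, a minimal version could look like:

import torch
from torch_geometric.loader import DataLoader  # import path varies with PyG version

@torch.no_grad()
def compute_embeddings(model, opts, data):
    model.eval()
    scores, embeddings = [], []
    for batch in DataLoader(data, batch_size=opts.eval_batch_size):
        batch = batch.to(opts.device)
        score, embedding = model(batch)  # assumes the model returns both outputs
        scores.append(score.cpu())
        embeddings.append(embedding.cpu())
    # Concatenate the per-batch results into single tensors
    return torch.cat(scores, dim=0), torch.cat(embeddings, dim=0)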