def __init__(self, prefix=''):
    """
    Index every run configuration found under ``prefix``.

    Lists ``CONFIG_DIR/prefix`` for entries whose name contains
    ``config<digits>`` and, for each index found, fills
    ``self.config_dict[index]`` with:

    - ``'path'``: the run's save directory, ``SAVE_DIR/prefix/config<index>``;
    - one entry per setting exposed by the run's ``ConfigReader``;
    - ``'OCCLUDER'``: whether ``'occluder'`` occurs in the ``EXPE`` setting;
    - ``'completed'``: ``'yes'``, ``'no'`` or ``'partial'`` (see below).

    A run is marked ``'no'`` when its error log is non-empty (only checked
    when ``prefix`` is non-empty) or when ``model.pt`` / ``train_data.hdf5``
    is missing from its save directory. It is marked ``'partial'`` when the
    length of the ``'energy'`` entry of the training data differs from the
    ``NUM_EPOCHS`` setting; this check is applied last and therefore
    overrides an earlier ``'no'``.

    Args:
        prefix: sub-directory, relative to both ``SAVE_DIR`` and
            ``CONFIG_DIR``, holding the runs to index. A non-empty prefix
            means the results come from clusters and were computed with
            slurm, so the ``config<index>_log.err`` files are read.

    Raises:
        OSError: if the config directory, a run's save directory or (with
            a non-empty prefix) a run's error log cannot be read.
    """
    self.prefix = prefix
    self.save_dir = op.join(SAVE_DIR, prefix)
    self.config_dir = op.join(CONFIG_DIR, prefix)
    self.config_dict = {}

    # Collect the config indices present in the config directory. The
    # pattern is compiled once and searched once per entry; dict.fromkeys
    # drops repeated indices (e.g. 'config1' and 'config1.bak') while
    # keeping the order in which they were listed.
    pattern = re.compile(r'config([0-9]+)')
    matches = (pattern.search(name) for name in os.listdir(self.config_dir))
    config_ids = dict.fromkeys(int(m[1]) for m in matches if m)

    for c_idx in config_ids:
        entry = {'path': op.join(self.save_dir, f'config{c_idx}')}
        self.config_dict[c_idx] = entry

        # read all config params and store them in the run's dict
        config = ConfigReader(op.join(self.config_dir, f'config{c_idx}'))
        for name, setting in config.settings.items():
            entry[name] = setting.get_value()

        # add the special OCCLUDER param
        entry['OCCLUDER'] = 'occluder' in entry['EXPE']

        # did the run complete without error ?
        entry['completed'] = 'yes'
        if prefix:
            # this means the results come from clusters and were computed
            # with slurm, so we can read the error logs
            err_log_path = op.join(self.save_dir, f'config{c_idx}_log.err')
            with open(err_log_path, 'r') as errf:
                # any content at all in the error log counts as a failure
                if errf.read():
                    entry['completed'] = 'no'

        # check if model file and train data are present
        files = os.listdir(entry['path'])
        if 'model.pt' not in files:
            entry['completed'] = 'no'
        if 'train_data.hdf5' not in files:
            entry['completed'] = 'no'
            # Without the training data there is nothing to compare with
            # NUM_EPOCHS; trying to load the missing file would abort the
            # indexing of every remaining run.
            continue

        train_data = utl.load_dict_h5py(
            op.join(entry['path'], 'train_data.hdf5'))
        num_logged = len(train_data['energy'])
        num_epochs = config.val('NUM_EPOCHS')
        if num_logged != num_epochs:
            print(f'config {c_idx}')
            print(
                f'length of train data ({num_logged}) does '
                f'not match number of epochs ({num_epochs})')
            entry['completed'] = 'partial'
# Parse the command line and load the configuration it selects. The config
# is chosen through the `config` attribute of the parsed arguments, which is
# substituted into the "configs/config<id>" path.
args = parser.parse_args()
config_id = args.config
config = ConfigReader(f"configs/config{config_id}")

# Relational / transition options.
RELATIONAL = config.val("RELATIONAL")
RELATION_TYPE = config.val("RELATION_TYPE")
RECURRENT_TRANSITION = config.val("RECURRENT_TRANSITION")

# Training mode, G function and hinge value (the latter wrapped in a tensor).
TRAINING = config.val("TRAINING")
G_FUNC = config.val("G_FUNC")
HINGE = torch.tensor(config.val("HINGE"))

# Model dimensions.
NUM_SLOTS = config.val("NUM_SLOTS")
SLOT_DIM = config.val("SLOT_DIM")
HIDDEN_DIM = config.val("HIDDEN_DIM")
NUM_HEADS = config.val("NUM_HEADS")

# Experiment name and optimisation hyper-parameters.
EXPE = config.val("EXPE")
NUM_EPOCHS = config.val("NUM_EPOCHS")
LEARNING_RATE = config.val("LEARNING_RATE")
BATCH_SIZE = config.val("BATCH_SIZE")