def store_datasets_to_files_in_lotus(status, config):
    """
    Finds and stores all files belonging to each dataset.

    Builds one ``scan_dataset.py --make-list`` command per dataset id
    returned by ``util.find_dataset(filename, "all")`` and passes the whole
    batch to ``util.run_tasks_in_lotus``.

    :param status: not used by this function; kept so existing callers
        keep working.
    :param config: mapping that must provide the keys ``"filename"``,
        ``"make-list"`` (directory part of the ``<dataset_id>.txt`` path
        given to ``--make-list``) and ``"num-processes"`` (converted with
        ``int`` before being passed on).
    """
    # Get file.
    filename = config["filename"]

    # Extract dataset ids and paths.
    datasets = util.find_dataset(filename, "all")

    current_dir = os.getcwd()
    output_dir = config["make-list"]

    # Create the commands that will create the files containing the paths
    # to data files. The keys are iterated directly instead of being indexed
    # by position: the result of keys() is not subscriptable on Python 3.
    scan_commands = [
        "python %s/scan_dataset.py -f %s -d %s --make-list %s/%s.txt"
        % (current_dir, filename, dataset_id, output_dir, dataset_id)
        for dataset_id in datasets.keys()
    ]

    lotus_max_processes = config["num-processes"]

    # Run each command in lotus.
    util.run_tasks_in_lotus(scan_commands, int(lotus_max_processes),
                            user_wait_time=30)
def read_and_scan_datasets_in_lotus(config):
    """
    Builds one ``scan_dataset.py`` command per dataset and runs them in lotus.

    The dataset ids come from ``util.find_dataset(filename, "all")``. Each
    command is printed as it is created, then the full list is passed to
    ``util.run_tasks_in_lotus``.

    :param config: mapping that must provide the keys ``"filename"``,
        ``"level"`` (forwarded to the script as ``-l``) and
        ``"num-processes"`` (converted with ``int`` before being passed on).
    """
    filename = config["filename"]
    level = config["level"]
    current_dir = os.getcwd()

    datasets = util.find_dataset(filename, "all")

    commands = []
    # Iterate the keys directly rather than indexing keys() by position,
    # which fails on Python 3 where keys() returns a view.
    for dataset_id in datasets.keys():
        command = "python %s/scan_dataset.py -f %s -d %s -l %s" % (
            current_dir, filename, dataset_id, level)
        # Single-argument parenthesised form: same output under the
        # Python 2 print statement and the Python 3 print function.
        print("created command :" + command)
        commands.append(command)

    lotus_max_processes = config["num-processes"]

    util.run_tasks_in_lotus(commands, int(lotus_max_processes),
                            user_wait_time=30)