def read_datasets_from_files_and_scan_in_localhost(config):
    """
    Build scan_dataset.py commands for every cache file and run them
    one after another on the local machine.

    basic algorithm:
    1. Create the list of cache files from config["filename"].
    2. Determine the number of lines contained in each file.
    3. Create one command per full chunk of int(config["num-files"])
       lines, plus one command for the remaining lines (if any).
    4. Store commands in a list.
    5. Go to the next file.
    6. Execute all commands sequentially through the shell.

    config keys read: "filename", "level", "num-files".
    config["start"] is not consulted; every file is split starting
    from offset 0.

    Raises ZeroDivisionError if int(config["num-files"]) is 0.
    """

    # Get basic options.
    filename_path = config["filename"]
    level = config["level"]
    num_files = config["num-files"]
    current_dir = os.getcwd()

    # Create the file list.
    # presumably an iterable of file paths -- verify against util.
    list_of_cache_files = util.build_file_list(filename_path)

    commands = []
    step = int(num_files)

    for filename in list_of_cache_files:

        # presumably an integer line count -- verify against util.
        num_of_lines = util.find_num_lines_in_file(filename)

        if num_of_lines == 0:
            continue

        # Calculate number of jobs. divmod() floors, so the task count
        # stays an integer on Python 3 as well (plain "/" would not).
        number_of_tasks, remainder = divmod(num_of_lines, step)

        start = 0
        for _ in range(number_of_tasks):
            command = (
                " python %s/scan_dataset.py -f %s"
                " --num-files %s --start %d -l %s"
                % (current_dir, filename, num_files, start, level)
            )
            start += step
            commands.append(command)

        # Include remaining lines. The literals are joined by implicit
        # concatenation, so no backslash or indentation whitespace ends
        # up inside the shell command.
        if remainder > 0:
            command = (
                "python %s/scan_dataset.py -f %s"
                " --num-files %d --start %d -l %s"
                % (current_dir, filename, remainder, start, level)
            )
            commands.append(command)

    # Run each command in localhost.
    # NOTE(review): commands are interpolated unquoted and executed with
    # shell=True; paths containing spaces or shell metacharacters will
    # not survive -- confirm that inputs are trusted.
    for command in commands:
        # Single-argument parenthesised form prints the same text under
        # both the Python 2 print statement and the Python 3 function.
        print("Executing command : %s" % (command))
        subprocess.call(command, shell=True)
def read_datasets_from_files_and_scan_in_lotus(config):
    """
    Build scan_dataset.py commands for every cache file and save them
    to "lotus_commands.txt".

    basic algorithm:
    1. Create the list of cache files from config["filename"].
    2. Determine the number of lines contained in each file.
    3. Create one command per full chunk of int(config["num-files"])
       lines, plus one command for the remaining lines (if any).
    4. Store commands in a list.
    5. Go to the next file.
    6. Write all commands to the file "lotus_commands.txt"
       (they are not submitted to lotus from here).

    config keys read: "filename", "level", "num-files".
    config["start"] is not consulted; every file is split starting
    from offset 0.

    Raises ZeroDivisionError if int(config["num-files"]) is 0.
    """

    # Get basic options.
    filename_path = config["filename"]
    level = config["level"]
    num_files = config["num-files"]
    current_dir = os.getcwd()

    # Create the file list.
    # presumably an iterable of file paths -- verify against util.
    list_of_cache_files = util.build_file_list(filename_path)

    commands = []
    step = int(num_files)

    # Shared by full chunks and the remainder; "%s" renders the integer
    # remainder exactly as "%d" would.
    command_template = (
        " python %s/scan_dataset.py -f %s --num-files %s --start %d -l %s"
    )

    for filename in list_of_cache_files:

        # presumably an integer line count -- verify against util.
        num_of_lines = util.find_num_lines_in_file(filename)

        if num_of_lines == 0:
            continue

        # Calculate number of jobs. divmod() floors, so the job count
        # stays an integer on Python 3 as well (plain "/" would not).
        number_of_jobs, remainder = divmod(num_of_lines, step)

        start = 0
        for _ in range(number_of_jobs):
            command = command_template % (
                current_dir, filename, num_files, start, level
            )
            start += step

            # Single-argument parenthesised form prints the same text
            # under both Python 2 and Python 3.
            print("created command : %s" % (command))
            commands.append(command)

        # Include remaining lines.
        if remainder > 0:
            command = command_template % (
                current_dir, filename, remainder, start, level
            )

            print("created command : %s" % (command))
            commands.append(command)

    # Direct submission via util.run_tasks_in_lotus (using
    # config["num-processes"] and a 30 second user wait time) is
    # disabled; a file with the commands is created instead.
    util.write_list_to_file_nl(commands, "lotus_commands.txt")