Example #1
0
def read_datasets_from_files_and_scan_in_localhost(config):

    """
    Build scan_dataset.py commands for every cache file and run them locally.

    basic algorithm:

    1. Build the file list from config["filename"].
    2. Ask util.find_num_lines_in_file for the size of each file and skip
       the files that report 0 lines.
    3. Create one command per full chunk of "num-files" lines, then one
       more command for the leftover lines (if any).
    4. Run every command, one after another, through the shell.

    config keys read: "filename", "level" and "num-files".
    config["start"] is not consulted: the --start offset always begins
    at 0 for each file and advances by "num-files" per command.

    Raises ZeroDivisionError when "num-files" converts to 0 and at least
    one file is not empty.
    """

    # Get basic options.
    filename_path = config["filename"]
    level = config["level"]
    num_files = config["num-files"]
    current_dir = os.getcwd()

    # Convert once; this is both the chunk size and the --start increment.
    step = int(num_files)

    # num_files is formatted with %s so it is passed on exactly as given
    # in the config, whatever its type.
    chunk_template = (
        " python %s/scan_dataset.py -f %s --num-files %s  --start %d  -l %s"
    )
    # The long run of blanks is part of the command text this function has
    # always produced; it is kept so the printed and executed command
    # strings stay unchanged (the shell ignores the extra whitespace).
    remainder_template = (
        "python %s/scan_dataset.py -f %s  "
        "                      --num-files %d  --start %d -l %s"
    )

    # Go to directory and create the file list.
    list_of_cache_files = util.build_file_list(filename_path)
    commands = []

    for filename in list_of_cache_files:

        num_of_lines = util.find_num_lines_in_file(filename)

        if num_of_lines == 0:
            continue

        # calculate number of jobs.
        # "//" is floor division on both Python 2 and Python 3, so the
        # result can always be handed to range().
        number_of_tasks = num_of_lines // step
        remainder = num_of_lines % step

        start = 0
        for _ in range(number_of_tasks):
            commands.append(
                chunk_template % (current_dir, filename, num_files, start, level)
            )
            start += step

        # include remaining lines that did not fill a whole chunk.
        if remainder > 0:
            commands.append(
                remainder_template
                % (current_dir, filename, remainder, start, level)
            )

    # Run each command in localhost.
    # NOTE(review): the commands are plain strings run with shell=True, so
    # a path containing whitespace or shell metacharacters will be
    # mis-parsed by the shell.
    for command in commands:
        # Single-argument print(...) gives the same output on Python 2
        # and Python 3.
        print("Executing command : %s" % command)
        subprocess.call(command, shell=True)
Example #2
0
def read_datasets_from_files_and_scan_in_lotus(config):

    """
    Build scan_dataset.py commands for every cache file and save them to
    "lotus_commands.txt".

    basic algorithm:

    1. Build the file list from config["filename"].
    2. Ask util.find_num_lines_in_file for the size of each file and skip
       the files that report 0 lines.
    3. Create one command per full chunk of "num-files" lines, then one
       more command for the leftover lines (if any).
    4. Print each command as it is created and store it in a list.
    5. Go to the next file.
    6. Write all commands to "lotus_commands.txt" through
       util.write_list_to_file_nl. Nothing is submitted to lotus here.

    config keys read: "filename", "level" and "num-files".
    config["start"] is not consulted: the --start offset always begins
    at 0 for each file and advances by "num-files" per command.

    Raises ZeroDivisionError when "num-files" converts to 0 and at least
    one file is not empty.
    """

    # Get basic options.
    filename_path = config["filename"]
    level = config["level"]
    num_files = config["num-files"]
    current_dir = os.getcwd()

    # Convert once; this is both the chunk size and the --start increment.
    step = int(num_files)

    # num_files is formatted with %s so it is passed on exactly as given
    # in the config, whatever its type.
    chunk_template = (
        " python %s/scan_dataset.py -f %s --num-files %s --start %d  -l %s"
    )
    remainder_template = (
        " python %s/scan_dataset.py -f %s --num-files %d --start %d -l %s"
    )

    # Go to directory and create the file list.
    list_of_cache_files = util.build_file_list(filename_path)
    commands = []

    for filename in list_of_cache_files:

        num_of_lines = util.find_num_lines_in_file(filename)

        if num_of_lines == 0:
            continue

        # calculate number of jobs.
        # "//" is floor division on both Python 2 and Python 3, so the
        # result can always be handed to range().
        number_of_jobs = num_of_lines // step
        remainder = num_of_lines % step

        start = 0
        for _ in range(number_of_jobs):
            command = chunk_template % (
                current_dir,
                filename,
                num_files,
                start,
                level,
            )
            start += step

            # Single-argument print(...) gives the same output on
            # Python 2 and Python 3.
            print("created command : %s" % command)
            commands.append(command)

        # include remaining lines that did not fill a whole chunk.
        if remainder > 0:
            command = remainder_template % (
                current_dir,
                filename,
                remainder,
                start,
                level,
            )

            print("created command : %s" % command)
            commands.append(command)

    # Direct submission through util.run_tasks_in_lotus (driven by
    # config["num-processes"]) is disabled; the commands are written to a
    # file instead.
    util.write_list_to_file_nl(commands, "lotus_commands.txt")