コード例 #1
0
ファイル: scan_dataset.py プロジェクト: cedadev/ceda-fbs
def sig_handler(signum, frame):

    """
    Signal handler: clean the tmp directory and terminate the process.

    Meant to be installed for SIGTERM, SIGINT and SIGHUP. Every path returned
    by util.build_file_list("/tmp") whose owner is the user running this
    script is printed and then removed. Cleanup is best-effort: a file that
    cannot be inspected or removed is skipped.

    :param signum: number of the received signal; also used as the exit code.
    :param frame: current stack frame (unused, required by the handler API).
    :raises SystemExit: always, with ``signum`` as its code.
    """

    script_user = getpass.getuser()
    tmp_dir_files = util.build_file_list("/tmp")
    print("Signal {} received deleting tmp files:".format(signum))

    for filename in tmp_dir_files:
        try:
            owner_uid = os.stat(filename).st_uid
            file_owner = pwd.getpwuid(owner_uid)[0]

            # Only touch files that belong to the user running the script.
            if script_user == file_owner:
                print(filename)
                os.remove(filename)
        except (EnvironmentError, KeyError):
            # EnvironmentError: the file vanished, is not removable, or the
            # output stream is gone. KeyError: the uid has no passwd entry.
            # Anything else (notably SystemExit / KeyboardInterrupt raised by
            # a further signal) is deliberately allowed to propagate.
            continue

    raise SystemExit(signum)
コード例 #2
0
ファイル: scan_archive.py プロジェクト: cedadev/ceda-fbs
def read_datasets_from_files_and_scan_in_localhost(config):

    """
    Build scan_dataset.py commands for every listed file and run them locally.

    For each path returned by util.build_file_list(config["filename"]) the
    line count reported by util.find_num_lines_in_file is split into chunks
    of config["num-files"] lines. One command is created per full chunk, plus
    one for the leftover lines, and the commands are then executed one after
    another through the shell.

    Chunking always begins at line 0 of each file; config["start"] is not
    used by this function.

    :param config: mapping providing the "filename", "level" and "num-files"
                   options.
    """

    # Get basic options.
    filename_path = config["filename"]
    level = config["level"]
    num_files = config["num-files"]
    # The commands assume scan_dataset.py sits in the current working directory.
    current_dir = os.getcwd()

    step = int(num_files)
    commands = []

    for filename in util.build_file_list(filename_path):

        num_of_lines = util.find_num_lines_in_file(filename)
        if num_of_lines == 0:
            continue

        # Explicit floor division: number of full chunks and leftover lines.
        number_of_tasks, remainder = divmod(num_of_lines, step)

        start = 0
        for _ in range(number_of_tasks):
            commands.append(
                " python %s/scan_dataset.py -f %s --num-files %s  --start %d  -l %s"
                % (current_dir, filename, num_files, start, level)
            )
            start += step

        # Include the remaining lines that do not fill a whole chunk.
        if remainder > 0:
            commands.append(
                "python %s/scan_dataset.py -f %s --num-files %d --start %d -l %s"
                % (current_dir, filename, remainder, start, level)
            )

    # Run each command in localhost, sequentially.
    for command in commands:
        print("Executing command : %s" % (command))
        subprocess.call(command, shell=True)
コード例 #3
0
ファイル: scan_logfiles.py プロジェクト: cedadev/ceda-fbs
def parse_logs(com_args):

    """
    Aggregate the "Summary" line of every log file into per-dataset counters.

    Each path returned by util.build_file_list(com_args["log_directory"]) is
    read and searched for a "Summary" entry. The text after "Summary" is
    split on commas; the dataset id and four integer counters are taken from
    fixed word positions of the resulting fields.

    :param com_args: mapping providing "log_directory" and "filename".
    :returns: dict keyed by dataset id; each value holds "indexed",
              "database_errors", "properties_errors", "total_files" and
              "dataset_dir".
    """

    logs_dir = com_args["log_directory"]
    datasets_file = com_args["filename"]

    totals = {}

    # Open each log file and extract its summary information.
    for log_path in util.build_file_list(logs_dir):

        log_lines = util.read_file_into_list(log_path)
        summary_line = util.find_in_list(log_lines, "Summary")
        if summary_line is None:
            continue

        fields = summary_line.split("Summary", 1)[1].split(",")

        # Field 0 carries the dataset id as its sixth word.
        dataset = fields[0].split()[5]
        # Fields 1-4 carry, in order: indexed, database errors,
        # properties errors and total files, each as the fourth word.
        counts = [int(fields[pos].split()[3]) for pos in range(1, 5)]
        indexed, database_errors, properties_errors, total_files = counts

        if dataset in totals:
            # Dataset seen before: add up the counters. "total_files" is left
            # at the value recorded from the first log for this dataset.
            entry = totals[dataset]
            entry["indexed"] += indexed
            entry["database_errors"] += database_errors
            entry["properties_errors"] += properties_errors
        else:
            totals[dataset] = {
                "indexed": indexed,
                "database_errors": database_errors,
                "properties_errors": properties_errors,
                "total_files": total_files,
                "dataset_dir": util.find_dataset(datasets_file, dataset),
            }

    return totals
コード例 #4
0
ファイル: file_sampler.py プロジェクト: cedadev/ceda-fbs
def sample_files(in_path, out_path):

    """
    Write the ".pp" entries of each input list file to a "-sample" file.

    For every path returned by util.build_file_list(in_path), the entries
    from util.read_file_into_list whose right-stripped text ends with ".pp"
    are appended to ``<out_path>/<basename>-sample``. The output file is
    opened in append mode, so it is created even when nothing matches and
    repeated runs add to existing content.

    NOTE(review): ``counter`` is shared by all input files and is never
    reset, so once more than 1000 entries have been written overall, each
    later file contributes at most one entry. Confirm whether a per-file or
    a global limit was intended.

    :param in_path: location handed to util.build_file_list.
    :param out_path: directory that receives the "-sample" files.
    """

    counter = 0

    for filename in util.build_file_list(in_path):
        contents = util.read_file_into_list(filename)
        new_file_name = os.path.join(out_path, os.path.basename(filename) + "-sample")

        # The context manager closes (and flushes) the handle even if a
        # write fails; previously the handle was never closed explicitly.
        with open(new_file_name, "a") as fd:
            for item in contents:
                if item.rstrip().endswith(".pp"):
                    fd.write(item)
                    counter += 1
                    if counter > 1000:
                        break
コード例 #5
0
ファイル: scan_archive.py プロジェクト: cedadev/ceda-fbs
def read_datasets_from_files_and_scan_in_lotus(config):

    """
    Build scan_dataset.py commands for every listed file and save them.

    basic algorithm:

    1. Create the list of files found under config["filename"].
    2. Determine the number of lines contained in each file.
    3. Create one command per chunk of config["num-files"] lines, plus one
       for the leftover lines.
    4. Store the commands in a list and go to the next file.
    5. Write all commands to "lotus_commands.txt", one per line.

    The commands are not submitted to lotus here; they are only written to
    the file. Chunking always begins at line 0 of each file; config["start"]
    is not used by this function.

    :param config: mapping providing the "filename", "level" and "num-files"
                   options.
    """

    # Get basic options.
    filename_path = config["filename"]
    level = config["level"]
    num_files = config["num-files"]
    # The commands assume scan_dataset.py sits in the current working directory.
    current_dir = os.getcwd()

    step = int(num_files)
    commands = []

    for filename in util.build_file_list(filename_path):

        num_of_lines = util.find_num_lines_in_file(filename)
        if num_of_lines == 0:
            continue

        # Explicit floor division: number of full chunks and leftover lines.
        number_of_jobs, remainder = divmod(num_of_lines, step)

        start = 0
        for _ in range(number_of_jobs):
            command = " python %s/scan_dataset.py -f %s --num-files %s --start %d  -l %s" % (
                current_dir,
                filename,
                num_files,
                start,
                level,
            )
            start += step

            print("created command : %s" % (command))
            commands.append(command)

        # Include the remaining lines that do not fill a whole chunk.
        if remainder > 0:
            command = " python %s/scan_dataset.py -f %s --num-files %d --start %d -l %s" % (
                current_dir,
                filename,
                remainder,
                start,
                level,
            )

            print("created command : %s" % (command))
            commands.append(command)

    # Direct submission through util.run_tasks_in_lotus (driven by
    # config["num-processes"]) is disabled; the commands are written to a
    # file instead.
    util.write_list_to_file_nl(commands, "lotus_commands.txt")