import getpass
import os
import pwd
import subprocess

import util


def sig_handler(signum, frame):
    """
    Catches SIGTERM, SIGINT and SIGHUP signals, cleans the tmp directory
    and terminates the process.
    """
    script_user = getpass.getuser()
    tmp_dir_files = util.build_file_list("/tmp")
    print("Signal {} received, deleting tmp files:".format(signum))
    for filename in tmp_dir_files:
        try:
            stat_info = os.stat(filename)
            uid = stat_info.st_uid
            file_owner = pwd.getpwuid(uid)[0]
            # Only delete files owned by the user running this script.
            if script_user == file_owner:
                print(filename)
                os.remove(filename)
        except OSError:
            # The file may have vanished or be unreadable; skip it.
            pass
    raise SystemExit(signum)
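
# The handler above only takes effect once it is registered for the signals
# its docstring names. A minimal registration sketch (an assumption; the
# original wiring is not shown in this file):
import signal


def register_signal_handlers():
    """Route SIGTERM, SIGINT and SIGHUP to sig_handler."""
    for sig in (signal.SIGTERM, signal.SIGINT, signal.SIGHUP):
        signal.signal(sig, sig_handler)
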
def read_datasets_from_files_and_scan_in_localhost(config):
    # Get basic options.
    filename_path = config["filename"]
    level = config["level"]
    num_files = config["num-files"]
    start = config["start"]
    current_dir = os.getcwd()

    # Go to the directory and create the file list.
    list_of_cache_files = util.build_file_list(filename_path)

    commands = []
    step = int(num_files)
    for filename in list_of_cache_files:
        num_of_lines = util.find_num_lines_in_file(filename)
        if num_of_lines == 0:
            continue

        # Calculate the number of tasks.
        number_of_tasks = num_of_lines // int(num_files)
        remainder = num_of_lines % int(num_files)

        start = 0
        for i in range(number_of_tasks):
            command = "python %s/scan_dataset.py -f %s --num-files %s --start %d -l %s" % (
                current_dir,
                filename,
                num_files,
                start,
                level,
            )
            start += step
            # print("created command: " + command)
            commands.append(command)

        # Include the remaining files.
        if remainder > 0:
            command = "python %s/scan_dataset.py -f %s --num-files %d --start %d -l %s" % (
                current_dir,
                filename,
                remainder,
                start,
                level,
            )
            # print("created command: %s" % command)
            commands.append(command)

    # Run each command on localhost.
    for command in commands:
        print("Executing command: %s" % command)
        subprocess.call(command, shell=True)
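
# Example invocation (sketch; the key names match the lookups above, but the
# values are hypothetical and would normally come from command-line parsing):
# config = {"filename": "/path/to/file_lists", "level": "2",
#           "num-files": "500", "start": "0"}
# read_datasets_from_files_and_scan_in_localhost(config)
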
def parse_logs(com_args):
    log_directory = com_args["log_directory"]
    datasets_file = com_args["filename"]

    # Find all files in the log directory.
    list_of_files = util.build_file_list(log_directory)
    summary_info = {}

    # Open each file and extract the summary information.
    for filename in list_of_files:
        content_list = util.read_file_into_list(filename)
        summary = util.find_in_list(content_list, "Summary")
        if summary is None:
            continue

        # The summary line is comma-separated after the "Summary" keyword.
        words_list = summary.split("Summary", 1)[1].split(",")
        dataset = words_list[0].split()[5]
        indexed = int(words_list[1].split()[3])
        database_errors = int(words_list[2].split()[3])
        properties_errors = int(words_list[3].split()[3])
        total_files = int(words_list[4].split()[3])

        if dataset not in summary_info:
            dataset_info = {}
            dataset_info["indexed"] = indexed
            dataset_info["database_errors"] = database_errors
            dataset_info["properties_errors"] = properties_errors
            dataset_info["total_files"] = total_files
            dataset_info["dataset_dir"] = util.find_dataset(datasets_file, dataset)
            summary_info[dataset] = dataset_info
        else:
            # Accumulate counts from additional log files for the same
            # dataset. total_files is taken from the first log file and is
            # deliberately not accumulated.
            dataset_info = summary_info[dataset]
            dataset_info["indexed"] += indexed
            dataset_info["database_errors"] += database_errors
            dataset_info["properties_errors"] += properties_errors

    # Return the collected information.
    return summary_info
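
# Example of consuming the returned mapping (sketch; paths are hypothetical):
# summary = parse_logs({"log_directory": "/path/to/logs",
#                       "filename": "/path/to/datasets_file"})
# for dataset, info in summary.items():
#     print("%s: indexed=%d, db_errors=%d"
#           % (dataset, info["indexed"], info["database_errors"]))
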
def sample_files(in_path, out_path):
    # Go to the directory and create the file list.
    list_of_cache_files = util.build_file_list(in_path)

    counter = 0
    for filename in list_of_cache_files:
        contents = util.read_file_into_list(filename)
        new_file_name = os.path.join(out_path, os.path.basename(filename) + "-sample")
        with open(new_file_name, "a") as fd:
            for item in contents:
                if item.rstrip().endswith(".pp"):
                    fd.write(item)
                    counter += 1
                # The counter is cumulative across all files, so sampling
                # stops after roughly 1000 ".pp" entries in total.
                if counter > 1000:
                    break
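
# Usage sketch (hypothetical paths): writes a "<name>-sample" file for each
# cache file under in_path, containing only its ".pp" entries.
# sample_files("/path/to/cache_files", "/path/to/samples")
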
def read_datasets_from_files_and_scan_in_lotus(config):
    """
    Basic algorithm:
    1. Go to the directory containing the files.
    2. Create a file list.
    3. Scan each file and determine the number of lines it contains.
    4. Create the appropriate commands.
    5. Store the commands in a list.
    6. Go to the next file.
    7. Submit all commands to LOTUS.
    """
    # Get basic options.
    filename_path = config["filename"]
    level = config["level"]
    num_files = config["num-files"]
    start = config["start"]
    current_dir = os.getcwd()

    # Go to the directory and create the file list.
    list_of_cache_files = util.build_file_list(filename_path)

    commands = []
    step = int(num_files)
    for filename in list_of_cache_files:
        num_of_lines = util.find_num_lines_in_file(filename)
        if num_of_lines == 0:
            continue

        # Calculate the number of jobs.
        number_of_jobs = num_of_lines // int(num_files)
        remainder = num_of_lines % int(num_files)

        start = 0
        for i in range(number_of_jobs):
            command = "python %s/scan_dataset.py -f %s --num-files %s --start %d -l %s" % (
                current_dir,
                filename,
                num_files,
                start,
                level,
            )
            start += step
            print("created command: %s" % command)
            commands.append(command)

        # Include the remaining files.
        if remainder > 0:
            command = "python %s/scan_dataset.py -f %s --num-files %d --start %d -l %s" % (
                current_dir,
                filename,
                remainder,
                start,
                level,
            )
            print("created command: %s" % command)
            commands.append(command)

    # Instead of running the commands in LOTUS directly, e.g.
    # util.run_tasks_in_lotus(commands, int(config["num-processes"]),
    #                         user_wait_time=30)
    # write them to a file for later submission.
    util.write_list_to_file_nl(commands, "lotus_commands.txt")
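
# Example invocation (sketch; values are hypothetical). After the call,
# "lotus_commands.txt" holds one scan_dataset.py command per line, ready to
# be submitted to LOTUS by whatever mechanism the project uses.
# config = {"filename": "/path/to/file_lists", "level": "2",
#           "num-files": "500", "start": "0"}
# read_datasets_from_files_and_scan_in_lotus(config)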