def store_datasets_to_files_in_localhost(status, config):
    """
    Write, for each dataset, a text file listing the data files it contains.

    One ``scan_dataset.py --make-list`` child process is run per dataset id,
    one after another on the local machine; each call blocks until the
    child process exits. The exit code of the child is not inspected.

    :param status: not used by this function; kept so the signature stays
                   compatible with existing callers.
    :param config: mapping that provides
                   ``"filename"``  - file passed to ``scan_dataset.py -f`` and
                   to ``util.find_dataset``;
                   ``"make-list"`` - directory that receives one
                   ``<dataset_id>.txt`` file per dataset.
    :returns: None
    """

    filename = config["filename"]

    # The keys of the mapping returned for "all" are used as dataset ids.
    datasets = util.find_dataset(filename, "all")

    current_dir = os.getcwd()
    directory_to_save_files = config["make-list"]

    # Iterate over the ids directly instead of indexing into keys(), which
    # only works while keys() returns a list.
    for dataset_id in datasets.keys():
        command = "python %s/scan_dataset.py -f %s -d %s --make-list %s/%s.txt" % (
            current_dir,
            filename,
            dataset_id,
            directory_to_save_files,
            dataset_id,
        )

        # Single-argument call form prints the same text on Python 2 and 3.
        print("executing : %s" % (command))

        # NOTE(review): the values are interpolated into a shell string
        # without quoting, so paths containing spaces or shell
        # metacharacters will not survive intact.
        subprocess.call(command, shell=True)
def store_datasets_to_files_in_lotus(status, config):
    """
    Build one ``scan_dataset.py --make-list`` command per dataset and hand
    the whole batch to ``util.run_tasks_in_lotus``.

    :param status: not used by this function.
    :param config: mapping that provides
                   ``"filename"``      - file passed to ``scan_dataset.py -f``
                   and to ``util.find_dataset``;
                   ``"make-list"``     - directory that receives one
                   ``<dataset_id>.txt`` file per dataset;
                   ``"num-processes"`` - converted with ``int()`` and passed
                   as the second argument of ``util.run_tasks_in_lotus``
                   (presumably the maximum number of concurrent jobs -
                   confirm against util).
    :returns: None
    """

    filename = config["filename"]

    # The keys of the mapping returned for "all" are used as dataset ids.
    datasets = util.find_dataset(filename, "all")

    working_dir = os.getcwd()
    output_dir = config["make-list"]

    # One command per dataset; each writes <output_dir>/<dataset_id>.txt.
    template = "python %s/scan_dataset.py -f %s -d %s --make-list %s/%s.txt"
    scan_commands = [
        template % (working_dir, filename, dataset_id, output_dir, dataset_id)
        for dataset_id in datasets.keys()
    ]

    max_processes = int(config["num-processes"])

    # Submit the batch to lotus.
    util.run_tasks_in_lotus(scan_commands, max_processes, user_wait_time=30)
def parse_logs(com_args):
    """
    Collect the "Summary" entry of every log file into per-dataset totals.

    Every file returned by ``util.build_file_list`` for the log directory is
    read, and the entry found by ``util.find_in_list(content, "Summary")`` is
    parsed. The text after the first occurrence of "Summary" is split on
    commas; the dataset id is the 6th whitespace-separated token of the
    first piece, and each of the next four pieces carries an integer as its
    4th token (indexed, database errors, properties errors, total files).

    The first summary seen for a dataset creates its entry. Later summaries
    for the same dataset add to ``indexed``, ``database_errors`` and
    ``properties_errors``; ``total_files`` keeps the value from the first
    summary.

    :param com_args: mapping that provides ``"log_directory"`` and
                     ``"filename"`` (the datasets file handed to
                     ``util.find_dataset``).
    :returns: dict mapping dataset id to a dict with the keys ``indexed``,
              ``database_errors``, ``properties_errors``, ``total_files``
              and ``dataset_dir``.
    """
    log_directory = com_args["log_directory"]
    datasets_file = com_args["filename"]

    summary_info = {}

    for log_path in util.build_file_list(log_directory):
        lines = util.read_file_into_list(log_path)
        summary = util.find_in_list(lines, "Summary")

        # Logs without a summary entry contribute nothing.
        if summary is None:
            continue

        fields = summary.split("Summary", 1)[1].split(",")

        dataset = fields[0].split()[5]

        # Fields 1..4 each hold their count as the 4th token.
        counts = [int(fields[position].split()[3]) for position in range(1, 5)]
        indexed, database_errors, properties_errors, total_files = counts

        totals = summary_info.get(dataset)

        if totals is None:
            # First summary for this dataset: create its entry.
            summary_info[dataset] = {
                "indexed": indexed,
                "database_errors": database_errors,
                "properties_errors": properties_errors,
                "total_files": total_files,
                "dataset_dir": util.find_dataset(datasets_file, dataset),
            }
        else:
            # Later summary: add the counters; total_files is left as is.
            totals["indexed"] += indexed
            totals["database_errors"] += database_errors
            totals["properties_errors"] += properties_errors

    return summary_info
def scan_datasets_in_localhost(config, scan_status):
    """
    Uses localhost in order to scan files in the filesystem.

    Depending on ``scan_status`` the function either

    * ``READ_AND_SCAN_DATASETS_SUB`` - scans the dataset id(s) given in
      ``config["dataset"]`` (one id, or several separated by commas);
    * ``READ_AND_SCAN_DATASETS`` - scans every dataset id returned by
      ``util.find_dataset(filename, "all")``;
    * ``READ_DATASET_FROM_FILE_AND_SCAN`` - delegates to
      ``read_datasets_from_files_and_scan_in_localhost(config)``.

    Any other status does nothing. For the first two modes one
    ``scan_dataset.py`` child process is run per dataset id, sequentially;
    the exit code of the child is not inspected.

    :param config: mapping that provides ``"filename"`` and ``"level"``
                   (passed to ``scan_dataset.py`` as ``-f`` and ``-l``) and,
                   for the ``..._SUB`` mode only, ``"dataset"``.
    :param scan_status: one of the ``constants.Script_status`` values above.
    :returns: None
    """

    # Get basic options.
    filename = config["filename"]
    level = config["level"]
    current_dir = os.getcwd()

    # Work out which dataset ids have to be scanned by this function.
    dataset_ids = []

    if scan_status == constants.Script_status.READ_AND_SCAN_DATASETS_SUB:
        # split(",") returns a one-element list when there is no comma, so
        # a single id and a comma-separated list are handled the same way.
        dataset_ids = config["dataset"].split(",")

    elif scan_status == constants.Script_status.READ_AND_SCAN_DATASETS:
        # Only the keys (dataset ids) are needed, not the mapped values.
        dataset_ids = util.find_dataset(filename, "all").keys()

    elif scan_status == constants.Script_status.READ_DATASET_FROM_FILE_AND_SCAN:
        read_datasets_from_files_and_scan_in_localhost(config)

    for dataset_id in dataset_ids:
        command = "python %s/scan_dataset.py -f %s -d %s -l %s" % (
            current_dir,
            filename,
            dataset_id,
            level,
        )

        # Message text kept exactly as before; the single-argument call form
        # prints the same text on Python 2 and 3.
        print("executng : %s" % (command))

        # NOTE(review): the values are interpolated into a shell string
        # without quoting, so paths containing spaces or shell
        # metacharacters will not survive intact.
        subprocess.call(command, shell=True)
def read_and_scan_datasets_in_lotus(config):
    """
    Build one ``scan_dataset.py`` command per dataset and hand the whole
    batch to ``util.run_tasks_in_lotus``.

    Each command is printed as it is created.

    :param config: mapping that provides
                   ``"filename"``      - file passed to ``scan_dataset.py -f``
                   and to ``util.find_dataset``;
                   ``"level"``         - passed to ``scan_dataset.py -l``;
                   ``"num-processes"`` - converted with ``int()`` and passed
                   as the second argument of ``util.run_tasks_in_lotus``
                   (presumably the maximum number of concurrent jobs -
                   confirm against util).
    :returns: None
    """

    filename = config["filename"]
    level = config["level"]
    working_dir = os.getcwd()

    # The keys of the mapping returned for "all" are used as dataset ids.
    datasets = util.find_dataset(filename, "all")

    template = "python %s/scan_dataset.py -f %s -d %s -l %s"
    commands = []

    for dataset_id in datasets.keys():
        command = template % (working_dir, filename, dataset_id, level)
        print("created command :" + command)
        commands.append(command)

    max_processes = int(config["num-processes"])
    util.run_tasks_in_lotus(commands, max_processes, user_wait_time=30)