def pass_fail_matrix():
    """
    Constructs the stagewise pass/fail matrix from the error_log.

    Every file found directly under ``wd.err_log_path`` is read with
    ``yap_file_io.read_file`` and keyed by its base name with the extension
    and a trailing ``_err`` suffix removed.  Each log is then handed to
    ``pass_fail_dict`` to fill a fresh per-sample status dictionary, the
    PREPROCESS / ALIGNMENT / POSTPROCESS entries are completed from the
    corresponding ``wd.run_*`` switches, and the resulting mapping is passed
    to ``print_matrix`` together with the path of
    ``yap_pass_fail_matrix.log`` in the parent of the error-log directory.
    """
    err_suffix = "_err"
    # Logs that yield only these keys (or no keys at all) are left untouched.
    skip_keys = set(['CUFFDIFF', 'CUFFCOMPARE', 'CUFFMERGE', 'MACS2'])
    stage_switches = (
        ("PREPROCESS", "run_preprocess_analysis"),
        ("ALIGNMENT", "run_reference_alignment"),
        ("POSTPROCESS", "run_postprocess_analysis"),
    )

    err_log = wd.err_log_path
    matrix_path = os.path.split(err_log)[0] + "/yap_pass_fail_matrix.log"

    sample_log_dict = {}
    for log_path in glob.glob(err_log + "/*"):
        sample = os.path.splitext(os.path.basename(log_path))[0]
        # Remove the literal suffix only; str.rstrip("_err") would strip any
        # trailing '_', 'e' or 'r' characters and truncate the sample name.
        if sample.endswith(err_suffix):
            sample = sample[:-len(err_suffix)]
        sample_log_dict[sample] = yap_file_io.read_file(log_path)  # len-1 chunks

    # Iterate over a snapshot of the keys: the values are replaced in the loop.
    for sample in list(sample_log_dict):
        pass_fail = {}
        pass_fail_dict(sample_log_dict[sample], pass_fail)
        if not set(pass_fail).issubset(skip_keys):
            for stage, switch_name in stage_switches:
                switch = getattr(wd, switch_name)
                if switch == "yes" and pass_fail.get(stage) is None:
                    # Stage was requested but the log has no entry for it.
                    pass_fail[stage] = "FAIL"
                elif switch == "no":
                    pass_fail[stage] = "N/A"
            # A failed preprocess marks the later stages as failed too.
            # .get() avoids a KeyError when the switch is neither yes nor no.
            if pass_fail.get("PREPROCESS") == "FAIL":
                pass_fail["ALIGNMENT"] = "FAIL"
                pass_fail["POSTPROCESS"] = "FAIL"
        sample_log_dict[sample] = pass_fail

    print_matrix(sample_log_dict, matrix_path)
def read_barcodes(bar_file):
    """
    Reads barcode file and generates dictionary.

    Each line returned by ``yap_file_io.read_file(bar_file)`` is split on the
    run of whitespace that directly follows the first word of the line; the
    first field becomes the key and the second field the barcode.

    Lines where the first word is not followed by whitespace (blank lines,
    single tokens, lines starting with a non-word character) are skipped.
    Previously they raised ``ValueError: empty separator``.

    Returns:
        dict mapping identifier -> barcode, plus the fixed entry
        ``'unmatched' -> 'barcode_unmatched'``.
    """
    barcode_dict = {}
    for line in yap_file_io.read_file(bar_file):
        matchobj = re.search(r'\s*(\w*)(\s*).*', line.strip('\n'),
                             re.M | re.I)
        split_by = matchobj.group(2) if matchobj else ''
        if not split_by:
            # No usable separator on this line; nothing to record.
            continue
        # NOTE(review): the raw line is split, so a trailing newline left in
        # place by read_file (if any) stays on the barcode - confirm that
        # callers expect this.
        fields = line.split(split_by)
        barcode_dict[fields[0]] = fields[1]
    barcode_dict['unmatched'] = 'barcode_unmatched'
    return barcode_dict
def read_barcodes(bar_file):
    """
    Reads barcode file and generates dictionary.

    Each line returned by ``yap_file_io.read_file(bar_file)`` is split on the
    run of whitespace that directly follows the first word of the line; the
    first field becomes the key and the second field the barcode.

    Lines where the first word is not followed by whitespace (blank lines,
    single tokens, lines starting with a non-word character) are skipped.
    Previously they raised ``ValueError: empty separator``.

    Returns:
        dict mapping identifier -> barcode, plus the fixed entry
        ``'unmatched' -> 'barcode_unmatched'``.
    """
    barcode_dict = {}
    for line in yap_file_io.read_file(bar_file):
        matchobj = re.search(r'\s*(\w*)(\s*).*', line.strip('\n'),
                             re.M | re.I)
        split_by = matchobj.group(2) if matchobj else ''
        if not split_by:
            # No usable separator on this line; nothing to record.
            continue
        # NOTE(review): the raw line is split, so a trailing newline left in
        # place by read_file (if any) stays on the barcode - confirm that
        # callers expect this.
        fields = line.split(split_by)
        barcode_dict[fields[0]] = fields[1]
    barcode_dict['unmatched'] = 'barcode_unmatched'
    return barcode_dict
def sample_filter():
    """
    Filters failed samples after the alignment step.

    For every entry of ``wd.inp_files_list`` the error log
    ``<wd.err_log_path>/<entry[2]>_err.log`` is parsed with
    ``pass_fail_dict`` when it exists.  An entry is put on the ignore list
    when any value in the resulting dictionary is ``'FAIL'``.

    Returns:
        tuple ``(inp_files_list, list_of_samples,
        list_of_samples_to_compare, ignore_list)``.  With an empty ignore
        list the three ``wd`` lists are returned as they are; otherwise the
        sample lists are passed through ``remove_corrupted_samples`` and the
        ignored entries are dropped from the input file list.
    """
    ignore_list = []
    for entry in wd.inp_files_list:
        pass_fail = {}
        err_log = wd.err_log_path + "/" + entry[2] + "_err.log"
        if os.path.exists(err_log):
            pass_fail_dict(yap_file_io.read_file(err_log), pass_fail)
        # values() works on both Python 2 and 3, unlike itervalues().
        if 'FAIL' in pass_fail.values():
            ignore_list.append(entry)

    if not ignore_list:
        return (wd.inp_files_list, wd.list_of_samples,
                wd.list_of_samples_to_compare, ignore_list)

    list_of_samples_to_compare = remove_corrupted_samples(
        wd.list_of_samples_to_compare, ignore_list)
    list_of_samples = remove_corrupted_samples(wd.list_of_samples,
                                               ignore_list)
    inp_files_list = [entry for entry in wd.inp_files_list
                      if entry not in ignore_list]
    return (inp_files_list, list_of_samples, list_of_samples_to_compare,
            ignore_list)
def sample_filter():
    """
    Filters failed samples after the alignment step.

    For every entry of ``wd.inp_files_list`` the error log
    ``<wd.err_log_path>/<entry[2]>_err.log`` is parsed with
    ``pass_fail_dict`` when it exists.  An entry is put on the ignore list
    when any value in the resulting dictionary is ``'FAIL'``.

    Returns:
        tuple ``(inp_files_list, list_of_samples,
        list_of_samples_to_compare, ignore_list)``.  With an empty ignore
        list the three ``wd`` lists are returned as they are; otherwise the
        sample lists are passed through ``remove_corrupted_samples`` and the
        ignored entries are dropped from the input file list.
    """
    ignore_list = []
    for entry in wd.inp_files_list:
        pass_fail = {}
        err_log = wd.err_log_path + "/" + entry[2] + "_err.log"
        if os.path.exists(err_log):
            pass_fail_dict(yap_file_io.read_file(err_log), pass_fail)
        # values() works on both Python 2 and 3, unlike itervalues().
        if 'FAIL' in pass_fail.values():
            ignore_list.append(entry)

    if not ignore_list:
        return (wd.inp_files_list, wd.list_of_samples,
                wd.list_of_samples_to_compare, ignore_list)

    list_of_samples_to_compare = remove_corrupted_samples(
        wd.list_of_samples_to_compare, ignore_list)
    list_of_samples = remove_corrupted_samples(wd.list_of_samples,
                                               ignore_list)
    inp_files_list = [entry for entry in wd.inp_files_list
                      if entry not in ignore_list]
    return (inp_files_list, list_of_samples, list_of_samples_to_compare,
            ignore_list)
def pass_fail_matrix():
    """
    Constructs the stagewise pass/fail matrix from the error_log.

    Every file found directly under ``wd.err_log_path`` is read with
    ``yap_file_io.read_file`` and keyed by its base name with the extension
    and a trailing ``_err`` suffix removed.  Each log is then handed to
    ``pass_fail_dict`` to fill a fresh per-sample status dictionary, the
    PREPROCESS / ALIGNMENT / POSTPROCESS entries are completed from the
    corresponding ``wd.run_*`` switches, and the resulting mapping is passed
    to ``print_matrix`` together with the path of
    ``yap_pass_fail_matrix.log`` in the parent of the error-log directory.
    """
    err_suffix = "_err"
    # Logs that yield only these keys (or no keys at all) are left untouched.
    skip_keys = set(['CUFFDIFF', 'CUFFCOMPARE', 'CUFFMERGE', 'MACS2'])
    stage_switches = (
        ("PREPROCESS", "run_preprocess_analysis"),
        ("ALIGNMENT", "run_reference_alignment"),
        ("POSTPROCESS", "run_postprocess_analysis"),
    )

    err_log = wd.err_log_path
    matrix_path = os.path.split(err_log)[0] + "/yap_pass_fail_matrix.log"

    sample_log_dict = {}
    for log_path in glob.glob(err_log + "/*"):
        sample = os.path.splitext(os.path.basename(log_path))[0]
        # Remove the literal suffix only; str.rstrip("_err") would strip any
        # trailing '_', 'e' or 'r' characters and truncate the sample name.
        if sample.endswith(err_suffix):
            sample = sample[:-len(err_suffix)]
        sample_log_dict[sample] = yap_file_io.read_file(log_path)  # len-1 chunks

    # Iterate over a snapshot of the keys: the values are replaced in the loop.
    for sample in list(sample_log_dict):
        pass_fail = {}
        pass_fail_dict(sample_log_dict[sample], pass_fail)
        if not set(pass_fail).issubset(skip_keys):
            for stage, switch_name in stage_switches:
                switch = getattr(wd, switch_name)
                if switch == "yes" and pass_fail.get(stage) is None:
                    # Stage was requested but the log has no entry for it.
                    pass_fail[stage] = "FAIL"
                elif switch == "no":
                    pass_fail[stage] = "N/A"
            # A failed preprocess marks the later stages as failed too.
            # .get() avoids a KeyError when the switch is neither yes nor no.
            if pass_fail.get("PREPROCESS") == "FAIL":
                pass_fail["ALIGNMENT"] = "FAIL"
                pass_fail["POSTPROCESS"] = "FAIL"
        sample_log_dict[sample] = pass_fail

    print_matrix(sample_log_dict, matrix_path)
workflow_config_file = sys.argv[2] run_mode = "--check" else: print_usage = 'True' else: print_usage = 'True' if print_usage == 'True': print "Options : " print " To print help : ", " yap --help or yap -h" print " To do configuration file format check : ", " yap --check [workflow_configuration_filename], ", "\"eg: yap --check workflow_configuration.cfg \"" print " To run YAP: ", " yap -n [number of processors] [workflow_configuration_filename], ", "\"eg: yap -n 2 workflow_configuration.cfg \"", "(yap run with 2 processors)" print "exiting the program" exit_status = 'True' if exit_status != 'True': #reads the main workflow configuration file workflow_file_data = yap_file_io.read_file(workflow_config_file) if len(workflow_file_data) > 0: #pass the file data to workflow parser workflow_struct, workflow_errorlist = yap_tools.workflow_parser(workflow_file_data, workflow_config_file,nprocs) if len(workflow_struct) < 2 or len(workflow_errorlist) > 0: print"Format Error : while parsing the workflow configuration file= ", workflow_config_file, "\n" print"YAP analysis general metadata mising or syntax error.", "\n" print"Note:Use symbol(:begin) and (:end) to define command sections,enclose variable and corresponding values in double quotes(eg \"..\")" print"Use (:=) in variable value assignment (eg \"variable\" := \"value\")" print"To add comments,start the line with symbol(#)" for i in workflow_errorlist: print i exit_status = 'True' if exit_status != 'True': if len(workflow_struct) != 0: #check if the workflow configuration passes the sanity checks