def fork_procs_insanity_check(p_string):
    """
    Reject pipeline strings where a fork bracket touches a lane separator.

    Three adjacent token pairs are looked for in the pipeline string: the
    fork start token directly followed by the lane token ['(|'], the lane
    token directly followed by the close token ['|)'] and the lane token
    directly followed by the fork start token ['|('].

    Parameters
    ----------
    p_string: str
        String with the definition of the pipeline, e.g.::
            'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If any of the three token pairs occurs in ``p_string``.
    """

    # Each entry is a pair of tokens that must never be adjacent.
    forbidden_pairs = (
        FORK_TOKEN + LANE_TOKEN,
        LANE_TOKEN + CLOSE_TOKEN,
        LANE_TOKEN + FORK_TOKEN,
    )

    if any(pair in p_string for pair in forbidden_pairs):
        raise SanityError("There must be a process between the fork "
                          "start character '(' or end ')' and the separator of "
                          "processes character '|'")
def brackets_insanity_check(p_string):
    """
    Verify that the pipeline string has as many fork start tokens as close
    tokens; a mismatch indicates that some fork is poorly constructed.

    Parameters
    ----------
    p_string: str
        String with the definition of the pipeline, e.g.::
            'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If the number of fork start tokens differs from the number of close
        tokens. The message reports how many brackets are in excess and of
        which kind.
    """

    opened = p_string.count(FORK_TOKEN)
    closed = p_string.count(CLOSE_TOKEN)

    if opened == closed:
        return

    # The bracket that shows up more often is the one reported as "extra".
    surplus_bracket = FORK_TOKEN if opened > closed else CLOSE_TOKEN
    surplus_amount = str(abs(opened - closed))

    raise SanityError(
        "A different number of '(' and ')' was specified. There are "
        "{} extra '{}'. The number of '(' and ')'should be equal.".format(
            surplus_amount, surplus_bracket))
def inner_fork_insanity_checks(pipeline_string):
    """
    This function checks that every fork in the pipeline string contains a
    lane token '|' at its own nesting level, i.e. without counting the lane
    tokens that belong to forks nested inside it.

    Parameters
    ----------
    pipeline_string: str
        String with the definition of the pipeline, e.g.::
            'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If one of the forks has no lane token of its own. The offending fork
        is included in the error message.
    """

    # First collect the content of every fork (text between a fork start
    # token and its matching close token), using a stack of open positions.
    list_of_forks = []  # stores the content of each fork
    left_indexes = []  # stack with the positions of unmatched left brackets

    for pos, char in enumerate(pipeline_string):
        if char == FORK_TOKEN:
            left_indexes.append(pos)
        elif char == CLOSE_TOKEN and left_indexes:
            # A close token without a pending start token is ignored here.
            start = left_indexes.pop()
            list_of_forks.append(pipeline_string[start + 1: pos])

    # Sort in descending order of number of inner forks. A fork always has
    # more start tokens than any fork nested inside it, so enclosing forks
    # come first and are stripped before the forks they contain.
    list_of_forks.sort(key=lambda x: x.count(FORK_TOKEN), reverse=True)

    for fork in list_of_forks:
        # Strip every inner fork from this fork, since each one has its own
        # entry in list_of_forks and is checked separately. The removals are
        # accumulated so that a fork holding several inner forks ends up with
        # only its own top-level content.
        fork_simplified = fork
        for subfork in list_of_forks:
            # skip the fork itself; replacing a fork that is not nested in
            # this one is a no-op
            if subfork != fork:
                fork_simplified = fork_simplified.replace(
                    FORK_TOKEN + subfork + CLOSE_TOKEN, "")

        # Checks if there is no fork separator character '|' left in the
        # fork once its inner forks are gone
        if LANE_TOKEN not in fork_simplified:
            raise SanityError("One of the forks doesn't have '|' "
                              "separator between the processes to fork. This is"
                              " the prime suspect: '({})'".format(fork))
def empty_tasks(p_string):
    """
    Reject a pipeline string that is empty or made only of whitespace.

    Parameters
    ----------
    p_string: str
        String with the definition of the pipeline, e.g.::
            'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If nothing is left of ``p_string`` after stripping whitespace.
    """

    stripped = p_string.strip()

    # Anything left after stripping means there is something to parse.
    if stripped != "":
        return

    raise SanityError("'-t' parameter received an empty string or "
                      "an empty file.")
def brackets_but_no_lanes(p_string):
    """
    Reject a pipeline string that has a lane separator '|' but no fork
    initiation character '('.

    Parameters
    ----------
    p_string: str
        String with the definition of the pipeline, e.g.::
            'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If '|' occurs in ``p_string`` while '(' does not.
    """

    has_lane_separator = "|" in p_string
    has_fork_start = "(" in p_string

    if has_lane_separator and not has_fork_start:
        raise SanityError("No fork initiation character '(' was "
                          "provided but there is a fork lane separator "
                          "character '|'")
def lane_char_insanity_check(p_string):
    """
    Reject a pipeline string in which two lane tokens '|' appear one right
    after the other.

    Parameters
    ----------
    p_string: str
        String with the definition of the pipeline, e.g.::
            'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If the lane token is immediately followed by another lane token.
    """

    doubled_lane = LANE_TOKEN * 2

    if doubled_lane in p_string:
        raise SanityError("Duplicated fork separator character '|'.")
def late_proc_insanity_check(p_string):
    """
    This function checks if there is anything after the close token of a
    fork. It searches for a close token immediately followed by any
    character that isn't "|" or ")".

    Parameters
    ----------
    p_string: str
        String with the definition of the pipeline, e.g.::
            'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If a close token is directly followed by a character other than
        "|" or ")".
    """

    # re.escape() turns the close token into a literal for the regex engine,
    # and the raw string avoids the invalid '\{' escape sequence that a plain
    # string literal would contain.
    pattern = re.escape(CLOSE_TOKEN) + r"[^|)]"

    if re.search(pattern, p_string):
        raise SanityError("After a fork it is not allowed to have any "
                          "alphanumeric value.")
def start_proc_insanity_check(p_string):
    """
    Reject a pipeline string in which a fork is opened right after another
    fork start token, i.e. with duplicated start tokens ['(('].

    Parameters
    ----------
    p_string: str
        String with the definition of the pipeline, e.g.::
            'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If the fork start token is immediately followed by another fork
        start token.
    """

    doubled_start = FORK_TOKEN * 2

    if doubled_start in p_string:
        raise SanityError("There must be a starting process after the "
                          "fork before adding a new fork. E.g: proc1 ( proc2.1 "
                          "(proc3.1 | proc3.2) | proc 2.2 )")
def final_char_insanity_check(p_string):
    """
    Reject a pipeline string whose last element is the lane token '|'.

    Parameters
    ----------
    p_string: str
        String with the definition of the pipeline, e.g.::
            'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If ``p_string`` ends with the lane token.
    """

    # Nothing to report unless the lane token closes the string.
    if not p_string.endswith(LANE_TOKEN):
        return

    raise SanityError("Fork separator character '|' cannot be the "
                      "last element of pipeline string")