Esempio n. 1
0
def fork_procs_insanity_check(p_string):
    """
    Reject pipeline strings where a fork delimiter touches a lane separator.

    Three adjacent token pairs are considered invalid:

    - fork start immediately followed by the lane separator ('(|'), i.e. the
      first branch of the fork has no process;
    - lane separator immediately followed by the fork end ('|)'), i.e. the
      last branch of the fork has no process;
    - lane separator immediately followed by a fork start ('|('), i.e. an
      inner fork is opened without a process preceding it.

    Parameters
    ----------
    p_string: str
         String with the definition of the pipeline, e.g.::
             'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If any of the three token pairs above is found in ``p_string``.

    """

    forbidden_pairs = (
        FORK_TOKEN + LANE_TOKEN,
        LANE_TOKEN + CLOSE_TOKEN,
        LANE_TOKEN + FORK_TOKEN,
    )

    # A single occurrence of any pair is enough to invalidate the string.
    if any(pair in p_string for pair in forbidden_pairs):
        raise SanityError("There must be a process between the fork "
                          "start character '(' or end ')' and the separator of "
                          "processes character '|'")
Esempio n. 2
0
def brackets_insanity_check(p_string):
    """
    Verify that fork start and fork close tokens appear equally often.

    A mismatch between the number of '(' and ')' characters indicates that
    at least one fork is poorly constructed.

    Parameters
    ----------
    p_string: str
         String with the definition of the pipeline, e.g.::
             'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If the two counts differ. The message reports how many extra
        brackets there are and which bracket is in excess.

    """

    n_open = p_string.count(FORK_TOKEN)
    n_close = p_string.count(CLOSE_TOKEN)

    if n_open == n_close:
        return

    # The counts differ at this point, so the larger one identifies the
    # bracket that is in excess.
    surplus_bracket = FORK_TOKEN if n_open > n_close else CLOSE_TOKEN
    n_extra = str(abs(n_open - n_close))

    raise SanityError(
        "A different number of '(' and ')' was specified. There are "
        "{} extra '{}'. The number of '(' and ')'should be equal.".format(
            n_extra, surplus_bracket))
Esempio n. 3
0
def inner_fork_insanity_checks(pipeline_string):
    """
    Check that every fork in the pipeline string has a lane token '|'.

    Each fork (the text between a fork start token and its matching close
    token) is collected. For every fork, the text of all other collected
    forks that appear inside it (wrapped in their brackets) is removed, so
    that only the fork's own level remains. That remainder must contain at
    least one lane token.

    Note that this function only performs the lane-token check; it does not
    look for duplicated processes within a fork.

    Parameters
    ----------
    pipeline_string: str
         String with the definition of the pipeline, e.g.::
             'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If a fork, once its inner forks are removed, has no lane token.

    """

    # First collect the contents of every fork.
    list_of_forks = []  # text found between each matched pair of brackets
    left_indexes = []  # stack with the positions of unmatched '('

    for pos, char in enumerate(pipeline_string):
        if char == FORK_TOKEN:
            left_indexes.append(pos)
        elif char == CLOSE_TOKEN and left_indexes:
            # A close token matches the most recent unmatched fork token.
            # Close tokens without a pending fork token are ignored here.
            start = left_indexes.pop()
            list_of_forks.append(pipeline_string[start + 1: pos])

    # Sort in descending order of number of inner forks, so that when inner
    # forks are stripped below, the ones that themselves contain more forks
    # are removed first (their text includes the deeper forks).
    list_of_forks.sort(key=lambda x: x.count(FORK_TOKEN), reverse=True)

    for fork in list_of_forks:
        # Strip every inner fork from this fork. The removals are
        # accumulated on fork_simplified, so the result does not depend on
        # which fork happens to be last in list_of_forks.
        fork_simplified = fork
        for subfork in list_of_forks:
            # Skip the fork itself; forks that are not contained in this one
            # simply do not match and leave the string unchanged.
            if subfork != fork:
                fork_simplified = fork_simplified.replace(
                    FORK_TOKEN + subfork + CLOSE_TOKEN, "")

        # Without a lane token at its own level the fork has a single branch
        if LANE_TOKEN not in fork_simplified:
            raise SanityError("One of the forks doesn't have '|' "
                              "separator between the processes to fork. This is"
                              " the prime suspect: '({})'".format(fork))
Esempio n. 4
0
def empty_tasks(p_string):
    """
    Reject a pipeline string that is empty or contains only whitespace.

    Parameters
    ----------
    p_string: str
         String with the definition of the pipeline, e.g.::
             'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If nothing is left of ``p_string`` after stripping whitespace.

    """
    has_content = bool(p_string.strip())
    if has_content:
        return

    raise SanityError("'-t' parameter received an empty string or "
                      "an empty file.")
Esempio n. 5
0
def brackets_but_no_lanes(p_string):
    """
    Reject a pipeline string that has a lane separator '|' but no fork
    start character '('.

    Parameters
    ----------
    p_string: str
         String with the definition of the pipeline, e.g.::
             'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If ``p_string`` contains '|' and does not contain '('.

    """

    has_lane = "|" in p_string
    has_fork_start = "(" in p_string

    # Lanes are only an error when no fork was ever opened.
    if not has_lane or has_fork_start:
        return

    raise SanityError("No fork initiation character '(' was "
                      "provided but there is a fork lane separator "
                      "character '|'")
Esempio n. 6
0
def lane_char_insanity_check(p_string):
    """
    Reject a pipeline string with two consecutive lane separators, i.e.
    more than one '|' character between two processes.

    Parameters
    ----------
    p_string: str
         String with the definition of the pipeline, e.g.::
             'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If the lane token appears twice in a row in ``p_string``.

    """

    doubled_lane = LANE_TOKEN * 2

    if doubled_lane not in p_string:
        return

    raise SanityError("Duplicated fork separator character '|'.")
Esempio n. 7
0
def late_proc_insanity_check(p_string):
    """
    Check that nothing other than '|' or ')' follows a fork close token.

    The pipeline string is searched for a close token immediately followed
    by any character that is not '|' or ')'.

    Parameters
    ----------
    p_string: str
         String with the definition of the pipeline, e.g.::
             'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If a close token is directly followed by a character other than
        '|' or ')'.

    """

    # re.escape turns the close token into a literal match. Building the
    # pattern from a raw string avoids the invalid '\{' string escape that a
    # plain '\{}' literal would contain.
    pattern = re.escape(CLOSE_TOKEN) + r"[^|)]"

    if re.search(pattern, p_string):
        raise SanityError("After a fork it is not allowed to have any "
                          "alphanumeric value.")
Esempio n. 8
0
def start_proc_insanity_check(p_string):
    """
    Reject a pipeline string with two consecutive fork start tokens ['(('].

    A fork opened right after another fork start means the outer fork has
    no starting process before the inner fork begins.

    Parameters
    ----------
    p_string: str
         String with the definition of the pipeline, e.g.::
             'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If the fork token appears twice in a row in ``p_string``.

    """

    doubled_fork = FORK_TOKEN * 2

    if doubled_fork not in p_string:
        return

    raise SanityError("There must be a starting process after the "
                      "fork before adding a new fork. E.g: proc1 ( proc2.1 "
                      "(proc3.1 | proc3.2) | proc 2.2 )")
Esempio n. 9
0
def final_char_insanity_check(p_string):
    """
    Reject a pipeline string whose last element is the lane token.

    Parameters
    ----------
    p_string: str
         String with the definition of the pipeline, e.g.::
             'processA processB processC(ProcessD | ProcessE)'

    Raises
    ------
    SanityError
        If ``p_string`` ends with the lane token.

    """

    ends_with_lane = p_string.endswith(LANE_TOKEN)

    # Nothing to report unless the string finishes on a lane separator.
    if not ends_with_lane:
        return

    raise SanityError("Fork separator character '|' cannot be the "
                      "last element of pipeline string")