Example #1
    def _establish_connection(self, run_id, dict_dag):

        try:

            static_info = self._prepare_static_info()

            r = requests.post(self.broadcast_address,
                              json={
                                  "run_id": run_id,
                                  "dag_json": dict_dag,
                                  "pipeline_files": static_info
                              })
            if r.status_code != 201:
                logger.error(
                    colored_print(
                        "ERROR: There was a problem sending data to the server"
                        "with reason: {}".format(r.reason)))
                sys.exit(1)
        except requests.exceptions.ConnectionError:
            logger.error(
                colored_print(
                    "ERROR: Could not establish connection with server. The server"
                    " may be down or there is a problem with your internet "
                    "connection.", "red_bold"))
            sys.exit(1)
Example #2
    def _close_connection(self, report_id):
        """Sends a delete request for the report JSON hash

        Parameters
        ----------
        report_id : str
            Hash of the report JSON as retrieved from :func:`~_get_report_hash`
        """

        logger.debug(
            "Closing connection and sending DELETE request to {}".format(
                self.broadcast_address))

        try:
            r = requests.delete(self.broadcast_address,
                                json={"run_id": report_id})
            if r.status_code != 202:
                logger.error(
                    colored_print(
                        "ERROR: There was a problem sending data to the server"
                        "with reason: {}".format(r.reason)))
        except requests.exceptions.ConnectionError:
            logger.error(
                colored_print(
                    "ERROR: Could not establish connection with server. The server"
                    " may be down or there is a problem with your internet "
                    "connection.", "red_bold"))
            sys.exit(1)
Example #3
    def broadcast_status(self):

        logger.info(colored_print("Preparing broadcast data...", "green_bold"))

        run_hash = self._get_run_hash()
        dict_dag = self._dag_file_to_dict()
        _broadcast_sent = False
        self._establish_connection(run_hash, dict_dag)

        stay_alive = True
        try:
            while stay_alive:

                if not _broadcast_sent:
                    self._print_msg(run_hash)
                    _broadcast_sent = True

                self.update_inspection()
                if self.send:
                    self._send_status_info(run_hash)
                    self.send = False

                sleep(self.refresh_rate)

        except FileNotFoundError:
            logger.error(
                colored_print(
                    "ERROR: nextflow log and/or trace files are no longer "
                    "reachable!", "red_bold"))
        except Exception:
            logger.error("ERROR: {}".format(sys.exc_info()[0]))
        finally:
            logger.info("Closing connection")
            self._close_connection(run_hash)
Example #4
def check_arguments(args):

    passed = True

    # Check if no args are passed
    if len(sys.argv) == 1:
        logger.info(
            colored_print("Please provide one of the supported "
                          "arguments!", "red_bold"))
        passed = False

    # Check if output argument is valid
    # Check if output file was provided, if it is not a directory, and if
    # the directory exists
    if not args.output_nf \
            or os.path.isdir(args.output_nf) \
            or (os.path.dirname(args.output_nf) and
                not os.path.isdir(os.path.dirname(args.output_nf))):
        logger.info(
            colored_print(
                "Please provide a valid output file and "
                "location!", "red_bold"))
        passed = False

    return passed
Example #5
def guess_process(query_str, process_map):
    """
    Function to guess processes based on strings that are not available in
    process_map. If the string has typos but is more than 50% similar to any
    process available in flowcraft, it will print a suggestion to the
    terminal with the most similar available processes.

    Parameters
    ----------
    query_str: str
        The string of the process with potential typos
    process_map : dict
        The dictionary that contains all the available processes

    """

    save_list = []
    # loops over the processes available in process_map
    for process in process_map:
        similarity = SequenceMatcher(None, process, query_str)
        # checks if similarity between the process and the query string is
        # higher than 50%
        if similarity.ratio() > 0.5:
            save_list.append(process)

    # checks if any process is stored in save_list
    if save_list:
        logger.info(colored_print(
            "Maybe you meant:\n\t{}".format("\n\t".join(save_list)), "white"))

    logger.info(colored_print("Hint: check the available processes by using "
                              "the '-l' or '-L' flag.", "white"))
Example #6
def validate_build_arguments(args):

    if not args.tasks and not args.recipe and not args.check_only \
            and not args.detailed_list and not args.short_list:
        logger.error(
            colored_print(
                "At least one of these options is required: -t, -r, -c, "
                "-l, -L", "red_bold"))
        sys.exit(1)

    if (args.tasks or args.recipe) and not args.output_nf:
        logger.error(
            colored_print(
                "Please provide the path and name of the pipeline file using the"
                " -o option.", "red_bold"))
        sys.exit(1)

    if args.output_nf:
        parsed_output_nf = (args.output_nf if args.output_nf.endswith(".nf")
                            else "{}.nf".format(args.output_nf))
        opath = parsed_output_nf
        if os.path.dirname(opath):
            parent_dir = os.path.dirname(opath)
            if not os.path.exists(parent_dir):
                logger.error(
                    colored_print(
                        "The provided directory '{}' does not exist.".format(
                            parent_dir), "red_bold"))
                sys.exit(1)

        return parsed_output_nf
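
The output-path handling above only normalizes the ".nf" extension and checks
that the parent directory exists; a self-contained sketch of that logic, with
a made-up file name:

import os

def normalize_output_nf(output_nf):
    # Append the ".nf" extension when it is missing
    parsed = (output_nf if output_nf.endswith(".nf")
              else "{}.nf".format(output_nf))
    # When a directory component is given, it must already exist
    parent_dir = os.path.dirname(parsed)
    if parent_dir and not os.path.exists(parent_dir):
        raise ValueError("The directory '{}' does not exist".format(parent_dir))
    return parsed

print(normalize_output_nf("my_pipeline"))  # -> my_pipeline.nf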
Example #7
def validate_build_arguments(args):

    # Skip all checks when listing the processes
    if args.detailed_list or args.short_list:
        return

    # Skip all checks when exporting parameters AND providing at least one
    # component
    if args.export_params:
        # Check if components provided
        if not args.tasks:
            logger.error(
                colored_print(
                    "At least one component needs to be provided via the -t option"
                    " when exporting parameters in JSON format"))
            sys.exit(1)
        return

    # When none of the main run options is specified
    if not args.tasks and not args.recipe and not args.check_only \
            and not args.detailed_list and not args.short_list:
        logger.error(
            colored_print(
                "At least one of these options is required: -t, -r, -c, "
                "-l, -L", "red_bold"))
        sys.exit(1)

    # When the build mode is active via tasks or recipe, but no output file
    # option has been provided
    if (args.tasks or args.recipe) and not args.check_recipe \
            and not args.output_nf:
        logger.error(
            colored_print(
                "Please provide the path and name of the pipeline file using the"
                " -o option.", "red_bold"))
        sys.exit(1)

    if args.output_nf:
        if not os.path.basename(args.output_nf):
            logger.error(
                colored_print(
                    "Output pipeline path '{}' missing a name (only the directory "
                    "path was provided)".format(args.output_nf), "red_bold"))
            sys.exit(1)

        parsed_output_nf = (args.output_nf if args.output_nf.endswith(".nf")
                            else "{}.nf".format(args.output_nf.strip()))
        opath = parsed_output_nf
        if os.path.dirname(opath):
            parent_dir = os.path.dirname(opath)
            if not os.path.exists(parent_dir):
                logger.error(
                    colored_print(
                        "The provided directory '{}' does not exist.".format(
                            parent_dir), "red_bold"))
                sys.exit(1)

        return parsed_output_nf
Example #8
    def broadcast_report(self):

        logger.info(
            colored_print("Preparing to broacast reports...", "green_bold"))

        report_hash = self._get_report_id()

        # When in watch mode, set up the live report before entering the
        # main loop
        if self.watch:
            logger.info(
                colored_print("\tFetching pipeline run status", "green_bold"))
            self._update_pipeline_status()
            logger.info(
                colored_print("\tSending initial request to test service",
                              "green_bold"))
            self._init_live_reports(report_hash)
            logger.info(
                colored_print("\tInitial parsing of trace file", "green_bold"))
            self.update_trace_watch()

            self._print_msg(report_hash)

        logger.debug("Establishing connection...")

        stay_alive = True
        _broadcast_sent = False
        try:
            while stay_alive:

                # When not in watch mode, send the report JSON once
                if not _broadcast_sent and not self.watch:
                    self._send_report(report_hash)
                    self._print_msg(report_hash)
                    _broadcast_sent = True

                # When in watch mode, continuously monitor the trace file for
                # updates
                if self.watch:
                    self.update_trace_watch()
                    self.update_log_watch()
                    # When new report JSON files are available, send them
                    # via a PUT request
                    if self.send:
                        self._send_live_report(report_hash)
                        self.send = False

                sleep(self.refresh_rate)

        except FileNotFoundError as e:
            print(e)
            logger.error(
                colored_print("ERROR: Report JSON file is not reachable!",
                              "red_bold"))
        except Exception as e:
            logger.exception("ERROR: " + e)
        finally:
            logger.info("Closing connection")
            self._close_connection(report_hash)
Example #9
def list_recipes(full=False):
    """Method that iterates over all available recipes and prints their
    information to the standard output

    Parameters
    ----------
    full : bool
        If true, it will provide the pipeline string along with the recipe name
    """

    logger.info(
        colored_print("\n===== L I S T   O F   R E C I P E S =====\n",
                      "green_bold"))

    # This will iterate over all modules included in the recipes subpackage
    # It will return the importer and the module name, along with the
    # correct prefix
    prefix = "{}.".format(recipes.__name__)
    for importer, modname, _ in pkgutil.iter_modules(recipes.__path__, prefix):

        # Import the current module
        _module = importer.find_module(modname).load_module(modname)

        # Fetch all available classes in module
        _recipe_classes = [
            cls for cls in _module.__dict__.values() if isinstance(cls, type)
        ]

        # Iterate over each Recipe class, and check for a match with the
        # provided recipe name.
        for cls in _recipe_classes:

            recipe_cls = cls()

            if hasattr(recipe_cls, "name"):
                logger.info(
                    colored_print("=> {}".format(recipe_cls.name),
                                  "blue_bold"))
                if full:
                    logger.info(
                        colored_print("\t {}".format(recipe_cls.__doc__),
                                      "purple_bold"))
                    logger.info(
                        colored_print(
                            "Pipeline string: {}\n".format(
                                recipe_cls.pipeline_str), "yellow_bold"))

    sys.exit(0)
Example #10
    def _init_live_reports(self, report_id):
        """Sends a POST request to initialize the live reports

        Parameters
        ----------
        report_id : str
            Hash of the report JSON as retrieved from :func:`~_get_report_hash`
        """

        logger.debug("Sending initial POST request to {} to start report live"
                     " update".format(self.broadcast_address))

        try:
            with open(".metadata.json") as fh:
                metadata = [json.load(fh)]
        except (IOError, json.decoder.JSONDecodeError):
            metadata = []

        start_json = {"data": {"results": metadata}}

        try:
            requests.post(self.broadcast_address,
                          json={
                              "run_id": report_id,
                              "report_json": start_json,
                              "status": self.status_info
                          })
        except requests.exceptions.ConnectionError:
            logger.error(
                colored_print(
                    "ERROR: Could not establish connection with server. The server"
                    " may be down or there is a problem with your internet "
                    "connection.", "red_bold"))
            sys.exit(1)
Example #11
    def _print_msg(self, run_id):

        report_address = "{}reports/broadcast/{}".format(self.app_address,
                                                         run_id)
        logger.info(colored_print(
            "The pipeline reports are available in the following link:",
            "green_bold"))
        logger.info("{}".format(report_address))
Example #12
    def _close_connection(self, run_id):

        try:
            r = requests.delete(self.broadcast_address,
                                json={"run_id": run_id})
            if r.status_code != 202:
                logger.error(
                    colored_print(
                        "ERROR: There was a problem sending data to the server"
                        "with reason: {}".format(r.reason)))
        except requests.exceptions.ConnectionError:
            logger.error(
                colored_print(
                    "ERROR: Could not establish connection with server. The server"
                    " may be down or there is a problem with your internet "
                    "connection.", "red_bold"))
            sys.exit(1)
Example #13
    def _print_msg(self, run_id):

        inspect_address = "{}inspect/{}".format(self.app_address, run_id)
        logger.info(
            colored_print(
                "Starting broadcast. You can see the pipeline progress on the "
                "link below:", "green_bold"))
        logger.info("{}".format(inspect_address))
Example #14
def inspect(args):

    try:
        nf_inspect = NextflowInspector(args.trace_file, args.refresh_rate,
                                       args.pretty, args.url)
        if args.mode == "overview":
            nf_inspect.display_overview()

        if args.mode == "broadcast":
            nf_inspect.broadcast_status()

    except eh.InspectionError as ie:
        logger.error(colored_print(ie.value, "red_bold"))
        sys.exit(1)

    except eh.LogError as le:
        logger.error(colored_print(le.value, "red_bold"))
        sys.exit(1)
Example #15
def report(args):

    try:
        fc_report = FlowcraftReport(report_file=args.report_file,
                                    trace_file=args.trace_file,
                                    log_file=args.log_file,
                                    watch=args.watch,
                                    ip_addr=args.url)

        fc_report.broadcast_report()

    except eh.ReportError as re:
        logger.error(colored_print(re.value, "red_bold"))
        sys.exit(1)

    except eh.LogError as le:
        logger.error(colored_print(le.value, "red_bold"))
        sys.exit(1)
Example #16
def check_arguments(args):

    # Check if no args are passed
    if len(sys.argv) == 1:
        logger.info(
            colored_print("Please provide one of the supported "
                          "arguments!", "red_bold"))
        return False

    return True
Example #17
    def _set_channels(self):
        """Sets the main channels for the pipeline

        This method will parse the :attr:`~Process.processes` attribute
        and perform the following tasks for each process:

            - Sets the input/output channels and main input forks and adds
              them to the process's
              :attr:`assemblerflow.process.Process._context`
              attribute (See
              :func:`~NextflowGenerator.set_channels`).
            - Automatically updates the main input channel of the first
              process of each lane so that they fork from the user-provided
              parameters (See
              :func:`~NextflowGenerator._update_raw_input`).
            - Check for the presence of secondary inputs and adds them to the
              :attr:`~NextflowGenerator.secondary_inputs` attribute.
            - Check for the presence of secondary channels and adds them to the
              :attr:`~NextflowGenerator.secondary_channels` attribute.

        Notes
        -----
        **On the secondary channel setup**: With this approach, there can only
        be one secondary link start for each type of secondary link. For
        instance, if there are two processes that start a secondary channel
        for the ``SIDE_max_len`` channel, only the last one will be recorded,
        and all receiving processes will get the channel from the latest
        process. Secondary channels can only link if the source process is
        downstream of the sink process in its "forking" path.
        """

        logger.debug("=====================")
        logger.debug("Setting main channels")
        logger.debug("=====================")

        for i, p in enumerate(self.processes):

            # Set main channels for the process
            logger.debug("[{}] Setting main channels with pid: {}".format(
                p.template, i))
            p.set_channels(pid=i)

            # If there is no parent lane, set the raw input channel from user
            if not p.parent_lane and p.input_type:
                self._update_raw_input(p)

            self._update_secondary_inputs(p)

            self._update_secondary_channels(p)

            logger.info(
                colored_print("\tChannels set for {} \u2713".format(
                    p.template)))
Example #18
def build(args):

    welcome = [
        "========= F L O W C R A F T =========",
        "Build mode\n"
        "version: {}".format(__version__),
        "build: {}".format(__build__),
        "====================================="
    ]

    parsed_output_nf = validate_build_arguments(args)

    logger.info(colored_print("\n".join(welcome), "green_bold"))

    # If a recipe is specified, build pipeline based on the
    # appropriate recipe
    if args.recipe:
        pipeline_string, list_processes = brew_recipe(args)
    else:
        pipeline_string = args.tasks
        list_processes = None

    # used for lists print
    proc_collector(process_map, args, list_processes)

    logger.info(colored_print("Resulting pipeline string:\n"))
    logger.info(colored_print(pipeline_string + "\n"))

    try:
        logger.info(colored_print("Checking pipeline for errors..."))
        pipeline_list = parse_pipeline(pipeline_string)
    except SanityError as e:
        logger.error(colored_print(e.value, "red_bold"))
        sys.exit(1)
    logger.debug("Pipeline successfully parsed: {}".format(pipeline_list))

    # Exit if only the pipeline parser needs to be checked
    if args.check_only:
        sys.exit()

    nfg = NextflowGenerator(process_connections=pipeline_list,
                            nextflow_file=parsed_output_nf,
                            pipeline_name=args.pipeline_name,
                            auto_dependency=args.no_dep)

    logger.info(colored_print("Building your awesome pipeline..."))

    # building the actual pipeline nf file
    nfg.build()

    # copy template to cwd, to allow for immediate execution
    if not args.pipeline_only:
        copy_project(parsed_output_nf)

    logger.info(colored_print("DONE!", "green_bold"))
Example #19
def brew_recipe(args):
    """Brews a given list of processes according to the recipe

    Parameters
    ----------
    args : argparse.Namespace
        The arguments passed through argparse that will be used to check
        the recipe, tasks and brew the process

    Returns
    -------
    str
        The final pipeline string, ready for the engine.
    list
        List of process strings.
    """

    # Exit if recipe does not exist
    if args.recipe not in available_recipes:
        logger.error(
            colored_print(
                "Please provide a recipe to use in automatic "
                "mode.", "red_bold"))
        sys.exit(1)

    # Create recipe class instance
    automatic_pipeline = available_recipes[args.recipe]()

    if not args.tasks:
        input_processes = " ".join(
            automatic_pipeline.process_descriptions.keys())
    else:
        input_processes = args.tasks

    # Get the list of processes for that recipe
    list_processes = automatic_pipeline.get_process_info()
    # Validate the provided pipeline processes
    validated = automatic_pipeline.validate_pipeline(input_processes)
    if not validated:
        sys.exit(1)
    # Get the final pipeline string
    pipeline_string = automatic_pipeline.run_auto_pipeline(input_processes)

    return pipeline_string, list_processes
Example #20
    def _send_report(self, report_id):

        with open(self.report_file) as fh:
            report_json = json.loads(fh.read())

        logger.debug("Unique payload sent with size: {}".format(
            asizeof(json.dumps(report_json))
        ))

        try:
            requests.post(
                self.broadcast_address,
                json={"run_id": report_id, "report_json": report_json}
            )
        except requests.exceptions.ConnectionError:
            logger.error(colored_print(
                "ERROR: Could not establish connection with server. The server"
                " may be down or there is a problem with your internet "
                "connection.", "red_bold"))
            sys.exit(1)
Example #21
    def validate_pipeline(pipeline_string):
        """Validate pipeline string

        Validates the pipeline string by searching for forbidden characters

        Parameters
        ----------
        pipeline_string : str
            String with the processes provided

        Returns
        -------
        bool
            True if the pipeline string contains no forbidden characters,
            False otherwise
        """
        if "(" in pipeline_string or ")" in pipeline_string or "|" in \
                pipeline_string:
            logger.error(
                colored_print("Please provide a valid task list!", "red_bold"))
            return False

        return True
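
A quick illustration of the same forbidden-character rule, written as a
standalone helper (the task strings are invented):

def has_forbidden_characters(pipeline_string):
    # Mirrors the check above: forks "(", ")" and pipes "|" are not allowed
    # in a plain task list
    return any(char in pipeline_string for char in "()|")

print(has_forbidden_characters("integrity_coverage fastqc trimmomatic"))  # False
print(has_forbidden_characters("fastqc (spades | skesa)"))                # True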
Example #22
    def _dag_file_to_dict(self):
        """Function that opens the dotfile named .treeDag.json in the current
        working directory

        Returns
        -------
        dict
            Dictionary with the DAG object, sent in the POST request made by
            :func:`~_establish_connection`

        """
        try:
            with open(os.path.join(self.workdir, ".treeDag.json")) as dag_file:
                dag_json = json.load(dag_file)
        except (FileNotFoundError, json.decoder.JSONDecodeError):
            logger.warning(
                colored_print(
                    "WARNING: dotfile named .treeDag.json not found or corrupted",
                    "red_bold"))
            dag_json = {}

        return dag_json
Example #23
def brew_recipe(recipe_name):
    """Returns a pipeline string from a recipe name.

    Parameters
    ----------
    recipe_name : str
        Name of the recipe. Must match the name attribute in one of the classes
        defined in :mod:`flowcraft.generator.recipes`

    Returns
    -------
    str
        Pipeline string ready for parsing and processing by flowcraft engine
    """

    # This will iterate over all modules included in the recipes subpackage
    # It will return the importer and the module name, along with the
    # correct prefix
    prefix = "{}.".format(recipes.__name__)
    for importer, modname, _ in pkgutil.iter_modules(recipes.__path__, prefix):

        # Import the current module
        _module = importer.find_module(modname).load_module(modname)

        # Fetch all available classes in module
        _recipe_classes = [
            cls for cls in _module.__dict__.values() if isinstance(cls, type)
        ]

        # Iterate over each Recipe class, and check for a match with the
        # provided recipe name.
        for cls in _recipe_classes:
            # Create instance of class to allow fetching the name attribute
            recipe_cls = cls()
            if getattr(recipe_cls, "name", None) == recipe_name:
                return recipe_cls.brew()

    logger.error(
        colored_print("Recipe name '{}' does not exist.".format(recipe_name)))
    sys.exit(1)
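
The discovery loop above only assumes that each recipe class exposes a name
attribute and a brew() method; a minimal, hypothetical recipe class written
against that contract (the name and pipeline string are invented):

class ExampleRecipe:
    """Hypothetical recipe, only to illustrate the discovery contract."""

    def __init__(self):
        # Attribute matched against the recipe_name argument above
        self.name = "example_recipe"
        self.pipeline_str = "fastqc trimmomatic spades"

    def brew(self):
        # Returns the pipeline string handed to the flowcraft engine
        return self.pipeline_str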
Example #24
    def display_overview(self):
        """Displays the default pipeline inspection overview
        """

        stay_alive = True

        self.screen = curses.initscr()

        self.screen.keypad(True)
        self.screen.nodelay(-1)
        curses.cbreak()
        curses.noecho()
        curses.start_color()

        self.screen_lines = self.screen.getmaxyx()[0]
        # self.screen_width = self.screen.getmaxyx()[1]

        try:
            while stay_alive:

                # Provide functionality to certain keybindings
                self._curses_keybindings()
                # Updates main inspector attributes
                self.update_inspection()
                # Display curses interface
                self.flush_overview()

                sleep(self.refresh_rate)
        except FileNotFoundError:
            sys.stderr.write(
                colored_print(
                    "ERROR: nextflow log and/or trace files are no longer "
                    "reachable!", "red_bold"))
        except Exception as e:
            sys.stderr.write(str(e))
        finally:
            curses.nocbreak()
            self.screen.keypad(0)
            curses.echo()
            curses.endwin()
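
The key idiom in display_overview is that the terminal state is always
restored in the finally block, even if the refresh loop raises; a
stripped-down, self-contained version of that pattern (the loop body is
invented):

import curses
from time import sleep

def run_overview(refresh_rate=0.5, cycles=10):
    screen = curses.initscr()
    screen.keypad(True)
    screen.nodelay(True)  # make getch() non-blocking
    curses.cbreak()
    curses.noecho()
    try:
        for i in range(cycles):
            if screen.getch() == ord("q"):  # quit on 'q'
                break
            screen.erase()
            screen.addstr(0, 0, "refresh #{} (press 'q' to quit)".format(i))
            screen.refresh()
            sleep(refresh_rate)
    finally:
        # Always restore the terminal, whatever happened in the loop
        curses.nocbreak()
        screen.keypad(False)
        curses.echo()
        curses.endwin()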
Example #25
    def _send_status_info(self, run_id):

        mappings, data = self._prepare_table_data()
        overview_data = self._prepare_overview_data()
        general_details = self._prepare_general_details()
        status_data = self._prepare_run_status_data()

        status_json = {
            "generalOverview": overview_data,
            "generalDetails": general_details,
            "tableData": data,
            "tableMappings": mappings,
            "processInfo": self._convert_process_dict(),
            "processTags": self.process_tags,
            "runStatus": status_data,
            "timeStart": str(self.time_start),
            "timeStop": str(self.time_stop) if self.time_stop else "-",
            "processes": list(self.processes)
        }

        self._c += 1
        logger.debug("Payload [{}] sent with size: {}".format(
            self._c, asizeof.asizeof(json.dumps(status_json))))

        try:
            requests.put(self.broadcast_address,
                         json={
                             "run_id": run_id,
                             "status_json": status_json
                         })
        except requests.exceptions.ConnectionError:
            logger.error(
                colored_print(
                    "ERROR: Could not establish connection with server. The server"
                    " may be down or there is a problem with your internet "
                    "connection.", "red_bold"))
            sys.exit(1)
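
The payload size logged above comes from pympler's asizeof; a quick
standalone check of that call, assuming pympler is installed and using a
made-up payload:

import json

from pympler import asizeof

payload = {"run_id": "abc123", "status_json": {"processes": ["fastqc"]}}
# asizeof.asizeof reports the deep size, here of the serialized JSON string
print(asizeof.asizeof(json.dumps(payload)))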
Example #26
def build(args):

    # Disable standard logging for stdout when the following modes are
    #  executed:
    if args.export_params or args.export_directives:
        logger.setLevel(logging.ERROR)

    welcome = [
        "========= F L O W C R A F T =========",
        "Build mode\n"
        "version: {}".format(__version__),
        "build: {}".format(__build__),
        "====================================="
    ]

    parsed_output_nf = validate_build_arguments(args)

    logger.info(colored_print("\n".join(welcome), "green_bold"))

    # If a recipe is specified, build pipeline based on the
    # appropriate recipe
    if args.recipe:
        if args.recipe == "innuendo":
            pipeline_string = brew_recipe(args, available_recipes)
        else:
            pipeline_string = available_recipes[args.recipe]
            if args.tasks:
                logger.warning(colored_print(
                    "-t parameter will be ignored for recipe: {}\n".format(
                        args.recipe), "yellow_bold"))

        if args.check_recipe:
            logger.info(colored_print("Pipeline string for recipe: {}"
                                      .format(args.recipe), "purple_bold"))
            logger.info(pipeline_string)
            sys.exit(0)
    else:
        pipeline_string = args.tasks

    # used for lists print
    proc_collector(process_map, args, pipeline_string)

    try:
        logger.info(colored_print("Checking pipeline for errors..."))
        pipeline_list = parse_pipeline(pipeline_string)
    except SanityError as e:
        logger.error(colored_print(e.value, "red_bold"))
        sys.exit(1)
    logger.debug("Pipeline successfully parsed: {}".format(pipeline_list))

    # Exit if only the pipeline parser needs to be checked
    if args.check_only:
        sys.exit()

    nfg = NextflowGenerator(process_connections=pipeline_list,
                            nextflow_file=parsed_output_nf,
                            pipeline_name=args.pipeline_name,
                            auto_dependency=args.no_dep,
                            merge_params=args.merge_params,
                            export_params=args.export_params)

    logger.info(colored_print("Building your awesome pipeline..."))

    if args.export_params:
        nfg.export_params()
        sys.exit(0)
    elif args.export_directives:
        nfg.export_directives()
        sys.exit(0)
    else:
        # building the actual pipeline nf file
        nfg.build()

    # copy template to cwd, to allow for immediate execution
    if not args.pipeline_only:
        copy_project(parsed_output_nf)

    logger.info(colored_print("DONE!", "green_bold"))
Example #27
    def build_upstream(self, process_descriptions, task, all_tasks,
                       task_pipeline, count_forks, total_tasks, forks):
        """Builds the upstream pipeline of the current process

        Checks for the upstream processes to the current process and
        adds them to the current pipeline fragment if they were provided in
        the process list.

        Parameters
        ----------
        process_descriptions : dict
            Information on each process's input, output and whether it is
            forkable
        task : str
            Current process
        all_tasks : list
            A list of all provided processes
        task_pipeline : list
            Current pipeline fragment
        count_forks : int
            Current number of forks
        total_tasks : str
            All space separated processes
        forks : list
            Current forks

        Returns
        -------
        list
            Resulting pipeline fragment
        """
        if task in process_descriptions:
            if process_descriptions[task][1] is not None:
                if len(process_descriptions[task][1].split("|")) > 1:
                    local_forks = process_descriptions[task][1].split("|")

                    # Produces a new pipeline fragment for each forkable
                    #  process
                    for local_fork in local_forks:
                        if local_fork in total_tasks:
                            count_forks += 1
                            task_pipeline.insert(0,
                                                 process_descriptions[task][1])
                            self.define_pipeline_string(
                                process_descriptions, local_fork, False, True,
                                count_forks, total_tasks, forks)

                    return task_pipeline
                else:
                    # Adds the process to the pipeline fragment in case it is
                    # provided in the task list
                    if process_descriptions[task][1] in total_tasks:
                        task_pipeline.insert(
                            0, process_descriptions[task][1].split("|")[0])

                        # Proceeds building upstream until the input for a
                        # process is None
                        self.build_upstream(
                            process_descriptions,
                            process_descriptions[task][1].split("|")[0],
                            all_tasks, task_pipeline, count_forks, total_tasks,
                            forks)
                    else:
                        logger.error(
                            colored_print(
                                "{} not in provided protocols as "
                                "input for {}".format(
                                    process_descriptions[task][1], task),
                                "red_bold"))

                        sys.exit()

                    return task_pipeline
            else:
                return task_pipeline
Example #28
    def define_pipeline_string(self, process_descriptions, tasks,
                               check_upstream, check_downstream, count_forks,
                               total_tasks, forks):
        """Builds the possible forks and connections between the provided
        processes

        This method loops through all the provided tasks and builds the
        upstream and downstream pipeline if required. It then returns all
        possible forks that need to be merged a posteriori.

        Parameters
        ----------
        process_descriptions : dict
            Information on each process's input, output and whether it is
            forkable
        tasks : str
            Space separated processes
        check_upstream : bool
            Whether to build the upstream pipeline of the current task
        check_downstream : bool
            Whether to build the downstream pipeline of the current task
        count_forks : int
            Number of current forks
        total_tasks : str
            All space separated processes
        forks : list
            Current forks

        Returns
        -------
        list
            List with all the possible pipeline forks
        """

        tasks_array = tasks.split()

        for task_unsplit in tasks_array:
            task = task_unsplit.split("=")[0]

            if task not in process_descriptions.keys():
                logger.error(
                    colored_print(
                        "{} not in the possible processes".format(task),
                        "red_bold"))

                sys.exit()
            else:
                process_split = task_unsplit.split("=")

                if len(process_split) > 1:
                    self.process_to_id[process_split[0]] = process_split[1]

            # Only uses the process if it is not already in the possible forks
            if not bool([x for x in forks if task in x]) and not bool(
                [y for y in forks if process_descriptions[task][2] in y]):
                task_pipeline = []

                if task in process_descriptions:

                    if check_upstream:
                        task_pipeline = self.build_upstream(
                            process_descriptions, task, tasks_array,
                            task_pipeline, count_forks, total_tasks, forks)

                    task_pipeline.append(task)

                    if check_downstream:
                        task_pipeline = self.build_downstream(
                            process_descriptions, task, tasks_array,
                            task_pipeline, count_forks, total_tasks, forks)

                # Adds the pipeline fragment to the list of possible forks
                forks.append(list(OrderedDict.fromkeys(task_pipeline)))

            # Checks for the task in existing forks, in case the order of the
            # input processes is reversed
            elif bool([y for y in forks
                       if process_descriptions[task][2] in y]):
                for fork in forks:
                    if task not in fork:
                        try:
                            dependent_index = fork.index(
                                process_descriptions[task][2])
                            fork.insert(dependent_index, task)
                        except ValueError:
                            continue

        for i in range(0, len(forks)):
            for j in range(0, len(forks[i])):
                try:
                    if len(forks[i][j].split("|")) > 1:
                        forks[i][j] = forks[i][j].split("|")
                        tmp_fork = []
                        for s in forks[i][j]:
                            if s in total_tasks:
                                tmp_fork.append(s)

                        forks[i][j] = tmp_fork

                except AttributeError as e:
                    continue

        return forks
Example #29
    def build(self):
        """Main pipeline builder

        This method is responsible for building the
        :py:attr:`NextflowGenerator.template` attribute that will contain
        the nextflow code of the pipeline.

        First it builds the header, then sets the main channels, the
        secondary inputs, secondary channels and finally the
        status channels. When the pipeline is built, it writes the code
        to a nextflow file.
        """

        logger.info(
            colored_print("\tSuccessfully connected {} process(es) with {} "
                          "fork(s) across {} lane(s) \u2713".format(
                              len(self.processes[1:]), len(self._fork_tree),
                              self.lanes)))

        # Generate regular nextflow header that sets up the shebang, imports
        # and all possible initial channels
        self._build_header()

        self._set_channels()

        pipeline_to_json = self.render_pipeline()

        self._set_secondary_inputs()

        logger.info(
            colored_print(
                "\tSuccessfully set {} secondary input(s) \u2713".format(
                    len(self.secondary_inputs))))

        self._set_secondary_channels()

        logger.info(
            colored_print(
                "\tSuccessfully set {} secondary channel(s) \u2713".format(
                    len(self.secondary_channels))))

        self._set_compiler_channels()

        self._set_configurations()

        logger.info(
            colored_print("\tFinished configurations \u2713"))

        for p in self.processes:
            self.template += p.template_str

        self._build_footer()

        project_root = dirname(self.nf_file)

        # Write pipeline file
        with open(self.nf_file, "w") as fh:
            fh.write(self.template)

        # Write resources config
        with open(join(project_root, "resources.config"), "w") as fh:
            fh.write(self.resources)

        # Write containers config
        with open(join(project_root, "containers.config"), "w") as fh:
            fh.write(self.containers)

        # Write the rendered pipeline HTML file
        with open(splitext(self.nf_file)[0] + ".html", "w") as fh:
            fh.write(pipeline_to_json)

        logger.info(
            colored_print("\tPipeline written into {} \u2713".format(
                self.nf_file)))
Example #30
    def _send_live_report(self, report_id):
        """Sends a PUT request with the report JSON files currently in the
        report_queue attribute.

        Parameters
        ----------
        report_id : str
            Hash of the report JSON as retrieved from :func:`~_get_report_hash`
        """

        # Determines the maximum number of reports sent at the same time in
        # the same payload
        buffer_size = 100
        logger.debug("Report buffer size set to: {}".format(buffer_size))

        for i in range(0, len(self.report_queue), buffer_size):

            # Reset the report compilation batch
            reports_compilation = []

            # Iterate over report JSON batches determined by buffer_size
            for report in self.report_queue[i:i + buffer_size]:
                try:
                    report_file = [
                        x for x in os.listdir(report) if x.endswith(".json")
                    ][0]
                except IndexError:
                    continue
                with open(join(report, report_file)) as fh:
                    reports_compilation.append(json.loads(fh.read()))

            logger.debug("Payload sent with size: {}".format(
                asizeof(json.dumps(reports_compilation))))
            logger.debug("status: {}".format(self.status_info))

            try:
                requests.put(self.broadcast_address,
                             json={
                                 "run_id": report_id,
                                 "report_json": reports_compilation,
                                 "status": self.status_info
                             })
            except requests.exceptions.ConnectionError:
                logger.error(
                    colored_print(
                        "ERROR: Could not establish connection with server. The server"
                        " may be down or there is a problem with your internet "
                        "connection.", "red_bold"))
                sys.exit(1)

        # When there is no change in the report queue, but there is a change
        # in the run status of the pipeline
        if not self.report_queue:

            logger.debug("status: {}".format(self.status_info))

            try:
                requests.put(self.broadcast_address,
                             json={
                                 "run_id": report_id,
                                 "report_json": [],
                                 "status": self.status_info
                             })
            except requests.exceptions.ConnectionError:
                logger.error(
                    colored_print(
                        "ERROR: Could not establish connection with server. The"
                        " server may be down or there is a problem with your "
                        "internet connection.", "red_bold"))
                sys.exit(1)

        # Reset the report queue after sending the request
        self.report_queue = []
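
The buffering in _send_live_report is plain list slicing; the same batching
pattern in isolation, with an invented queue:

def iter_batches(queue, buffer_size=100):
    # Yield successive slices of at most buffer_size items
    for i in range(0, len(queue), buffer_size):
        yield queue[i:i + buffer_size]

report_queue = ["report_{}".format(n) for n in range(7)]
for batch in iter_batches(report_queue, buffer_size=3):
    print(batch)
# -> ['report_0', 'report_1', 'report_2']
#    ['report_3', 'report_4', 'report_5']
#    ['report_6']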