Example #1
def form_edit_pipeline_config_file(pipeline_type):
    '''
    Edit a pipeline config file to change the parameters of each process.
    '''

    # initialize the control variable
    OK = True

    # set the pipeline name
    if pipeline_type == xlib.get_toa_process_pipeline_nucleotide_code():
        name = xlib.get_toa_process_pipeline_nucleotide_name()
    elif pipeline_type == xlib.get_toa_process_pipeline_aminoacid_code():
        name = xlib.get_toa_process_pipeline_aminoacid_name()
    elif pipeline_type == xlib.get_toa_process_merge_annotations_code():
        name = xlib.get_toa_process_merge_annotations_name()

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(f'{name} - Edit config file')

    # get the config file
    if pipeline_type == xlib.get_toa_process_pipeline_nucleotide_code():
        config_file = xtoa.get_nucleotide_pipeline_config_file()
    elif pipeline_type == xlib.get_toa_process_pipeline_aminoacid_code():
        config_file = xtoa.get_aminoacid_pipeline_config_file()
    elif pipeline_type == xlib.get_toa_process_merge_annotations_code():
        config_file = xtoa.get_annotation_merger_config_file()

    # edit the config file
    print(xlib.get_separator())
    print(f'Editing the {name} config file ...')
    command = f'{xlib.get_editor()} {config_file}'
    rc = subprocess.call(command, shell=True)
    if rc != 0:
        print(f'*** ERROR: RC {rc} in command -> {command}')
        OK = False

    # check the config file
    if OK:
        print(xlib.get_separator())
        print(f'Checking the {name} config file ...')
        if pipeline_type in (xlib.get_toa_process_pipeline_nucleotide_code(), xlib.get_toa_process_pipeline_aminoacid_code()):
            (OK, error_list) = xtoa.check_pipeline_config_file(pipeline_type, strict=False)
        elif pipeline_type == xlib.get_toa_process_merge_annotations_code():
            (OK, error_list) = xtoa.check_annotation_merger_config_file(strict=False)
        if OK:
            print('The file is OK.')
        else:
            print()
            for error in error_list:
                print(error)

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
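
The edit step above shells out to the configured text editor and flags any non-zero return code before validating the file. A minimal standalone sketch of that edit-and-report pattern, assuming the editor comes from the EDITOR environment variable (TOA resolves it through xlib.get_editor()) and using a hypothetical file name:

import os
import subprocess

def edit_file_in_editor(path):
    # open the file in the user's editor and wait until the editor exits
    editor = os.environ.get('EDITOR', 'nano')
    rc = subprocess.call([editor, path])
    if rc != 0:
        print(f'*** ERROR: RC {rc} in command -> {editor} {path}')
        return False
    return True

# usage (hypothetical path):
# edit_file_in_editor('nucleotide-pipeline-config.txt')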
Example #2
def form_run_pipeline_process(pipeline_type):
    '''
    Run a pipeline process with the parameters in the corresponding config file.
    '''

    # initialize the control variable
    OK = True

    # set the pipeline name
    if pipeline_type == xlib.get_toa_process_pipeline_nucleotide_code():
        name = xlib.get_toa_process_pipeline_nucleotide_name()

    elif pipeline_type == xlib.get_toa_process_pipeline_aminoacid_code():
        name = xlib.get_toa_process_pipeline_aminoacid_name()

    elif pipeline_type == xlib.get_toa_process_merge_annotations_code():
        name = xlib.get_toa_process_merge_annotations_name()

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(f'{name} - Run process')

    # confirm the process run
    if OK:
        print(xlib.get_separator())
        OK = clib.confirm_action(f'The {name} process is going to be run.')

    # run the process
    if OK:

        if pipeline_type in (xlib.get_toa_process_pipeline_nucleotide_code(), xlib.get_toa_process_pipeline_aminoacid_code()):
            devstdout = xlib.DevStdOut(xtoa.run_pipeline_process.__name__)
            OK = xtoa.run_pipeline_process(pipeline_type, devstdout, function=None)

        elif pipeline_type == xlib.get_toa_process_merge_annotations_code():
            devstdout = xlib.DevStdOut(xtoa.run_annotation_merger_process.__name__)
            OK = xtoa.run_annotation_merger_process(devstdout, function=None)

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
Example #3
def build_menu_toa_nucleotide_pipeline():
    '''
    Build the menu Nucleotide pipeline.
    '''

    while True:

        # print headers
        clib.clear_screen()
        clib.print_headers_with_environment(f'{xlib.get_toa_name()} {xlib.get_toa_process_pipeline_nucleotide_name()}')

        # print the menu options
        print('Options:')
        print()
        print('    1. Recreate config file')
        print('    2. Edit config file')
        print()
        print('    3. Run pipeline')
        print('    4. Restart pipeline')
        print()
        print('    X. Return to menu Pipelines')
        print()

        # get the selected option
        option = input('Input the selected option: ').upper()

        # process the selected option
        if option == '1':
            ctoa.form_recreate_pipeline_config_file(xlib.get_toa_process_pipeline_nucleotide_code())
        elif option == '2':
            ctoa.form_edit_pipeline_config_file(xlib.get_toa_process_pipeline_nucleotide_code())
        elif option == '3':
            ctoa.form_run_pipeline_process(xlib.get_toa_process_pipeline_nucleotide_code())
        elif option == '4':
            ctoa.form_restart_pipeline_process(xlib.get_toa_process_pipeline_nucleotide_code())
        elif option == 'X':
            break
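
The menu builders in this module all follow the same loop: print the options, read a choice, dispatch to a form function, and exit on 'X'. A generic sketch of that pattern, with placeholder labels and callbacks (the real menus dispatch to the ctoa.form_* functions shown above):

def run_menu(title, actions):
    # actions maps an option key to a (label, callback) pair; 'X' always exits
    while True:
        print(title)
        print()
        print('Options:')
        print()
        for key, (label, _) in actions.items():
            print(f'    {key}. {label}')
        print()
        print('    X. Return to the previous menu')
        print()
        option = input('Input the selected option: ').upper()
        if option == 'X':
            break
        if option in actions:
            actions[option][1]()

# usage (hypothetical callback):
# run_menu('Nucleotide pipeline', {'1': ('Recreate config file', lambda: print('recreate ...'))})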
Example #4
def form_restart_pipeline_process(pipeline_type):
    '''
    Restart a pipeline process from the last step that ended OK.
    '''

    # initialize the control variable
    OK = True

    # set the pipeline name
    if pipeline_type == xlib.get_toa_process_pipeline_nucleotide_code():
        name = xlib.get_toa_process_pipeline_nucleotide_name()
    elif pipeline_type == xlib.get_toa_process_pipeline_aminoacid_code():
        name = xlib.get_toa_process_pipeline_aminoacid_name()

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(f'{name} - Restart process')

    # get the pipeline dataset identification
    app_list = [pipeline_type]
    pipeline_dataset_id = cinputs.input_result_dataset_id(
        xlib.get_toa_result_pipeline_dir(), app_list)
    if pipeline_dataset_id == '':
        print(f'WARNING: There are not any {pipeline_type} result datasets.')
        OK = False

    # confirm the process run
    if OK:
        print(xlib.get_separator())
        OK = clib.confirm_action(f'The {name} process is going to be run.')

    # run the process
    if OK:

        devstdout = xlib.DevStdOut(xtoa.restart_pipeline_process.__name__)
        OK = xtoa.restart_pipeline_process(pipeline_type,
                                           pipeline_dataset_id,
                                           devstdout,
                                           function=None)

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
Example #5
File: glog.py  Project: GGFHF/TOA
    def execute(self, event=None):
        '''
        Execute the listing of the result logs in the cluster.
        '''

        # if "button_execute" is disabled, exit function
        if str(self.button_execute['state']) == 'disabled':
            return

        # check inputs
        OK = self.check_inputs()
        if not OK:
            message = 'Some input values are not OK.'
            tkinter.messagebox.showerror(
                f'{xlib.get_short_project_name()} - {self.head}', message)

        # get the dictionary of TOA configuration.
        if OK:
            toa_config_dict = xtoa.get_toa_config_dict()

        # get the run dictionary
        if OK:
            process_type_dir = f'{toa_config_dict["RESULT_DIR"]}/{self.wrapper_process_type.get()}'
            subdir_list = [
                subdir for subdir in os.listdir(process_type_dir)
                if os.path.isdir(os.path.join(process_type_dir, subdir))
            ]
            result_dataset_dict = {}
            for subdir in subdir_list:
                result_dataset_id = subdir
                try:
                    pattern = r'^(.+)\-(.+)\-(.+)$'
                    mo = re.search(pattern, result_dataset_id)
                    bioinfo_app_code = mo.group(1).strip()
                    yymmdd = mo.group(2)
                    hhmmss = mo.group(3)
                    date = f'20{yymmdd[:2]}-{yymmdd[2:4]}-{yymmdd[4:]}'
                    time = f'{hhmmss[:2]}:{hhmmss[2:4]}:{hhmmss[4:]}'
                except:
                    bioinfo_app_code = 'xxx'
                    date = '0000-00-00'
                    time = '00:00:00'

                if result_dataset_id.startswith(xlib.get_blastplus_code() + '-'):
                    bioinfo_app_name = xlib.get_blastplus_name()

                elif result_dataset_id.startswith(xlib.get_diamond_code() + '-'):
                    bioinfo_app_name = xlib.get_diamond_name()

                elif result_dataset_id.startswith(xlib.get_entrez_direct_code() + '-'):
                    bioinfo_app_name = xlib.get_entrez_direct_name()

                elif result_dataset_id.startswith(xlib.get_miniconda3_code() + '-'):
                    bioinfo_app_name = xlib.get_miniconda3_name()

                elif result_dataset_id.startswith(xlib.get_r_code() + '-'):
                    bioinfo_app_name = xlib.get_r_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_download_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_basic_data_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_download_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_dicots_04_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_download_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gene_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_download_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_go_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_download_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gymno_01_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_download_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_interpro_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_download_monocots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_monocots_04_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_download_taxonomy_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_taxonomy_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_gilist_viridiplantae_protein_gi_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_protein_gi_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_load_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_basic_data_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_load_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_dicots_04_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_load_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gene_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_load_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_go_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_load_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gymno_01_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_load_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_interpro_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_load_monocots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_monocots_04_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_merge_annotations_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_merge_annotations_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_nr_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_blastplus_db_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_nr_diamond_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_diamond_db_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_nt_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nt_blastplus_db_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_pipeline_aminoacid_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_aminoacid_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_pipeline_nucleotide_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_nucleotide_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_proteome_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_dicots_04_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_proteome_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_gymno_01_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_proteome_monocots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_monocots_04_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_proteome_refseq_plant_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_refseq_plant_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_rebuild_toa_database_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_rebuild_toa_database_name()

                elif result_dataset_id.startswith(xlib.get_toa_process_recreate_toa_database_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_recreate_toa_database_name()

                elif result_dataset_id.startswith(xlib.get_transdecoder_code() + '-'):
                    bioinfo_app_name = xlib.get_transdecoder_name()

                else:
                    bioinfo_app_name = 'xxx'

                status_ok = os.path.isfile(
                    xlib.get_status_ok(os.path.join(process_type_dir, subdir)))
                status_wrong = os.path.isfile(
                    xlib.get_status_wrong(
                        os.path.join(process_type_dir, subdir)))
                if status_ok and not status_wrong:
                    status = 'OK'
                elif not status_ok and status_wrong:
                    status = 'wrong'
                elif not status_ok and not status_wrong:
                    status = 'not finished'
                elif status_ok and status_wrong:
                    status = 'undetermined'
                key = f'{bioinfo_app_name}-{result_dataset_id}'
                result_dataset_dict[key] = {
                    'process_type': self.wrapper_process_type.get(),
                    'bioinfo_app': bioinfo_app_name,
                    'result_dataset_id': result_dataset_id,
                    'date': date,
                    'time': time,
                    'status': status
                }

        # check if there is any result dataset
        if OK:
            if result_dataset_dict == {}:
                message = 'There is not any run.'
                tkinter.messagebox.showwarning(
                    f'{xlib.get_short_project_name()} - {self.head}', message)

        # build the data list
        if OK:
            data_list = [
                'process_type', 'bioinfo_app', 'result_dataset_id', 'date',
                'time', 'status'
            ]

        # build the data dictionary
        if OK:
            data_dict = {}
            data_dict['process_type'] = {
                'text': 'Process type',
                'width': 180,
                'alignment': 'left'
            }
            data_dict['bioinfo_app'] = {
                'text': 'Bioinfo app / Utility',
                'width': 340,
                'alignment': 'left'
            }
            data_dict['result_dataset_id'] = {
                'text': 'Result dataset',
                'width': 225,
                'alignment': 'left'
            }
            data_dict['date'] = {
                'text': 'Date',
                'width': 95,
                'alignment': 'right'
            }
            data_dict['time'] = {
                'text': 'Time',
                'width': 75,
                'alignment': 'right'
            }
            data_dict['status'] = {
                'text': 'Status',
                'width': 90,
                'alignment': 'left'
            }

        # create the dialog Table to show the result logs
        if OK:
            dialog_table = gdialogs.DialogTable(
                self,
                f'Runs in {xlib.get_result_dir()}/{self.wrapper_process_type.get()}',
                400, 1030, data_list, data_dict, result_dataset_dict,
                sorted(result_dataset_dict.keys()), 'view_result_logs',
                ['revisar'])
            self.wait_window(dialog_table)

        # close the form
        if OK:
            self.close()
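
The run dictionary above is keyed by result dataset directories named code-yymmdd-hhmmss, which the try block splits with a regular expression. A standalone sketch of that parsing step, using the same pattern and a hypothetical dataset identifier:

import re

def parse_result_dataset_id(result_dataset_id):
    # split an id like 'blastplus-200131-163412' into (app_code, date, time)
    mo = re.search(r'^(.+)-(.+)-(.+)$', result_dataset_id)
    app_code = mo.group(1).strip()
    yymmdd = mo.group(2)
    hhmmss = mo.group(3)
    date = f'20{yymmdd[:2]}-{yymmdd[2:4]}-{yymmdd[4:]}'
    time = f'{hhmmss[:2]}:{hhmmss[2:4]}:{hhmmss[4:]}'
    return app_code, date, time

# parse_result_dataset_id('blastplus-200131-163412') -> ('blastplus', '2020-01-31', '16:34:12')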
Example #6
def get_result_dataset_dict(cluster_name, experiment_id, status, passed_connection, ssh_client):
    '''
    Get a dictionary with the result datasets of an experiment in the cluster.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the result directory in the cluster
    cluster_result_dir = xlib.get_cluster_result_dir()

    # initialize the dictionary of the result datasets
    result_dataset_dict = {}

    # create the SSH client connection
    if not passed_connection:
        (OK, error_list, ssh_client) = xssh.create_ssh_client_connection(cluster_name)

    # check the result directory is created
    if OK:
        command = '[ -d {0} ] && echo RC=0 || echo RC=1'.format(cluster_result_dir)
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if stdout[len(stdout) - 1] != 'RC=0':
            error_list.append('*** ERROR: There is not any volume mounted in the result directory.\n')
            error_list.append('You have to link a volume in the mounting point {0} for the cluster {1}.\n'.format(cluster_result_dir, cluster_name))
            OK = False

    # get the dictionary of the result datasets
    if OK:
        if status == 'uncompressed':
            command = 'cd  {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'.format(cluster_result_dir, experiment_id)
        elif status == 'compressed':
            command = 'cd {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^d > /dev/null && echo $list; done;'.format(cluster_result_dir, experiment_id)
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            if status == 'uncompressed':
                input_pattern = '{0}-(.+)-(.+)'
                output_pattern = '{0} ({1} {2})'
            elif status == 'compressed':
                input_pattern = '{0}-(.+)-(.+).tar.gz'
                output_pattern = '{0} ({1} {2}) [compressed]'
            for line in stdout:
                line = line.rstrip('\n')
                if line != 'lost+found':
                    result_dataset_id = line
                    if result_dataset_id.startswith(xlib.get_bowtie2_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_bowtie2_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_bowtie2_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_busco_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_busco_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_busco_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_cd_hit_est_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_cd_hit_est_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_cd_hit_est_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_cutadapt_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_cutadapt_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_cutadapt_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_cuffdiff_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_cuffdiff_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_cuffdiff_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_cufflinks_cuffmerge_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_cufflinks_cuffmerge_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_cufflinks_cuffmerge_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_cuffnorm_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_cuffnorm_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_cuffnorm_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_cuffquant_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_cuffquant_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_cuffquant_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_ddradseq_simulation_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_ddradseq_simulation_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_ddradseq_simulation_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_fastqc_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_fastqc_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_fastqc_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_ggtrinity_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_ggtrinity_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_ggtrinity_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_gmap_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_gmap_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_gmap_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_gsnap_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_gsnap_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_gsnap_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_gzip_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_gzip_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_gzip_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_hisat2_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_hisat2_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_hisat2_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_htseq_count_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_htseq_count_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_htseq_count_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_insilico_read_normalization_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_insilico_read_normalization_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_insilico_read_normalization_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_ipyrad_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_ipyrad_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_ipyrad_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_kallisto_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_kallisto_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_kallisto_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_quast_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_quast_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_quast_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_ref_eval_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_ref_eval_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_ref_eval_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_rnaquast_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_rnaquast_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_rnaquast_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_rsem_eval_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_rsem_eval_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_rsem_eval_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_rsitesearch_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_rsitesearch_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_rsitesearch_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_soapdenovo2_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_soapdenovo2_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_soapdenovo2_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_soapdenovotrans_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_soapdenovotrans_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_soapdenovotrans_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_star_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_star_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_star_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_starcode_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_starcode_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_starcode_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_toa_process_pipeline_aminoacid_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_toa_process_pipeline_aminoacid_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_toa_process_pipeline_aminoacid_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_toa_process_pipeline_nucleotide_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_toa_process_pipeline_nucleotide_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_toa_process_pipeline_nucleotide_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_tophat_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_tophat_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_tophat_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_transabyss_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_transabyss_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_transabyss_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_transcript_filter_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_transcript_filter_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_transcript_filter_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_transcriptome_blastx_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_transcriptome_blastx_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_transcriptome_blastx_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_transrate_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_transrate_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_transrate_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_trimmomatic_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_trimmomatic_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_trimmomatic_name(), date, time)
                    elif result_dataset_id.startswith(xlib.get_trinity_code()+'-'):
                        mo = re.match(input_pattern.format(xlib.get_trinity_code()), result_dataset_id)
                        date = mo.group(1)
                        time = mo.group(2)
                        result_dataset_name = output_pattern.format(xlib.get_trinity_name(), date, time)
                    else:
                        result_dataset_name = result_dataset_id
                    result_dataset_dict[result_dataset_id] = {'result_dataset_id': result_dataset_id, 'result_dataset_name': result_dataset_name}

    # close the SSH client connection
    if OK and not passed_connection:
        xssh.close_ssh_client_connection(ssh_client)

    # return the control variable, error list and dictionary of the result datasets
    return (OK, error_list, result_dataset_dict)
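
The long elif chain above maps an id prefix to a human-readable application name and then formats the date and time captured by the pattern. A table-driven sketch of the same idea; the (code, name) pairs below are illustrative placeholders, since in TOA they come from the xlib.get_*_code() and xlib.get_*_name() helpers:

import re

# illustrative (code, name) pairs; TOA obtains them from xlib.get_*_code() / xlib.get_*_name()
CODE_NAME_PAIRS = [
    ('bowtie2', 'Bowtie2'),
    ('busco', 'BUSCO'),
    ('trinity', 'Trinity'),
]

def build_result_dataset_name(result_dataset_id):
    # return 'Name (date time)' for ids like 'trinity-200131-163412', with or without '.tar.gz'
    compressed = result_dataset_id.endswith('.tar.gz')
    core = result_dataset_id[:-len('.tar.gz')] if compressed else result_dataset_id
    for code, name in CODE_NAME_PAIRS:
        if core.startswith(code + '-'):
            mo = re.match(f'{re.escape(code)}-(.+)-(.+)$', core)
            date, time = mo.group(1), mo.group(2)
            suffix = ' [compressed]' if compressed else ''
            return f'{name} ({date} {time}){suffix}'
    return result_dataset_id

# build_result_dataset_name('trinity-200131-163412') -> 'Trinity (200131 163412)'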
Example #7
def form_view_phylogenic_data_frecuency(stats_code):
    '''
    View the frequency distribution of phylogenetic data.
    '''

    # initialize the control variable
    OK = True

    # assign the text of the "name"
    if stats_code == 'species':
        name = 'Species - Frequency distribution'
    elif stats_code == 'family':
        name = 'Family - Frequency distribution'
    elif stats_code == 'phylum':
        name = 'Phylum - Frequency distribution'
    elif stats_code == 'namespace':
        name = 'GO - Frequency distribution per namespace'

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(f'Statistics - {name} data')

    # get the pipeline dataset identification
    app_list = [
        xlib.get_toa_process_pipeline_nucleotide_code(),
        xlib.get_toa_process_pipeline_aminoacid_code(),
        xlib.get_toa_process_merge_annotations_code()
    ]
    pipeline_dataset_id = cinputs.input_result_dataset_id(
        xlib.get_toa_result_pipeline_dir(), app_list)
    if pipeline_dataset_id == '':
        print(
            'WARNING: There are not any annotation pipeline result datasets.')
        OK = False

    # build distribution dictionary
    if OK:

        # initialize the distribution dictionary
        distribution_dict = {}

        # get the dictionary of TOA configuration
        toa_config_dict = xtoa.get_toa_config_dict()

        # get the statistics file path
        stats_file = f'{toa_config_dict["RESULT_DIR"]}/{xlib.get_toa_result_pipeline_dir()}/{pipeline_dataset_id}/{toa_config_dict["STATS_SUBDIR_NAME"]}/{stats_code}-{toa_config_dict["STATS_BASE_NAME"]}.csv'

        # open the statistics file
        if stats_file.endswith('.gz'):
            try:
                stats_file_id = gzip.open(stats_file,
                                          mode='rt',
                                          encoding='iso-8859-1',
                                          newline='\n')
            except Exception as e:
                raise xlib.ProgramException('F002', stats_file)
        else:
            try:
                stats_file_id = open(stats_file,
                                     mode='r',
                                     encoding='iso-8859-1',
                                     newline='\n')
            except Exception as e:
                raise xlib.ProgramException('F001', stats_file)

        # initialize the record counter
        record_counter = 0

        # initialize the header record control
        header_record = True

        # read the first record
        record = stats_file_id.readline()

        # while there are records
        while record != '':

            # add 1 to the record counter
            record_counter += 1

            # process the header record
            if header_record:
                header_record = False

            # process data records
            else:

                # extract data
                # record format: "stats_code_id";"all_count";"first_hsp_count";"min_evalue_count"
                data_list = []
                begin = 0
                for end in [i for i, chr in enumerate(record) if chr == ';']:
                    data_list.append(record[begin:end].strip('"'))
                    begin = end + 1
                data_list.append(record[begin:].strip('\n').strip('"'))
                try:
                    id = data_list[0]
                    all_count = data_list[1]
                    first_hsp_count = data_list[2]
                    min_evalue_count = data_list[3]
                except Exception as e:
                    raise xlib.ProgramException('F006',
                                                os.path.basename(stats_file),
                                                record_counter)

                # add data to the dictionary
                distribution_dict[id] = {
                    'id': id,
                    'all_count': all_count,
                    'first_hsp_count': first_hsp_count,
                    'min_evalue_count': min_evalue_count
                }

            # read the next record
            record = stats_file_id.readline()

    # print the distribution
    if OK:
        print(xlib.get_separator())
        if distribution_dict == {}:
            print('*** WARNING: There is not any distribution.')
        else:
            # set data width
            id_width = 50
            all_count_width = 11
            first_hsp_count_width = 11
            min_evalue_count_width = 11
            # set line template
            line_template = '{0:' + str(id_width) + '}   {1:' + str(
                all_count_width) + '}   {2:' + str(
                    first_hsp_count_width) + '}   {3:' + str(
                        min_evalue_count_width) + '}'
            # print header
            print(
                line_template.format(stats_code.capitalize(), 'All',
                                     'First HSP', 'Min e-value'))
            print(
                line_template.format('=' * id_width, '=' * all_count_width,
                                     '=' * first_hsp_count_width,
                                     '=' * min_evalue_count_width))
            # print detail lines
            for key in sorted(distribution_dict.keys()):
                print(
                    line_template.format(
                        distribution_dict[key]['id'],
                        distribution_dict[key]['all_count'],
                        distribution_dict[key]['first_hsp_count'],
                        distribution_dict[key]['min_evalue_count']))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
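
The statistics files read above are ';'-separated with double-quoted fields, parsed here by scanning for the separators by hand. Python's csv module handles the same layout; a sketch with a hypothetical record following the format documented in the code ("stats_code_id";"all_count";"first_hsp_count";"min_evalue_count"):

import csv
import io

# hypothetical record in the documented layout
sample = '"Pinus taeda";"120";"95";"80"\n'

reader = csv.reader(io.StringIO(sample), delimiter=';', quotechar='"')
(stats_code_id, all_count, first_hsp_count, min_evalue_count) = next(reader)
print(stats_code_id, all_count, first_hsp_count, min_evalue_count)
# Pinus taeda 120 95 80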
Example #8
def form_view_dataset_data_frecuency():
    '''
    View the frequency distribution of annotation dataset data.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(
        'Statistics - Annotation datasets - Frequency distribution data')

    # get the pipeline dataset identification
    app_list = [
        xlib.get_toa_process_pipeline_nucleotide_code(),
        xlib.get_toa_process_pipeline_aminoacid_code()
    ]
    pipeline_dataset_id = cinputs.input_result_dataset_id(
        xlib.get_toa_result_pipeline_dir(), app_list)
    if pipeline_dataset_id == '':
        print(
            'WARNING: There are not any annotation pipeline result datasets.')
        OK = False

    # build distribution dictionary
    if OK:

        # initialize the distribution dictionary
        distribution_dict = {}

        # get the dictionary of TOA configuration
        toa_config_dict = xtoa.get_toa_config_dict()

        # get the statistics file path
        stats_file = f'{toa_config_dict["RESULT_DIR"]}/{xlib.get_toa_result_pipeline_dir()}/{pipeline_dataset_id}/{toa_config_dict["STATS_SUBDIR_NAME"]}/dataset-{toa_config_dict["STATS_BASE_NAME"]}.csv'

        # open the statistics file
        if stats_file.endswith('.gz'):
            try:
                stats_file_id = gzip.open(stats_file,
                                          mode='rt',
                                          encoding='iso-8859-1',
                                          newline='\n')
            except Exception as e:
                raise xlib.ProgramException('F002', stats_file)
        else:
            try:
                stats_file_id = open(stats_file,
                                     mode='r',
                                     encoding='iso-8859-1',
                                     newline='\n')
            except Exception as e:
                raise xlib.ProgramException('F001', stats_file)

        # initialize the record counter
        record_counter = 0

        # initialize the header record control
        header_record = True

        # read the first record
        record = stats_file_id.readline()

        # while there are records
        while record != '':

            # add 1 to the record counter
            record_counter += 1

            # process the header record
            if header_record:
                header_record = False

            # process data records
            else:

                # extract data
                # record format: "dataset_name";"annotated_seq_count";"remained_seq_count"
                data_list = []
                begin = 0
                for end in [i for i, chr in enumerate(record) if chr == ';']:
                    data_list.append(record[begin:end].strip('"'))
                    begin = end + 1
                data_list.append(record[begin:].strip('\n').strip('"'))
                try:
                    dataset_name = data_list[0]
                    annotated_seq_count = data_list[1]
                    remained_seq_count = data_list[2]
                except Exception as e:
                    raise xlib.ProgramException('F006',
                                                os.path.basename(stats_file),
                                                record_counter)

                # add data to the dictionary
                distribution_dict[record_counter] = {
                    'dataset_name': dataset_name,
                    'annotated_seq_count': annotated_seq_count,
                    'remained_seq_count': remained_seq_count
                }

            # read the next record
            record = stats_file_id.readline()

    # print the distribution
    if OK:
        print(xlib.get_separator())
        if distribution_dict == {}:
            print('*** WARNING: There is not any distribution.')
        else:
            # set data width
            dataset_name_width = 19
            annotated_seq_count_width = 14
            remained_seq_count_width = 14
            # set line template
            line_template = '{0:' + str(dataset_name_width) + '}   {1:' + str(
                annotated_seq_count_width) + '}   {2:' + str(
                    remained_seq_count_width) + '}'
            # print header
            print(
                line_template.format('Dataset', 'Annotated seqs',
                                     'Remained seqs'))
            print(
                line_template.format('=' * dataset_name_width,
                                     '=' * annotated_seq_count_width,
                                     '=' * remained_seq_count_width))
            # print detail lines
            for key in sorted(distribution_dict.keys()):
                print(
                    line_template.format(
                        distribution_dict[key]['dataset_name'],
                        distribution_dict[key]['annotated_seq_count'],
                        distribution_dict[key]['remained_seq_count']))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
Example #9
def form_view_x_per_y_data(stats_code):
    '''
    View the x per y data.
    '''

    # initialize the control variable
    OK = True

    # assign the text of the "name"
    if stats_code == 'hit_per_hsp':
        name = '# HITs per # HSPs'
    elif stats_code == 'seq_per_go':
        name = '# sequences per # GO terms'
    elif stats_code == 'seq_per_ec':
        name = '# sequences per # EC ids'
    elif stats_code == 'seq_per_interpro':
        name = '# sequences per # InterPro ids'
    elif stats_code == 'seq_per_kegg':
        name = '# sequences per # KEGG ids'
    elif stats_code == 'seq_per_mapman':
        name = '# sequences per # MapMan ids'
    elif stats_code == 'seq_per_metacyc':
        name = '# sequences per # MetaCyc ids'

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(f'Statistics - {name} data')

    # get the pipeline dataset identification
    if stats_code == 'hit_per_hsp':
        app_list = [
            xlib.get_toa_process_pipeline_nucleotide_code(),
            xlib.get_toa_process_pipeline_aminoacid_code()
        ]
    else:
        app_list = [
            xlib.get_toa_process_pipeline_nucleotide_code(),
            xlib.get_toa_process_pipeline_aminoacid_code(),
            xlib.get_toa_process_merge_annotations_code()
        ]
    pipeline_dataset_id = cinputs.input_result_dataset_id(
        xlib.get_toa_result_pipeline_dir(), app_list)
    if pipeline_dataset_id == '':
        print(
            'WARNING: There are not any annotation pipeline result datasets.')
        OK = False

    # build distribution dictionary
    if OK:

        # initialize the distribution dictionary
        distribution_dict = {}

        # get the dictionary of TOA configuration
        toa_config_dict = xtoa.get_toa_config_dict()

        # get the statistics file path
        stats_file = f'{toa_config_dict["RESULT_DIR"]}/{xlib.get_toa_result_pipeline_dir()}/{pipeline_dataset_id}/{toa_config_dict["STATS_SUBDIR_NAME"]}/{stats_code}-{toa_config_dict["STATS_BASE_NAME"]}.csv'

        # open the statistics file
        if stats_file.endswith('.gz'):
            try:
                stats_file_id = gzip.open(stats_file,
                                          mode='rt',
                                          encoding='iso-8859-1',
                                          newline='\n')
            except Exception as e:
                raise xlib.ProgramException('F002', stats_file)
        else:
            try:
                stats_file_id = open(stats_file,
                                     mode='r',
                                     encoding='iso-8859-1',
                                     newline='\n')
            except Exception as e:
                raise xlib.ProgramException('F001', stats_file)

        # initialize the record counter
        record_counter = 0

        # initialize the header record control
        header_record = True

        # read the first record
        record = stats_file_id.readline()

        # while there are records
        while record != '':

            # add 1 to the record counter
            record_counter += 1

            # process the header record
            if header_record:
                header_record = False

            # process data records
            else:

                # extract data
                # record format: "x_count";"y_count"
                data_list = []
                begin = 0
                for end in [i for i, chr in enumerate(record) if chr == ';']:
                    data_list.append(record[begin:end].strip('"'))
                    begin = end + 1
                data_list.append(record[begin:].strip('\n').strip('"'))
                try:
                    x_count = data_list[0]
                    y_count = data_list[1]
                except Exception as e:
                    raise xlib.ProgramException('F006',
                                                os.path.basename(stats_file),
                                                record_counter)

                # add data to the dictionary
                distribution_dict[record_counter] = {
                    'x_count': x_count,
                    'y_count': y_count
                }

            # read the next record
            record = stats_file_id.readline()

    # print the distribution
    if OK:
        print(xlib.get_separator())
        if distribution_dict == {}:
            print('*** WARNING: There is not any stats data.')
        else:
            # set data width
            x_count_width = 15
            y_count_width = 15
            # set line template
            line_template = '{0:' + str(x_count_width) + '}   {1:' + str(
                y_count_width) + '}'
            # print header
            if stats_code == 'hit_per_hsp':
                print(line_template.format('# HSPs', '# HITs'))
            elif stats_code == 'seq_per_go':
                print(line_template.format('# GO terms', '# sequences'))
            elif stats_code == 'seq_per_ec':
                print(line_template.format('# EC ids', '# sequences'))
            elif stats_code == 'seq_per_interpro':
                print(line_template.format('# InterPro ids', '# sequences'))
            elif stats_code == 'seq_per_kegg':
                print(line_template.format('# KEGG ids', '# sequences'))
            elif stats_code == 'seq_per_mapman':
                print(line_template.format('# MapMan ids', '# sequences'))
            elif stats_code == 'seq_per_metacyc':
                print(line_template.format('# MetaCyc ids', '# sequences'))
            print(
                line_template.format('=' * x_count_width, '=' * y_count_width))
            # print detail lines
            for key in sorted(distribution_dict.keys()):
                print(
                    line_template.format(distribution_dict[key]['x_count'],
                                         distribution_dict[key]['y_count']))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
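
The report sections above build a fixed-width line template by concatenating column widths into a format string. An equivalent construction with an f-string, assuming the same 15-character columns used in this example:

# equivalent template construction (same widths as above)
x_count_width = 15
y_count_width = 15
line_template = f'{{0:{x_count_width}}}   {{1:{y_count_width}}}'

print(line_template.format('# GO terms', '# sequences'))
print(line_template.format('=' * x_count_width, '=' * y_count_width))
print(line_template.format('3', '1250'))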
Example #10
def form_recreate_annotation_merger_config_file():
    '''
    Recreate the annotation merger config file.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(
        f'{xlib.get_toa_process_merge_annotations_name()} - Recreate config file'
    )

    # get the identification of the first pipeline dataset
    app_list = [
        xlib.get_toa_process_pipeline_nucleotide_code(),
        xlib.get_toa_process_pipeline_aminoacid_code(),
        xlib.get_toa_process_merge_annotations_code()
    ]
    print('First pipeline ...')
    pipeline_dataset_id_1 = cinputs.input_result_dataset_id(
        xlib.get_toa_result_pipeline_dir(), app_list)
    if pipeline_dataset_id_1 == '':
        print('WARNING: There are not any pipeline datasets.')
        OK = False

    # get the identification of the second pipeline dataset
    app_list = [
        xlib.get_toa_process_pipeline_nucleotide_code(),
        xlib.get_toa_process_pipeline_aminoacid_code(),
        xlib.get_toa_process_merge_annotations_code()
    ]
    print('Second pipeline ...')
    pipeline_dataset_id_2 = cinputs.input_result_dataset_id(
        xlib.get_toa_result_pipeline_dir(), app_list)
    if pipeline_dataset_id_2 == '':
        print('WARNING: There are not any pipeline datasets.')
        OK = False
    elif pipeline_dataset_id_1 == pipeline_dataset_id_2:
        print('ERROR: The first pipeline dataset is equal to the second one.')
        OK = False

    # get the merger operation
    if OK:
        merger_operation = cinputs.input_code(
            text='Merger operation',
            code_list=xlib.get_annotation_merger_operation_code_list(),
            default_code=None).upper()

    # recreate the pipeline config file
    if OK:

        # confirm the creation of the config file
        print(xlib.get_separator())
        OK = clib.confirm_action(
            f'The file {xtoa.get_annotation_merger_config_file()} is going to be recreated. The previous files will be lost.'
        )

        # recreate the config file
        if OK:
            (OK, error_list) = xtoa.create_annotation_merger_config_file(
                pipeline_dataset_id_1, pipeline_dataset_id_2, merger_operation)
            if OK:
                print('The file is recreated.')
            else:
                for error in error_list:
                    print(error)

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
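For non-interactive use, the same config file can be recreated by calling the xtoa helper directly, as the form does once its inputs are gathered. A minimal sketch, assuming the xtoa module from these examples is importable; the dataset identifiers and the merger operation code below are placeholders:

import xtoa  # TOA helper module used throughout these examples (assumed importable)

# Placeholder inputs for illustration; valid operation codes come from
# xlib.get_annotation_merger_operation_code_list().
pipeline_dataset_id_1 = 'nucleotide-pipeline-dataset-1'
pipeline_dataset_id_2 = 'aminoacid-pipeline-dataset-2'
merger_operation = 'UNION'

(OK, error_list) = xtoa.create_annotation_merger_config_file(
    pipeline_dataset_id_1, pipeline_dataset_id_2, merger_operation)
if OK:
    print('The file is recreated.')
else:
    for error in error_list:
        print(error)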
Example No. 11
def form_recreate_pipeline_config_file(pipeline_type):
    '''
    Recreate a pipeline config file.
    '''

    # initialize the control variable
    OK = True

    # set the pipeline name
    if pipeline_type == xlib.get_toa_process_pipeline_nucleotide_code():
        name = xlib.get_toa_process_pipeline_nucleotide_name()
    elif pipeline_type == xlib.get_toa_process_pipeline_aminoacid_code():
        name = xlib.get_toa_process_pipeline_aminoacid_name()

    # set the config file
    if pipeline_type == xlib.get_toa_process_pipeline_nucleotide_code():
        config_file = xtoa.get_nucleotide_pipeline_config_file()
    elif pipeline_type == xlib.get_toa_process_pipeline_aminoacid_code():
        config_file = xtoa.get_aminoacid_pipeline_config_file()

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(f'{name} - Recreate config file')
    print(xlib.get_separator())

    # get the transcriptome directory
    transcriptome_dir = ''
    while transcriptome_dir == '':
        transcriptome_dir = input('Enter transcriptome directory: ')
        if not os.path.isdir(transcriptome_dir):
            print(f'*** ERROR: The directory {transcriptome_dir} is not valid.')
            transcriptome_dir = ''

    # get the transcriptome file
    transcriptome_file = ''
    while transcriptome_file == '':
        transcriptome_file = input('Enter transcriptome file: ')
        if not os.path.isfile(f'{transcriptome_dir}/{transcriptome_file}'):
            print(f'*** ERROR: The file {transcriptome_file} is not valid.')
            transcriptome_file = ''

    # get the database list
    if OK:

        # nucleotide pipelines
        if pipeline_type == xlib.get_toa_process_pipeline_nucleotide_code():
            database_list = cinputs.input_database_list(
                xtoa.get_nucleotide_annotation_database_code_list(), 'nt')

        # amino acid pipelines
        elif pipeline_type == xlib.get_toa_process_pipeline_aminoacid_code():
            database_list = cinputs.input_database_list(
                xtoa.get_aminoacid_annotation_database_code_list(), 'nr')

    # recreate the pipeline config file
    if OK:

        # confirm the creation of the config file
        print(xlib.get_separator())
        OK = clib.confirm_action(
            f'The file {config_file} is going to be recreated. The previous files will be lost.'
        )

        # recreate the config file
        if OK:
            (OK, error_list) = xtoa.create_pipeline_config_file(
                pipeline_type, transcriptome_dir, transcriptome_file,
                database_list)
            if OK:
                print('The file is recreated.')
            else:
                for error in error_list:
                    print(error)

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
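The two while loops above implement a prompt-until-valid pattern for the transcriptome directory and file. A standalone sketch of that pattern as a reusable helper, using only the standard library (the helper name and prompts are illustrative, not part of TOA):

import os

def input_existing_path(prompt, check=os.path.isdir):
    '''Prompt repeatedly until the entered path passes the given check.'''
    path = ''
    while path == '':
        path = input(prompt)
        if not check(path):
            print(f'*** ERROR: The path {path} is not valid.')
            path = ''
    return path

# Usage sketch, mirroring the form above (the file check joins directory and file name):
# transcriptome_dir = input_existing_path('Enter transcriptome directory: ')
# transcriptome_file = input_existing_path(
#     'Enter transcriptome file: ',
#     check=lambda name: os.path.isfile(f'{transcriptome_dir}/{name}'))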
Example No. 12
def form_list_cluster_experiment_processes():
    '''
    List the processes of an experiment in the cluster.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(
        'Logs - List experiment processes in the cluster')

    # get the cluster name
    print(xlib.get_separator())
    if xec2.get_running_cluster_list(only_environment_cluster=True,
                                     volume_creator_included=False) != []:
        cluster_name = cinputs.input_cluster_name(
            volume_creator_included=False, help=True)
    else:
        print('WARNING: There is not any running cluster.')
        OK = False

    # create the SSH client connection
    if OK:
        (OK, error_list,
         ssh_client) = xssh.create_ssh_client_connection(cluster_name)
        for error in error_list:
            print(error)

    # get experiment identification
    if OK:
        experiment_id = cinputs.input_experiment_id(ssh_client, help=True)
        if experiment_id == '':
            print(
                f'WARNING: The cluster {cluster_name} does not have experiment data.'
            )
            OK = False

    # get the result dataset list of the experiment
    if OK:
        command = f'cd  {xlib.get_cluster_result_dir()}/{experiment_id}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'
        (OK, stdout, _) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            result_dataset_id_list = []
            for line in stdout:
                line = line.rstrip('\n')
                if line != 'lost+found':
                    result_dataset_id_list.append(line)

    # print the result dataset identification list of the experiment
    if OK:
        print(xlib.get_separator())
        if result_dataset_id_list == []:
            print(
                f'*** WARNING: There is not any result dataset of the experiment {experiment_id}.'
            )
        else:
            result_dataset_id_list.sort()
            # set data width
            result_dataset_width = 30
            bioinfo_app_width = 25
            # set line
            line = '{0:' + str(result_dataset_width) + '}   {1:' + str(
                bioinfo_app_width) + '}'
            # print header
            print(line.format('Result dataset', 'Bioinfo app / Utility'))
            print(
                line.format('=' * result_dataset_width,
                            '=' * bioinfo_app_width))
            # print detail lines
            for result_dataset_id in result_dataset_id_list:

                if result_dataset_id.startswith(xlib.get_bedtools_code() +
                                                '-'):
                    bioinfo_app_name = xlib.get_bedtools_name()

                elif result_dataset_id.startswith(xlib.get_blastplus_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_blastplus_name()

                elif result_dataset_id.startswith(xlib.get_bcftools_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_bcftools_name()

                elif result_dataset_id.startswith(xlib.get_bowtie2_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_bowtie2_name()

                elif result_dataset_id.startswith(xlib.get_busco_code() + '-'):
                    bioinfo_app_name = xlib.get_busco_name()

                elif result_dataset_id.startswith(xlib.get_cd_hit_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cd_hit_name()

                elif result_dataset_id.startswith(xlib.get_cd_hit_est_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cd_hit_est_name()

                elif result_dataset_id.startswith(xlib.get_cuffdiff_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cuffdiff_name()

                elif result_dataset_id.startswith(xlib.get_cufflinks_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cufflinks_name()

                elif result_dataset_id.startswith(
                        xlib.get_cufflinks_cuffmerge_code() + '-'):
                    bioinfo_app_name = xlib.get_cufflinks_cuffmerge_name()

                elif result_dataset_id.startswith(xlib.get_cuffnorm_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cuffnorm_name()

                elif result_dataset_id.startswith(xlib.get_cuffquant_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cuffquant_name()

                elif result_dataset_id.startswith(xlib.get_cutadapt_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_cutadapt_name()

                elif result_dataset_id.startswith(
                        xlib.get_ddradseq_simulation_code() + '-'):
                    bioinfo_app_name = xlib.get_ddradseq_simulation_name()

                elif result_dataset_id.startswith(
                        xlib.get_ddradseqtools_code() + '-'):
                    bioinfo_app_name = xlib.get_ddradseqtools_name()

                elif result_dataset_id.startswith(xlib.get_detonate_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_detonate_name()

                elif result_dataset_id.startswith(xlib.get_diamond_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_diamond_name()

                elif result_dataset_id.startswith(xlib.get_emboss_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_emboss_name()

                elif result_dataset_id.startswith(
                        xlib.get_entrez_direct_code() + '-'):
                    bioinfo_app_name = xlib.get_entrez_direct_name()

                elif result_dataset_id.startswith(xlib.get_express_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_express_name()

                elif result_dataset_id.startswith(xlib.get_fastqc_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_fastqc_name()

                elif result_dataset_id.startswith(xlib.get_ggtrinity_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_ggtrinity_name()

                elif result_dataset_id.startswith(xlib.get_gmap_gsnap_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_gmap_gsnap_name()

                elif result_dataset_id.startswith(xlib.get_gmap_code() + '-'):
                    bioinfo_app_name = xlib.get_gmap_name()

                elif result_dataset_id.startswith(xlib.get_gsnap_code() + '-'):
                    bioinfo_app_name = xlib.get_gsnap_name()

                elif result_dataset_id.startswith(xlib.get_gzip_code() + '-'):
                    bioinfo_app_name = xlib.get_gzip_name()

                elif result_dataset_id.startswith(xlib.get_hisat2_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_hisat2_name()

                elif result_dataset_id.startswith(xlib.get_htseq_code() + '-'):
                    bioinfo_app_name = xlib.get_htseq_name()

                elif result_dataset_id.startswith(xlib.get_htseq_count_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_htseq_count_name()

                elif result_dataset_id.startswith(
                        xlib.get_insilico_read_normalization_code() + '-'):
                    bioinfo_app_name = xlib.get_insilico_read_normalization_name(
                    )

                elif result_dataset_id.startswith(xlib.get_ipyrad_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_ipyrad_name()

                elif result_dataset_id.startswith(xlib.get_kallisto_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_kallisto_name()

                elif result_dataset_id.startswith(xlib.get_miniconda3_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_miniconda3_name()

                elif result_dataset_id.startswith(xlib.get_ngshelper_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_ngshelper_name()

                elif result_dataset_id.startswith(xlib.get_quast_code() + '-'):
                    bioinfo_app_name = xlib.get_quast_name()

                elif result_dataset_id.startswith(xlib.get_r_code() + '-'):
                    bioinfo_app_name = xlib.get_r_name()

                elif result_dataset_id.startswith(xlib.get_raddesigner_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_raddesigner_name()

                elif result_dataset_id.startswith(xlib.get_ref_eval_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_ref_eval_name()

                elif result_dataset_id.startswith(xlib.get_rnaquast_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_rnaquast_name()

                elif result_dataset_id.startswith(xlib.get_rsem_code() + '-'):
                    bioinfo_app_name = xlib.get_rsem_name()

                elif result_dataset_id.startswith(xlib.get_rsem_eval_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_rsem_eval_name()

                elif result_dataset_id.startswith(xlib.get_rsitesearch_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_rsitesearch_name()

                elif result_dataset_id.startswith(xlib.get_samtools_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_samtools_name()

                elif result_dataset_id.startswith(xlib.get_soapdenovo2_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_soapdenovo2_name()

                elif result_dataset_id.startswith(
                        xlib.get_soapdenovotrans_code() + '-'):
                    bioinfo_app_name = xlib.get_soapdenovotrans_name()

                elif result_dataset_id.startswith(xlib.get_star_code() + '-'):
                    bioinfo_app_name = xlib.get_star_name()

                elif result_dataset_id.startswith(xlib.get_starcode_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_starcode_name()

                elif result_dataset_id.startswith(xlib.get_toa_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_basic_data_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gene_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_go_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_interpro_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_monocots_04_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_taxonomy_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_taxonomy_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.
                        get_toa_process_gilist_viridiplantae_nucleotide_gi_code(
                        ) + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.
                        get_toa_process_gilist_viridiplantae_protein_gi_code()
                        + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_protein_gi_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_basic_data_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gene_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_go_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_interpro_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_monocots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_merge_annotations_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_merge_annotations_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nr_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_blastplus_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nr_diamond_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_diamond_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nt_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nt_blastplus_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_pipeline_aminoacid_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_aminoacid_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_pipeline_nucleotide_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_nucleotide_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_monocots_04_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_refseq_plant_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_refseq_plant_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_rebuild_toa_database_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_get_toa_process_rebuild_toa_database_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_recreate_toa_database_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_get_toa_process_recreate_toa_database_name(
                    )

                elif result_dataset_id.startswith(xlib.get_tophat_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_tophat_name()

                elif result_dataset_id.startswith(xlib.get_transabyss_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_transabyss_name()

                elif result_dataset_id.startswith(
                        xlib.get_transcript_filter_code() + '-'):
                    bioinfo_app_name = xlib.get_transcript_filter_name()

                elif result_dataset_id.startswith(
                        xlib.get_transcriptome_blastx_code() + '-'):
                    bioinfo_app_name = xlib.get_transcriptome_blastx_name()

                elif result_dataset_id.startswith(
                        xlib.get_transdecoder_code() + '-'):
                    bioinfo_app_name = xlib.get_transdecoder_name()

                elif result_dataset_id.startswith(xlib.get_transrate_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_transrate_name()

                elif result_dataset_id.startswith(xlib.get_trimmomatic_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_trimmomatic_name()

                elif result_dataset_id.startswith(xlib.get_trinity_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_trinity_name()

                elif result_dataset_id.startswith(
                        xlib.get_variant_calling_code() + '-'):
                    bioinfo_app_name = xlib.get_variant_calling_name()

                elif result_dataset_id.startswith(xlib.get_vcftools_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_vcftools_name()

                elif result_dataset_id.startswith(
                        xlib.get_vcftools_perl_libraries_code() + '-'):
                    bioinfo_app_name = xlib.get_vcftools_perl_libraries_name()

                elif result_dataset_id.startswith(xlib.get_vsearch_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_vsearch_name()

                else:
                    # fallback label for result datasets with an unrecognized prefix
                    bioinfo_app_name = 'xxx'

                print(line.format(result_dataset_id, bioinfo_app_name))

    # close the SSH client connection
    if OK:
        xssh.close_ssh_client_connection(ssh_client)

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
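The elif chain above maps each result dataset prefix to an application name one case at a time. The same lookup can be built once as a prefix-to-name dictionary, which is easier to extend; a sketch under the assumption that the xlib getters shown above are available (only a few of the handled codes are listed for brevity):

import xlib  # TOA helper module used throughout these examples (assumed importable)

# Entries are checked in insertion order, matching the elif ordering above;
# only a few of the code/name pairs handled above are listed here.
code_name_getters = [
    (xlib.get_bedtools_code, xlib.get_bedtools_name),
    (xlib.get_blastplus_code, xlib.get_blastplus_name),
    (xlib.get_gmap_gsnap_code, xlib.get_gmap_gsnap_name),
    (xlib.get_gmap_code, xlib.get_gmap_name),
    (xlib.get_toa_process_pipeline_nucleotide_code,
     xlib.get_toa_process_pipeline_nucleotide_name),
]
prefix_to_name = {get_code() + '-': get_name()
                  for get_code, get_name in code_name_getters}

def get_bioinfo_app_name(result_dataset_id):
    '''Return the application name for a result dataset, or the 'xxx' fallback.'''
    for prefix, name in prefix_to_name.items():
        if result_dataset_id.startswith(prefix):
            return name
    return 'xxx'  # same fallback used in the form above for unrecognized datasets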
Example No. 13
def form_list_results_logs():
    '''
    List the result logs of the processes run for an experiment.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment('Logs - List result logs')

    # get experiment identification
    experiment_id = cinputs.input_experiment_id()
    if experiment_id == '':
        print('WARNING: There is not any experiment/process run.')
        OK = False

    # get the dictionary of TOA configuration.
    if OK:
        toa_config_dict = xtoa.get_toa_config_dict()

    # get the result dataset list of the experiment
    if OK:
        experiment_dir = f'{toa_config_dict["RESULT_DIR"]}/{experiment_id}'
        subdir_list = [
            subdir for subdir in os.listdir(experiment_dir)
            if os.path.isdir(os.path.join(experiment_dir, subdir))
        ]
        result_dataset_id_list = []
        for subdir in subdir_list:
            result_dataset_id_list.append(subdir)

    # print the result dataset identification list of the experiment
    if OK:
        print(xlib.get_separator())
        if result_dataset_id_list == []:
            print(
                f'*** WARNING: There is not any result dataset of the experiment/process {experiment_id}.'
            )
        else:
            result_dataset_id_list.sort()
            # set data width
            result_dataset_width = 25
            bioinfo_app_width = 25
            # set line template
            line_template = '{0:' + str(
                result_dataset_width) + '}   {1:' + str(
                    bioinfo_app_width) + '}'
            # print header
            print(
                line_template.format('Result dataset',
                                     'Bioinfo app / Utility'))
            print(
                line_template.format('=' * result_dataset_width,
                                     '=' * bioinfo_app_width))
            # print detail lines
            for result_dataset_id in result_dataset_id_list:

                if result_dataset_id.startswith(xlib.get_blastplus_code() +
                                                '-'):
                    bioinfo_app_name = xlib.get_blastplus_name()

                elif result_dataset_id.startswith(xlib.get_diamond_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_diamond_name()

                elif result_dataset_id.startswith(
                        xlib.get_entrez_direct_code() + '-'):
                    bioinfo_app_name = xlib.get_entrez_direct_name()

                elif result_dataset_id.startswith(xlib.get_miniconda3_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_miniconda3_name()

                elif result_dataset_id.startswith(xlib.get_r_code() + '-'):
                    bioinfo_app_name = xlib.get_r_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_basic_data_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gene_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_go_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_interpro_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_monocots_04_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_taxonomy_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_taxonomy_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.
                        get_toa_process_gilist_viridiplantae_nucleotide_gi_code(
                        ) + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.
                        get_toa_process_gilist_viridiplantae_protein_gi_code()
                        + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_protein_gi_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_basic_data_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gene_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_go_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_interpro_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_monocots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_merge_annotations_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_merge_annotations_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nr_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_blastplus_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nr_diamond_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_diamond_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nt_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nt_blastplus_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_pipeline_aminoacid_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_aminoacid_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_pipeline_nucleotide_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_nucleotide_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_monocots_04_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_refseq_plant_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_refseq_plant_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_rebuild_toa_database_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_get_toa_process_rebuild_toa_database_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_recreate_toa_database_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_get_toa_process_recreate_toa_database_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_transdecoder_code() + '-'):
                    bioinfo_app_name = xlib.get_transdecoder_name()

                else:
                    # fallback label for result datasets with an unrecognized prefix
                    bioinfo_app_name = 'xxx'

                print(line_template.format(result_dataset_id,
                                           bioinfo_app_name))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
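The result dataset discovery in this form lists the subdirectories of the local experiment directory; a compact pathlib equivalent of that step is sketched below (the path is a placeholder, whereas the form builds it from toa_config_dict['RESULT_DIR'] and the experiment identification). The prefix-to-name lookup sketched after Example No. 12 would replace this second elif chain in the same way.

from pathlib import Path

# Placeholder path for illustration; in the form above it comes from the TOA
# configuration dictionary and the experiment identification entered by the user.
experiment_dir = Path('/path/to/results') / 'experiment_id'

result_dataset_id_list = sorted(p.name for p in experiment_dir.iterdir() if p.is_dir())
print(result_dataset_id_list)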