Beispiel #1
0
    def __init__(self, main, app):
        '''
        Execute actions corresponding to the creation of a "FormInstallBioinfoApp" instance.

        Parameters:
            main: object exposing the "root" and "container" attributes used by the form.
            app: code of the software to install; it is compared with the software codes
                returned by xlib to find the software name shown in the form head.

        Raises:
            ValueError: if "app" is not one of the software codes handled by this form.
        '''

        # save initial parameters in instance variables
        self.main = main
        self.root = main.root
        self.container = main.container
        self.app_code = app

        # call the init method of the parent class
        tkinter.Frame.__init__(self, self.container)

        # set cursor to show busy status
        self.root.config(cursor='watch')
        self.root.update()

        # the busy cursor has to be reset even when the construction fails; otherwise the
        # whole application would keep showing the "watch" cursor
        try:

            # set the software name
            if self.app_code == xlib.get_blastplus_code():
                self.app_name = xlib.get_blastplus_name()

            elif self.app_code == xlib.get_diamond_code():
                self.app_name = xlib.get_diamond_name()

            elif self.app_code == xlib.get_entrez_direct_code():
                self.app_name = xlib.get_entrez_direct_name()

            elif self.app_code == xlib.get_miniconda3_code():
                self.app_name = xlib.get_miniconda3_name()

            elif self.app_code == xlib.get_r_code():
                self.app_name = xlib.get_r_name()

            elif self.app_code == xlib.get_transdecoder_code():
                self.app_name = xlib.get_transdecoder_name()

            else:
                # without this branch "app_name" would stay unset and the head assignment
                # below would fail with an AttributeError that does not name the cause
                raise ValueError(f'The software code "{self.app_code}" can not be installed with this form.')

            # assign the text of the "head"
            self.head = f'{self.app_name} - Install software'

            # this form has no wrappers to track changes in the inputs

            # build the graphical user interface
            self.build_gui()

            # load initial data in inputs
            self.initialize_inputs()

        finally:

            # set cursor to show normal status
            self.root.config(cursor='')
            self.root.update()
Beispiel #2
0
Datei: glog.py Projekt: GGFHF/TOA
    def execute(self, event=None):
        '''
        List the result datasets (runs) of the selected process type in a dialog table.

        Every subdirectory of "<RESULT_DIR>/<process type>" is considered a result dataset.
        Its identification is expected to look like "<app code>-<yymmdd>-<hhmmss>"; when it
        does not, the date and the time are shown as zeros. The status is derived from the
        existence of the status files whose paths are given by xlib.get_status_ok and
        xlib.get_status_wrong.

        Parameters:
            event: event received when the method is bound to a widget; it is not used.

        Returns:
            None.
        '''

        # if "button_execute" is disabled, exit function
        if str(self.button_execute['state']) == 'disabled':
            return

        # check inputs; nothing else is done when they are wrong
        OK = self.check_inputs()
        if not OK:
            message = 'Some input values are not OK.'
            tkinter.messagebox.showerror(
                f'{xlib.get_short_project_name()} - {self.head}', message)
            return

        # get the dictionary of TOA configuration
        toa_config_dict = xtoa.get_toa_config_dict()

        # get the selected process type and its directory
        process_type = self.wrapper_process_type.get()
        process_type_dir = f'{toa_config_dict["RESULT_DIR"]}/{process_type}'

        # names of the xlib functions that yield the code and the name of every known
        # application or TOA process, in lookup order; they are resolved with getattr only
        # when needed so that exactly the same xlib functions are called as in an if/elif chain
        # NOTE(review): the two "get_get_..." names are kept as they were called here; confirm
        # that they match the definitions in xlib
        app_getter_names = (
            ('get_blastplus_code', 'get_blastplus_name'),
            ('get_diamond_code', 'get_diamond_name'),
            ('get_entrez_direct_code', 'get_entrez_direct_name'),
            ('get_miniconda3_code', 'get_miniconda3_name'),
            ('get_r_code', 'get_r_name'),
            ('get_toa_process_download_basic_data_code', 'get_toa_process_download_basic_data_name'),
            ('get_toa_process_download_dicots_04_code', 'get_toa_process_download_dicots_04_name'),
            ('get_toa_process_download_gene_code', 'get_toa_process_download_gene_name'),
            ('get_toa_process_download_go_code', 'get_toa_process_download_go_name'),
            ('get_toa_process_download_gymno_01_code', 'get_toa_process_download_gymno_01_name'),
            ('get_toa_process_download_interpro_code', 'get_toa_process_download_interpro_name'),
            ('get_toa_process_download_monocots_04_code', 'get_toa_process_download_monocots_04_name'),
            ('get_toa_process_download_taxonomy_code', 'get_toa_process_download_taxonomy_name'),
            ('get_toa_process_gilist_viridiplantae_nucleotide_gi_code', 'get_toa_process_gilist_viridiplantae_nucleotide_gi_name'),
            ('get_toa_process_gilist_viridiplantae_protein_gi_code', 'get_toa_process_gilist_viridiplantae_protein_gi_name'),
            ('get_toa_process_load_basic_data_code', 'get_toa_process_load_basic_data_name'),
            ('get_toa_process_load_dicots_04_code', 'get_toa_process_load_dicots_04_name'),
            ('get_toa_process_load_gene_code', 'get_toa_process_load_gene_name'),
            ('get_toa_process_load_go_code', 'get_toa_process_load_go_name'),
            ('get_toa_process_load_gymno_01_code', 'get_toa_process_load_gymno_01_name'),
            ('get_toa_process_load_interpro_code', 'get_toa_process_load_interpro_name'),
            ('get_toa_process_load_monocots_04_code', 'get_toa_process_load_monocots_04_name'),
            ('get_toa_process_merge_annotations_code', 'get_toa_process_merge_annotations_name'),
            ('get_toa_process_nr_blastplus_db_code', 'get_toa_process_nr_blastplus_db_name'),
            ('get_toa_process_nr_diamond_db_code', 'get_toa_process_nr_diamond_db_name'),
            ('get_toa_process_nt_blastplus_db_code', 'get_toa_process_nt_blastplus_db_name'),
            ('get_toa_process_pipeline_aminoacid_code', 'get_toa_process_pipeline_aminoacid_name'),
            ('get_toa_process_pipeline_nucleotide_code', 'get_toa_process_pipeline_nucleotide_name'),
            ('get_toa_process_proteome_dicots_04_code', 'get_toa_process_proteome_dicots_04_name'),
            ('get_toa_process_proteome_gymno_01_code', 'get_toa_process_proteome_gymno_01_name'),
            ('get_toa_process_proteome_monocots_04_code', 'get_toa_process_proteome_monocots_04_name'),
            ('get_toa_process_proteome_refseq_plant_code', 'get_toa_process_proteome_refseq_plant_name'),
            ('get_toa_process_rebuild_toa_database_code', 'get_get_toa_process_rebuild_toa_database_name'),
            ('get_toa_process_recreate_toa_database_code', 'get_get_toa_process_recreate_toa_database_name'),
            ('get_transdecoder_code', 'get_transdecoder_name'),
        )

        # pattern of a result dataset identification; the last two items are the date (yymmdd)
        # and the time (hhmmss) of the run
        result_dataset_id_pattern = re.compile(r'^(.+)\-(.+)\-(.+)$')

        # get the subdirectories of the process type directory
        subdir_list = [
            subdir for subdir in os.listdir(process_type_dir)
            if os.path.isdir(os.path.join(process_type_dir, subdir))
        ]

        # get the result dataset dictionary
        result_dataset_dict = {}
        for result_dataset_id in subdir_list:

            # get the date and the time of the run; zeros are shown when the identification
            # does not have the expected structure
            mo = result_dataset_id_pattern.search(result_dataset_id)
            if mo is None:
                run_date = '0000-00-00'
                run_time = '00:00:00'
            else:
                yymmdd = mo.group(2)
                hhmmss = mo.group(3)
                run_date = f'20{yymmdd[:2]}-{yymmdd[2:4]}-{yymmdd[4:]}'
                run_time = f'{hhmmss[:2]}:{hhmmss[2:4]}:{hhmmss[4:]}'

            # get the name of the application or process; the first code that prefixes the
            # identification wins and "xxx" is used when no code does
            bioinfo_app_name = 'xxx'
            for code_getter_name, name_getter_name in app_getter_names:
                if result_dataset_id.startswith(getattr(xlib, code_getter_name)() + '-'):
                    bioinfo_app_name = getattr(xlib, name_getter_name)()
                    break

            # get the status of the run from the existence of its status files
            result_dataset_dir = os.path.join(process_type_dir, result_dataset_id)
            status_ok = os.path.isfile(xlib.get_status_ok(result_dataset_dir))
            status_wrong = os.path.isfile(xlib.get_status_wrong(result_dataset_dir))
            if status_ok and status_wrong:
                status = 'undetermined'
            elif status_ok:
                status = 'OK'
            elif status_wrong:
                status = 'wrong'
            else:
                status = 'not finished'

            # add the run to the result dataset dictionary
            key = f'{bioinfo_app_name}-{result_dataset_id}'
            result_dataset_dict[key] = {
                'process_type': process_type,
                'bioinfo_app': bioinfo_app_name,
                'result_dataset_id': result_dataset_id,
                'date': run_date,
                'time': run_time,
                'status': status
            }

        # warn if there is not any run; the (empty) table is shown anyway
        if not result_dataset_dict:
            message = 'There is not any run.'
            tkinter.messagebox.showwarning(
                f'{xlib.get_short_project_name()} - {self.head}', message)

        # build the data list
        data_list = [
            'process_type', 'bioinfo_app', 'result_dataset_id', 'date',
            'time', 'status'
        ]

        # build the data dictionary
        data_dict = {
            'process_type': {'text': 'Process type', 'width': 180, 'alignment': 'left'},
            'bioinfo_app': {'text': 'Bioinfo app / Utility', 'width': 340, 'alignment': 'left'},
            'result_dataset_id': {'text': 'Result dataset', 'width': 225, 'alignment': 'left'},
            'date': {'text': 'Date', 'width': 95, 'alignment': 'right'},
            'time': {'text': 'Time', 'width': 75, 'alignment': 'right'},
            'status': {'text': 'Status', 'width': 90, 'alignment': 'left'},
        }

        # create the dialog Table to show the runs and wait until it is closed
        dialog_table = gdialogs.DialogTable(
            self,
            f'Runs in {xlib.get_result_dir()}/{process_type}',
            400, 1030, data_list, data_dict, result_dataset_dict,
            sorted(result_dataset_dict.keys()), 'view_result_logs',
            ['revisar'])
        self.wait_window(dialog_table)

        # close the form
        self.close()
Beispiel #3
0
def build_gmap_process_script(cluster_name, current_run_dir):
    '''
    Build the current GMAP process script.

    The options are read from the GMAP config file and a Bash script is written in the
    path given by get_gmap_process_script(). The script builds the GMAP database of the
    reference file with "gmap_build" and then runs "gmap" on the transcriptome file.

    Parameters:
        cluster_name: cluster name written in the log and mail messages of the script.
        current_run_dir: directory where the script changes to before running the programs.

    Returns:
        A tuple (OK, error_list): OK is False when the script could not be built and
        error_list then holds the error messages.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the GMAP option dictionary
    gmap_option_dict = xlib.get_option_dict(get_gmap_config_file())

    # get the options
    experiment_id = gmap_option_dict['identification']['experiment_id']
    reference_dataset_id = gmap_option_dict['identification']['reference_dataset_id']
    reference_file = gmap_option_dict['identification']['reference_file']
    assembly_software = gmap_option_dict['identification']['assembly_software']
    assembly_dataset_id = gmap_option_dict['identification']['assembly_dataset_id']
    assembly_type = gmap_option_dict['identification']['assembly_type']
    threads = gmap_option_dict['GMAP parameters']['threads']
    kmer = gmap_option_dict['GMAP parameters']['kmer']
    sampling = gmap_option_dict['GMAP parameters']['sampling']
    input_buffer_size = gmap_option_dict['GMAP parameters']['input-buffer-size']
    output_buffer_size = gmap_option_dict['GMAP parameters']['output-buffer-size']
    prunelevel = gmap_option_dict['GMAP parameters']['prunelevel']
    output_format = gmap_option_dict['GMAP parameters']['format']
    other_parameters = gmap_option_dict['GMAP parameters']['other_parameters']

    # set the cluster reference dataset directory
    cluster_reference_dataset_dir = xlib.get_cluster_reference_dataset_dir(reference_dataset_id)

    # set the cluster reference file
    cluster_reference_file = xlib.get_cluster_reference_file(reference_dataset_id, reference_file)

    # set the GMAP database name
    reference_file_name, _ = os.path.splitext(reference_file)
    gmap_database = '{0}-gmap_database'.format(reference_file_name)

    # set the transcriptome file name, which depends on the assembly software
    transcriptome_file_name = None
    if assembly_software == xlib.get_soapdenovotrans_code():
        if assembly_type.upper() == 'CONTIGS':
            transcriptome_file_name = '{0}-{1}.contig'.format(experiment_id, assembly_dataset_id)
        elif assembly_type.upper() == 'SCAFFOLDS':
            transcriptome_file_name = '{0}-{1}.scafSeq'.format(experiment_id, assembly_dataset_id)
    elif assembly_software == xlib.get_transabyss_code():
        transcriptome_file_name = 'transabyss-final.fa'
    elif assembly_software == xlib.get_trinity_code():
        transcriptome_file_name = 'Trinity.fasta'
    elif assembly_software == xlib.get_star_code():
        transcriptome_file_name = 'Trinity-GG.fasta'
    elif assembly_software == xlib.get_cd_hit_est_code():
        transcriptome_file_name = 'clustered-transcriptome.fasta'
    elif assembly_software == xlib.get_transcript_filter_code():
        transcriptome_file_name = 'filtered-transcriptome.fasta'

    # an unknown assembly software or type has to be reported as such; otherwise the
    # transcriptome file would be undefined when the script is written
    if transcriptome_file_name is None:
        error_list.append('*** ERROR: The transcriptome file can not be determined for the assembly software "{0}" and the assembly type "{1}".'.format(assembly_software, assembly_type))
        return (False, error_list)

    # set the transcriptome file path
    transcriptome_file = '{0}/{1}'.format(xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id), transcriptome_file_name)

    # set the script lines of the other parameters ("--name" or "--name=value" separated by ";")
    other_parameter_lines = []
    if other_parameters.upper() != 'NONE':
        for parameter in [x.strip() for x in other_parameters.split(';')]:
            if parameter.find('=') > 0:
                mo = re.search(r'^--(.+)=(.+)$', parameter)
                if mo is not None:
                    other_parameter_lines.append('            --{0}={1} \\'.format(mo.group(1).strip(), mo.group(2).strip()))
            else:
                mo = re.search(r'^--(.+)$', parameter)
                if mo is not None:
                    other_parameter_lines.append('            --{0} \\'.format(mo.group(1).strip()))
            if mo is None:
                error_list.append('*** ERROR: The value "{0}" in other_parameters is not a valid "--name" or "--name=value" parameter.'.format(parameter))
                OK = False
    if not OK:
        return (OK, error_list)

    # set the output file path
    output_file = 'gmap_output_{0}.txt'.format(output_format.lower())

    # set the parameter that selects the output format
    format_parameter_dict = {'COMPRESS': '--compress', 'SUMMARY': '--summary', 'ALIGN': '--align'}
    format_parameter = format_parameter_dict.get(output_format.upper(), '--format={0}'.format(output_format.lower()))

    # get the GMAP process script name
    gmap_process_script = get_gmap_process_script()

    # set the lines that are repeated in the script
    separator_line = '#-------------------------------------------------------------------------------'
    time_format_line = '        --format="$SEP\\nElapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'
    mail_message_template = '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended {2} at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
    mail_command_line = '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'

    # write the GMAP process script; the lines are built first and written at once so that
    # a failure does not leave a truncated script, and any failure is returned in the error
    # list instead of being raised
    try:

        # environment setting
        script_lines = [
            '#!/bin/bash',
            separator_line,
            'GMAP_GSNAP_PATH={0}/{1}/envs/{2}/bin'.format(xlib.get_cluster_app_dir(), xlib.get_miniconda3_name(), xlib.get_gmap_gsnap_bioconda_code()),
            'PATH=$GMAP_GSNAP_PATH:$PATH',
            'SEP="#########################################"',
            'cd {0}/{1}/bin'.format(xlib.get_cluster_app_dir(), xlib.get_miniconda3_name()),
            'source activate {0}'.format(xlib.get_gmap_gsnap_bioconda_code()),
            separator_line,
        ]

        # function "init"
        script_lines += [
            'function init',
            '{',
            '    INIT_DATETIME=`date --utc +%s`',
            '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
            '    echo "$SEP"',
            '    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'.format(cluster_name),
            '}',
            separator_line,
        ]

        # function "build_gmap_database"
        script_lines += [
            'function build_gmap_database',
            '{',
            '    cd {0}'.format(current_run_dir),
            '    echo "$SEP"',
            '    /usr/bin/time \\',
            time_format_line,
            '        gmap_build \\',
            '            --dir={0}\\'.format(cluster_reference_dataset_dir),
            '            --db={0}\\'.format(gmap_database),
        ]
        if kmer.upper() != 'NONE':
            script_lines.append('            --kmer={0} \\'.format(kmer))
        script_lines += [
            '            {0}'.format(cluster_reference_file),
            # stop the script when the database can not be built instead of running gmap
            # on a missing or incomplete database
            '    RC=$?',
            '    if [ $RC -ne 0 ]; then manage_error gmap_build $RC; fi',
            '}',
            separator_line,
        ]

        # function "run_gmap_process"
        script_lines += [
            'function run_gmap_process',
            '{',
            '    cd {0}'.format(current_run_dir),
            '    echo "$SEP"',
            '    gmap --version',
            '    echo "$SEP"',
            '    /usr/bin/time \\',
            time_format_line,
            '        gmap \\',
            '            --nthreads={0} \\'.format(threads),
            '            --dir={0} \\'.format(cluster_reference_dataset_dir),
            '            --db={0} \\'.format(gmap_database),
        ]
        if kmer.upper() != 'NONE':
            script_lines.append('            --kmer={0} \\'.format(kmer))
        if sampling.upper() != 'NONE':
            script_lines.append('            --sampling={0} \\'.format(sampling))
        script_lines += [
            '            --input-buffer-size={0} \\'.format(input_buffer_size),
            '            --output-buffer-size={0} \\'.format(output_buffer_size),
            '            --prunelevel={0} \\'.format(prunelevel),
            '            {0} \\'.format(format_parameter),
            '            --ordered \\',
            '            --nofails \\',
        ]
        script_lines += other_parameter_lines
        script_lines += [
            '            {0} \\'.format(transcriptome_file),
            '            > {0}'.format(output_file),
            '    RC=$?',
            '    if [ $RC -ne 0 ]; then manage_error gmap $RC; fi',
            '}',
            separator_line,
        ]

        # function "end"
        script_lines += [
            'function end',
            '{',
            '    END_DATETIME=`date --utc +%s`',
            '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
            '    calculate_duration',
            '    echo "$SEP"',
            '    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."',
            '    echo "$SEP"',
            '    RECIPIENT={0}'.format(xconfiguration.get_contact_data()),
            '    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), xlib.get_gmap_name()),
            mail_message_template.format(xlib.get_gmap_name(), cluster_name, 'OK'),
            mail_command_line,
            '    exit 0',
            '}',
            separator_line,
        ]

        # function "manage_error"
        script_lines += [
            'function manage_error',
            '{',
            '    END_DATETIME=`date --utc +%s`',
            '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
            '    calculate_duration',
            '    echo "$SEP"',
            '    echo "ERROR: $1 returned error $2"',
            '    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."',
            '    echo "$SEP"',
            '    RECIPIENT={0}'.format(xconfiguration.get_contact_data()),
            '    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), xlib.get_gmap_name()),
            mail_message_template.format(xlib.get_gmap_name(), cluster_name, 'WRONG'),
            mail_command_line,
            '    exit 3',
            '}',
            separator_line,
        ]

        # function "calculate_duration"
        script_lines += [
            'function calculate_duration',
            '{',
            '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`',
            '    HH=`expr $DURATION / 3600`',
            '    MM=`expr $DURATION % 3600 / 60`',
            '    SS=`expr $DURATION % 60`',
            '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`',
            '}',
            separator_line,
        ]

        # main sequence of the script
        script_lines += [
            'init',
            'build_gmap_database',
            'run_gmap_process',
            'end',
        ]

        # create the script directory if it does not exist and write the script
        os.makedirs(os.path.dirname(gmap_process_script), exist_ok=True)
        with open(gmap_process_script, mode='w', encoding='utf8', newline='\n') as file_id:
            file_id.write('\n'.join(script_lines) + '\n')

    except Exception as e:
        error_list.append('*** EXCEPTION: "{0}".'.format(e))
        error_list.append('*** ERROR: The file {0} can not be created'.format(gmap_process_script))
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Beispiel #4
0
def build_cd_hit_est_process_script(cluster_name, current_run_dir):
    '''
    Build the current CD-HIT-EST process script.

    The options are read from the CD-HIT-EST config file and a bash script
    is written to the path returned by get_cd_hit_est_process_script().

    Parameters:
        cluster_name: cluster name embedded in the log and mail messages of
            the generated script.
        current_run_dir: directory where the generated script changes to
            before running cd-hit-est and where the output file
            "clustered-transcriptome.fasta" is written.

    Returns:
        A tuple (OK, error_list): OK is False when the transcriptome file
        can not be determined from the options or when the script can not
        be written; error_list holds the corresponding messages.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the option dictionary
    cd_hit_est_option_dict = xlib.get_option_dict(get_cd_hit_est_config_file())

    # get the options
    identification = cd_hit_est_option_dict['identification']
    parameters = cd_hit_est_option_dict['CD-HIT-EST parameters']
    experiment_id = identification['experiment_id']
    assembly_software = identification['assembly_software']
    assembly_dataset_id = identification['assembly_dataset_id']
    assembly_type = identification['assembly_type']
    threads = parameters['threads']
    memory_limit = parameters['memory_limit']
    seq_identity_threshold = parameters['seq_identity_threshold']
    word_length = parameters['word_length']
    mask = parameters['mask']
    match = parameters['match']
    mismatch = parameters['mismatch']
    other_parameters = parameters['other_parameters']

    # set the transcriptome file name according to the assembly software
    # (the assembly type is compared case-insensitively)
    transcriptome_file_name = None
    if assembly_software == xlib.get_soapdenovotrans_code():
        if assembly_type.upper() == 'CONTIGS':
            transcriptome_file_name = '{0}-{1}.contig'.format(
                experiment_id, assembly_dataset_id)
        elif assembly_type.upper() == 'SCAFFOLDS':
            transcriptome_file_name = '{0}-{1}.scafSeq'.format(
                experiment_id, assembly_dataset_id)
    elif assembly_software == xlib.get_transabyss_code():
        transcriptome_file_name = 'transabyss-final.fa'
    elif assembly_software == xlib.get_trinity_code():
        transcriptome_file_name = 'Trinity.fasta'
    elif assembly_software == xlib.get_star_code():
        transcriptome_file_name = 'Trinity-GG.fasta'
    elif assembly_software == xlib.get_cd_hit_est_code():
        transcriptome_file_name = 'clustered-transcriptome.fasta'
    elif assembly_software == xlib.get_transcript_filter_code():
        transcriptome_file_name = 'filtered-transcriptome.fasta'

    # set the transcriptome file path; an unknown software/type combination
    # is reported explicitly instead of failing later with an unbound name
    if transcriptome_file_name is None:
        error_list.append(
            '*** ERROR: The transcriptome file can not be determined for the assembly software {0} and the assembly type {1}.'
            .format(assembly_software, assembly_type))
        OK = False
    else:
        transcriptome_file = '{0}/{1}'.format(
            xlib.get_cluster_experiment_result_dataset_dir(
                experiment_id, assembly_dataset_id), transcriptome_file_name)

    # set the output file path
    if OK:
        output_file = '{0}/clustered-transcriptome.fasta'.format(
            current_run_dir)

    # write the CD-HIT-EST process script
    if OK:
        try:
            separator_line = '#-------------------------------------------------------------------------------'
            cluster_app_dir = xlib.get_cluster_app_dir()
            miniconda3_name = xlib.get_miniconda3_name()
            cd_hit_bioconda_code = xlib.get_cd_hit_bioconda_code()
            cd_hit_est_name = xlib.get_cd_hit_est_name()
            project_name = xlib.get_project_name()

            # environment set-up
            script_lines = [
                '#!/bin/bash',
                separator_line,
                'CDHIT_PATH={0}/{1}/envs/{2}/bin'.format(
                    cluster_app_dir, miniconda3_name, cd_hit_bioconda_code),
                'PATH=$CDHIT_PATH:$PATH',
                'SEP="#########################################"',
                'cd {0}/{1}/bin'.format(cluster_app_dir, miniconda3_name),
                'source activate {0}'.format(cd_hit_bioconda_code),
                separator_line,
            ]

            # function "init"
            script_lines.extend([
                'function init',
                '{',
                '    INIT_DATETIME=`date --utc +%s`',
                '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
                '    echo "$SEP"',
                '    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'
                .format(cluster_name),
                '}',
                separator_line,
            ])

            # function "run_cd_hit_est_process"
            script_lines.extend([
                'function run_cd_hit_est_process',
                '{',
                '    cd {0}'.format(current_run_dir),
                '    echo "$SEP"',
                '    echo "Running {0} process ..."'.format(cd_hit_est_name),
                '    /usr/bin/time \\',
                '        --format="$SEP\\nElapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\',
                '        cd-hit-est \\',
                '            -T {0} \\'.format(threads),
                '            -M {0} \\'.format(memory_limit),
                '            -i {0} \\'.format(transcriptome_file),
                '            -c {0} \\'.format(seq_identity_threshold),
                '            -n {0} \\'.format(word_length),
                '            -mask {0} \\'.format(mask),
                '            -match {0} \\'.format(match),
                '            -mismatch {0} \\'.format(mismatch),
            ])
            if other_parameters.upper() == 'NONE':
                script_lines.append(
                    '            -o {0}'.format(output_file))
            else:
                script_lines.append(
                    '            -o {0} \\'.format(output_file))
                parameter_list = [
                    x.strip() for x in other_parameters.split(';')
                ]
                last_index = len(parameter_list) - 1
                for i, parameter in enumerate(parameter_list):
                    # every parameter but the last one ends with a line
                    # continuation; a trailing backslash on the last one
                    # would join the "RC=$?" line to the command
                    continuation = ' \\' if i < last_index else ''
                    if parameter.find('=') > 0:
                        # parameter written as "--name=value"
                        mo = re.search(r'^--(.+)=(.+)$', parameter)
                        parameter_name = mo.group(1).strip()
                        parameter_value = mo.group(2).strip()
                        script_lines.append(
                            '            -{0} {1}{2}'.format(
                                parameter_name, parameter_value,
                                continuation))
                    else:
                        # parameter written as "--name" (flag without value)
                        mo = re.search(r'^--(.+)$', parameter)
                        parameter_name = mo.group(1).strip()
                        script_lines.append('            -{0}{1}'.format(
                            parameter_name, continuation))
            script_lines.extend([
                '    RC=$?',
                '    if [ $RC -ne 0 ]; then manage_error cd-hit-est $RC; fi',
                '}',
                separator_line,
            ])

            # function "end"
            script_lines.extend([
                'function end',
                '{',
                '    END_DATETIME=`date --utc +%s`',
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
                '    calculate_duration',
                '    echo "$SEP"',
                '    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."',
                '    echo "$SEP"',
                '    RECIPIENT={0}'.format(xconfiguration.get_contact_data()),
                '    SUBJECT="{0}: {1} process"'.format(
                    project_name, cd_hit_est_name),
                '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
                .format(cd_hit_est_name, cluster_name),
                '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"',
                '    exit 0',
                '}',
                separator_line,
            ])

            # function "manage_error"
            script_lines.extend([
                'function manage_error',
                '{',
                '    END_DATETIME=`date --utc +%s`',
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
                '    calculate_duration',
                '    echo "$SEP"',
                '    echo "ERROR: $1 returned error $2"',
                '    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."',
                '    echo "$SEP"',
                '    RECIPIENT={0}'.format(xconfiguration.get_contact_data()),
                '    SUBJECT="{0}: {1} process"'.format(
                    project_name, cd_hit_est_name),
                '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
                .format(cd_hit_est_name, cluster_name),
                '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"',
                '    exit 3',
                '}',
                separator_line,
            ])

            # function "calculate_duration" and the main sequence
            script_lines.extend([
                'function calculate_duration',
                '{',
                '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`',
                '    HH=`expr $DURATION / 3600`',
                '    MM=`expr $DURATION % 3600 / 60`',
                '    SS=`expr $DURATION % 60`',
                '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`',
                '}',
                separator_line,
                'init',
                'run_cd_hit_est_process',
                'end',
            ])

            # create the script directory if necessary and write the script
            script_path = get_cd_hit_est_process_script()
            script_dir = os.path.dirname(script_path)
            if not os.path.exists(script_dir):
                os.makedirs(script_dir)
            with open(script_path, mode='w', encoding='utf8',
                      newline='\n') as file_id:
                file_id.writelines(
                    '{0}\n'.format(line) for line in script_lines)
        except Exception:
            # KeyboardInterrupt and SystemExit are deliberately not caught
            error_list.append(
                '*** ERROR: The file {0} can not be created'.format(
                    get_cd_hit_est_process_script()))
            OK = False

    # return the control variable and the error list
    return (OK, error_list)
Beispiel #5
0
def build_fastqc_process_script(cluster_name, current_run_dir):
    '''
    Build the current FastQC process script.

    The options are read from the FastQC config file and a bash script is
    written to the path returned by get_fastqc_process_script(). The script
    runs fastqc once per file found in the "file-n" sections of the config
    file.

    Parameters:
        cluster_name: cluster name embedded in the log and mail messages of
            the generated script.
        current_run_dir: directory where the generated script changes to
            and that is passed to fastqc as "--outdir".

    Returns:
        A tuple (OK, error_list): OK is False when the script can not be
        written; error_list holds the corresponding message.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the FastQC option dictionary
    fastqc_option_dict = xlib.get_option_dict(get_fastqc_config_file())

    # get the options
    experiment_id = fastqc_option_dict['identification']['experiment_id']
    read_dataset_id = fastqc_option_dict['identification']['read_dataset_id']
    threads = fastqc_option_dict['FastQC parameters']['threads']

    # build the file name list from the sections whose identification is
    # like file-n, following the sorted (lexicographic) order of the sections
    file_name_list = [
        fastqc_option_dict[section]['file_name']
        for section in sorted(fastqc_option_dict.keys())
        if re.match('^file-[0-9]+$', section)
    ]

    # write the FastQC process script
    try:
        separator_line = '#-------------------------------------------------------------------------------'
        cluster_app_dir = xlib.get_cluster_app_dir()
        miniconda3_name = xlib.get_miniconda3_name()
        fastqc_bioconda_code = xlib.get_fastqc_bioconda_code()

        # environment set-up
        script_lines = [
            '#!/bin/bash',
            separator_line,
            'FASTQC_PATH={0}/{1}/envs/{2}/bin'.format(
                cluster_app_dir, miniconda3_name, fastqc_bioconda_code),
            'PATH=$FASTQC_PATH:$PATH',
            'SEP="#########################################"',
            'cd {0}/{1}/bin'.format(cluster_app_dir, miniconda3_name),
            'source activate {0}'.format(fastqc_bioconda_code),
            separator_line,
        ]

        # function "init"
        script_lines.extend([
            'function init',
            '{',
            '    INIT_DATETIME=`date --utc +%s`',
            '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
            '    echo "$SEP"',
            '    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'
            .format(cluster_name),
            '}',
            separator_line,
        ])

        # function "run_fastqc_process": one fastqc run per read file
        script_lines.extend([
            'function run_fastqc_process',
            '{',
            '    cd {0}'.format(current_run_dir),
            '    echo "$SEP"',
            '    fastqc --version',
        ])
        for file_name in file_name_list:
            script_lines.extend([
                '    echo "$SEP"',
                '    /usr/bin/time \\',
                '        --format="$SEP\\nElapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\',
                '        fastqc \\',
                '            {0} \\'.format(
                    xlib.get_cluster_read_file(experiment_id, read_dataset_id,
                                               file_name)),
                '            --threads={0} \\'.format(threads),
                '            --outdir={0}'.format(current_run_dir),
                '    RC=$?',
                '    if [ $RC -ne 0 ]; then manage_error fastqc $RC; fi',
            ])
        script_lines.extend([
            '}',
            separator_line,
        ])

        # function "end"
        script_lines.extend([
            'function end',
            '{',
            '    END_DATETIME=`date --utc +%s`',
            '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
            '    calculate_duration',
            '    echo "$SEP"',
            '    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."',
            '    echo "$SEP"',
            '    RECIPIENT={0}'.format(xconfiguration.get_contact_data()),
            '    SUBJECT="{0}: {1} process"'.format(
                xlib.get_project_name(), xlib.get_fastqc_name()),
            '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
            .format(xlib.get_fastqc_name(), cluster_name),
            '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"',
            '    exit 0',
            '}',
            separator_line,
        ])

        # function "manage_error"
        script_lines.extend([
            'function manage_error',
            '{',
            '    END_DATETIME=`date --utc +%s`',
            '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
            '    calculate_duration',
            '    echo "$SEP"',
            '    echo "ERROR: $1 returned error $2"',
            '    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."',
            '    echo "$SEP"',
            '    RECIPIENT={0}'.format(xconfiguration.get_contact_data()),
            '    SUBJECT="{0}: {1} process"'.format(
                xlib.get_project_name(), xlib.get_fastqc_name()),
            '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
            .format(xlib.get_fastqc_name(), cluster_name),
            '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"',
            '    exit 3',
            '}',
            separator_line,
        ])

        # function "calculate_duration" and the main sequence
        script_lines.extend([
            'function calculate_duration',
            '{',
            '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`',
            '    HH=`expr $DURATION / 3600`',
            '    MM=`expr $DURATION % 3600 / 60`',
            '    SS=`expr $DURATION % 60`',
            '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`',
            '}',
            separator_line,
            'init',
            'run_fastqc_process',
            'end',
        ])

        # create the script directory if necessary and write the script
        script_path = get_fastqc_process_script()
        script_dir = os.path.dirname(script_path)
        if not os.path.exists(script_dir):
            os.makedirs(script_dir)
        with open(script_path, mode='w', encoding='utf8',
                  newline='\n') as file_id:
            file_id.writelines('{0}\n'.format(line) for line in script_lines)
    except Exception:
        # KeyboardInterrupt and SystemExit are deliberately not caught
        error_list.append('*** ERROR: The file {0} can not be created'.format(
            get_fastqc_process_script()))
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Beispiel #6
0
def form_list_cluster_experiment_processes():
    '''
    List the processes of an experiment in the cluster.

    The cluster name and the experiment identification are requested
    interactively; then the result dataset directories of the experiment
    are read through SSH and printed with the name of the application
    deduced from the prefix of each dataset identification. The function
    returns nothing.
    '''

    # initialize the control variable
    OK = True

    # the SSH connection is closed at the end only if it was established
    connected = False

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment('Logs - List experiment processes in the cluster')

    # get the cluster name
    print(xlib.get_separator())
    if xec2.get_running_cluster_list(volume_creator_included=False) != []:
        cluster_name = cinputs.input_cluster_name(volume_creator_included=False, help=True)
    else:
        print('WARNING: There is not any running cluster.')
        OK = False

    # create the SSH client connection
    if OK:
        (OK, error_list, ssh_client) = xssh.create_ssh_client_connection(cluster_name, 'master')
        connected = OK
        # NOTE(review): "log" is not defined in this function; presumably a module-level object -- confirm
        for error in error_list:
            log.write('{0}\n'.format(error))

    # get experiment identification
    if OK:
        experiment_id = cinputs.input_experiment_id(ssh_client, help=True)
        if experiment_id == '':
            print('WARNING: The cluster {0} has not experiment data.'.format(cluster_name))
            OK = False

    # get the result dataset list of the experiment
    if OK:
        command = 'cd  {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'.format(xlib.get_cluster_result_dir(), experiment_id)
        (OK, stdout, stderr) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            entry_list = [line.rstrip('\n') for line in stdout]
            result_dataset_id_list = [entry for entry in entry_list if entry != 'lost+found']

    # print the result dataset identification list of the experiment
    if OK:
        print(xlib.get_separator())
        if result_dataset_id_list == []:
            print('*** WARNING: There is not any result dataset of the experiment {0}.'.format(experiment_id))
        else:
            result_dataset_id_list.sort()
            # set data width
            result_dataset_width = 25
            bioinfo_app_width = 25
            # set line template
            line_template = '{0:' + str(result_dataset_width) + '}   {1:' + str(bioinfo_app_width) + '}'
            # print header
            print(line_template.format('Result dataset', 'Bioinfo app / Utility'))
            print(line_template.format('=' * result_dataset_width, '=' * bioinfo_app_width))
            # set the (code getter, name getter) pairs; the pairs are tried in
            # this order and the first code matching the dataset prefix wins
            app_getter_list = [
                (xlib.get_bedtools_code, xlib.get_bedtools_name),
                (xlib.get_blastplus_code, xlib.get_blastplus_name),
                (xlib.get_bowtie2_code, xlib.get_bowtie2_name),
                (xlib.get_busco_code, xlib.get_busco_name),
                # NOTE(review): the CD-HIT code is shown with the CD-HIT-EST name, as before -- confirm
                (xlib.get_cd_hit_code, xlib.get_cd_hit_est_name),
                (xlib.get_cd_hit_est_code, xlib.get_cd_hit_est_name),
                (xlib.get_detonate_code, xlib.get_detonate_name),
                (xlib.get_emboss_code, xlib.get_emboss_name),
                (xlib.get_fastqc_code, xlib.get_fastqc_name),
                (xlib.get_gmap_code, xlib.get_gmap_name),
                (xlib.get_gmap_gsnap_code, xlib.get_gmap_gsnap_name),
                (xlib.get_gzip_code, xlib.get_gzip_name),
                (xlib.get_insilico_read_normalization_code, xlib.get_insilico_read_normalization_name),
                (xlib.get_miniconda3_code, xlib.get_miniconda3_name),
                (xlib.get_ngshelper_code, xlib.get_ngshelper_name),
                (xlib.get_quast_code, xlib.get_quast_name),
                (xlib.get_r_code, xlib.get_r_name),
                (xlib.get_ref_eval_code, xlib.get_ref_eval_name),
                (xlib.get_rnaquast_code, xlib.get_rnaquast_name),
                (xlib.get_rsem_code, xlib.get_rsem_name),
                (xlib.get_rsem_eval_code, xlib.get_rsem_eval_name),
                (xlib.get_samtools_code, xlib.get_samtools_name),
                (xlib.get_soapdenovotrans_code, xlib.get_soapdenovotrans_name),
                (xlib.get_star_code, xlib.get_star_name),
                (xlib.get_transabyss_code, xlib.get_transabyss_name),
                (xlib.get_transcript_filter_code, xlib.get_transcript_filter_name),
                (xlib.get_transcriptome_blastx_code, xlib.get_transcriptome_blastx_name),
                (xlib.get_transrate_code, xlib.get_transrate_name),
                (xlib.get_trimmomatic_code, xlib.get_trimmomatic_name),
                (xlib.get_trinity_code, xlib.get_trinity_name),
            ]
            # print detail lines
            for result_dataset_id in result_dataset_id_list:
                # 'xxx' is shown when the prefix does not match any known code
                bioinfo_app_name = 'xxx'
                for (get_code, get_name) in app_getter_list:
                    if result_dataset_id.startswith(get_code() + '-'):
                        bioinfo_app_name = get_name()
                        break
                print(line_template.format(result_dataset_id, bioinfo_app_name))

    # close the SSH client connection, also when a later step has failed
    if connected:
        xssh.close_ssh_client_connection(ssh_client)

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
Beispiel #7
0
def build_quast_process_script(cluster_name, current_run_dir):
    '''
    Build the current QUAST process script.

    The options are read from the QUAST config file and a Bash script that runs
    "quast.py" on the selected assembly file is written to the path returned by
    "get_quast_process_script()".

    Parameters:
        cluster_name: cluster name; it is only embedded in the log and mail
            messages of the generated script.
        current_run_dir: directory used by the generated script as working
            directory and as QUAST output directory.

    Returns:
        A tuple (OK, error_list). OK is False when the script could not be
        built; error_list then holds the error messages.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the QUAST option dictionary
    quast_option_dict = xlib.get_option_dict(get_quast_config_file())

    # get the options
    experiment_id = quast_option_dict['identification']['experiment_id']
    reference_dataset_id = quast_option_dict['identification']['reference_dataset_id']
    reference_file = quast_option_dict['identification']['reference_file']
    assembly_software = quast_option_dict['identification']['assembly_software']
    assembly_dataset_id = quast_option_dict['identification']['assembly_dataset_id']
    assembly_type = quast_option_dict['identification']['assembly_type']
    threads = quast_option_dict['QUAST parameters']['threads']

    # set the reference file path (it is only used when a reference dataset is given)
    reference_file_path = None
    if reference_dataset_id.upper() != 'NONE':
        reference_file_path = xlib.get_cluster_reference_file(reference_dataset_id, reference_file)

    # set the transcriptome file path; it remains None when the combination of
    # assembly software and assembly type is not one of the known ones
    assembly_dir = xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)
    transcriptome_file = None
    if assembly_software == xlib.get_soapdenovotrans_code():
        if assembly_type.upper() == 'CONTIGS':
            transcriptome_file = '{0}/{1}-{2}.contig'.format(assembly_dir, experiment_id, assembly_dataset_id)
        elif assembly_type.upper() == 'SCAFFOLDS':
            transcriptome_file = '{0}/{1}-{2}.scafSeq'.format(assembly_dir, experiment_id, assembly_dataset_id)
    elif assembly_software == xlib.get_transabyss_code():
        transcriptome_file = '{0}/transabyss-final.fa'.format(assembly_dir)
    elif assembly_software == xlib.get_trinity_code():
        transcriptome_file = '{0}/Trinity.fasta'.format(assembly_dir)
    elif assembly_software == xlib.get_star_code():
        transcriptome_file = '{0}/Trinity-GG.fasta'.format(assembly_dir)
    elif assembly_software == xlib.get_cd_hit_est_code():
        transcriptome_file = '{0}/clustered-transcriptome.fasta'.format(assembly_dir)
    elif assembly_software == xlib.get_transcript_filter_code():
        transcriptome_file = '{0}/filtered-transcriptome.fasta'.format(assembly_dir)

    # stop before touching the file system when the assembly can not be located;
    # otherwise a half-written script would be left behind
    if transcriptome_file is None:
        error_list.append('*** ERROR: The assembly software {0} with assembly type {1} is not supported.'.format(assembly_software, assembly_type))
        return (False, error_list)

    # get the QUAST process script name
    quast_process_script = get_quast_process_script()

    # write the QUAST process script
    try:
        os.makedirs(os.path.dirname(quast_process_script), exist_ok=True)
        with open(quast_process_script, mode='w', encoding='utf8', newline='\n') as file_id:
            file_id.write('{0}\n'.format('#!/bin/bash'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('QUAST_PATH={0}/{1}/envs/{2}/bin'.format(xlib.get_cluster_app_dir(), xlib.get_miniconda3_name(), xlib.get_quast_bioconda_code())))
            file_id.write('{0}\n'.format('PATH=$QUAST_PATH:$PATH'))
            file_id.write('{0}\n'.format('SEP="#########################################"'))
            file_id.write('{0}\n'.format('cd {0}/{1}/bin'.format(xlib.get_cluster_app_dir(), xlib.get_miniconda3_name())))
            file_id.write('{0}\n'.format('source activate {0}'.format(xlib.get_quast_bioconda_code())))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function init'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    INIT_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format('    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'.format(cluster_name)))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function run_quast_process'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    cd {0}'.format(current_run_dir)))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    quast.py --version'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    /usr/bin/time \\'))
            file_id.write('{0}\n'.format('        --format="$SEP\\nElapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'))
            file_id.write('{0}\n'.format('        quast.py \\'))
            file_id.write('{0}\n'.format('            --threads {0} \\'.format(threads)))
            file_id.write('{0}\n'.format('            --output-dir {0} \\'.format(current_run_dir)))
            if reference_dataset_id.upper() != 'NONE':
                file_id.write('{0}\n'.format('            -R {0} \\'.format(reference_file_path)))
            if assembly_type.upper() == 'SCAFFOLDS':
                file_id.write('{0}\n'.format('            --scaffolds \\'))
            file_id.write('{0}\n'.format('            {0}'.format(transcriptome_file)))
            file_id.write('{0}\n'.format('    RC=$?'))
            file_id.write('{0}\n'.format('    if [ $RC -ne 0 ]; then manage_error quast.py $RC; fi'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function end'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    END_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format('    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'))
            file_id.write('{0}\n'.format('    calculate_duration'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    RECIPIENT={0}'.format(xconfiguration.get_contact_data())))
            file_id.write('{0}\n'.format('    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), xlib.get_quast_name())))
            file_id.write('{0}\n'.format('    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(xlib.get_quast_name(), cluster_name)))
            file_id.write('{0}\n'.format('    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'))
            file_id.write('{0}\n'.format('    exit 0'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function manage_error'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    END_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format('    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'))
            file_id.write('{0}\n'.format('    calculate_duration'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "ERROR: $1 returned error $2"'))
            file_id.write('{0}\n'.format('    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    RECIPIENT={0}'.format(xconfiguration.get_contact_data())))
            file_id.write('{0}\n'.format('    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), xlib.get_quast_name())))
            file_id.write('{0}\n'.format('    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(xlib.get_quast_name(), cluster_name)))
            file_id.write('{0}\n'.format('    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'))
            file_id.write('{0}\n'.format('    exit 3'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function calculate_duration'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    DURATION=`expr $END_DATETIME - $INIT_DATETIME`'))
            file_id.write('{0}\n'.format('    HH=`expr $DURATION / 3600`'))
            file_id.write('{0}\n'.format('    MM=`expr $DURATION % 3600 / 60`'))
            file_id.write('{0}\n'.format('    SS=`expr $DURATION % 60`'))
            file_id.write('{0}\n'.format('    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('init'))
            file_id.write('{0}\n'.format('run_quast_process'))
            file_id.write('{0}\n'.format('end'))
    except Exception as e:
        # keep the original message first and add the real cause after it, so the
        # failure can be diagnosed; KeyboardInterrupt/SystemExit are not trapped
        error_list.append('*** ERROR: The file {0} can not be created'.format(quast_process_script))
        error_list.append('*** EXCEPTION: "{0}".'.format(e))
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Beispiel #8
0
def build_busco_process_script(cluster_name, current_run_dir):
    '''
    Build the current BUSCO process script.

    The options are read from the BUSCO config file and a Bash script that
    downloads the lineage data and runs "run_BUSCO.py" on the selected assembly
    file is written to the path returned by "get_busco_process_script()".

    Parameters:
        cluster_name: cluster name; it is only embedded in the log and mail
            messages of the generated script.
        current_run_dir: directory used by the generated script as working
            directory; its base name is passed to BUSCO as output name.

    Returns:
        A tuple (OK, error_list). OK is False when the script could not be
        built; error_list then holds the error messages.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the BUSCO option dictionary
    busco_option_dict = xlib.get_option_dict(get_busco_config_file())

    # get the options
    experiment_id = busco_option_dict['identification']['experiment_id']
    assembly_software = busco_option_dict['identification']['assembly_software']
    assembly_dataset_id = busco_option_dict['identification']['assembly_dataset_id']
    assembly_type = busco_option_dict['identification']['assembly_type']
    ncpu = busco_option_dict['BUSCO parameters']['ncpu']
    lineage_data = busco_option_dict['BUSCO parameters']['lineage_data']
    lineage_data_file = '{0}.tar.gz'.format(lineage_data)
    lineage_data_url = 'http://busco.ezlab.org/v2/datasets/{0}'.format(lineage_data_file)
    mode = busco_option_dict['BUSCO parameters']['mode'].lower()
    evalue = busco_option_dict['BUSCO parameters']['evalue']
    limit = busco_option_dict['BUSCO parameters']['limit']
    species = busco_option_dict['BUSCO parameters']['species']
    long_mode = busco_option_dict['BUSCO parameters']['long'].upper()
    # the Augustus options are kept as typed: they are written verbatim into the
    # command line, so changing their case would change the parameters
    augustus_options = busco_option_dict['BUSCO parameters']['augustus_options']

    # set the transcriptome file path; it remains None when the combination of
    # assembly software and assembly type is not one of the known ones
    # (the assembly type is compared case-insensitively, as the QUAST builder does)
    assembly_dir = xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)
    transcriptome_file = None
    if assembly_software == xlib.get_soapdenovotrans_code():
        if assembly_type.upper() == 'CONTIGS':
            transcriptome_file = '{0}/{1}-{2}.contig'.format(assembly_dir, experiment_id, assembly_dataset_id)
        elif assembly_type.upper() == 'SCAFFOLDS':
            transcriptome_file = '{0}/{1}-{2}.scafSeq'.format(assembly_dir, experiment_id, assembly_dataset_id)
    elif assembly_software == xlib.get_transabyss_code():
        transcriptome_file = '{0}/transabyss-final.fa'.format(assembly_dir)
    elif assembly_software == xlib.get_trinity_code():
        transcriptome_file = '{0}/Trinity.fasta'.format(assembly_dir)
    elif assembly_software == xlib.get_star_code():
        transcriptome_file = '{0}/Trinity-GG.fasta'.format(assembly_dir)
    elif assembly_software == xlib.get_cd_hit_est_code():
        transcriptome_file = '{0}/clustered-transcriptome.fasta'.format(assembly_dir)
    elif assembly_software == xlib.get_transcript_filter_code():
        transcriptome_file = '{0}/filtered-transcriptome.fasta'.format(assembly_dir)

    # stop before touching the file system when the assembly can not be located;
    # otherwise a half-written script would be left behind
    if transcriptome_file is None:
        error_list.append('*** ERROR: The assembly software {0} with assembly type {1} is not supported.'.format(assembly_software, assembly_type))
        return (False, error_list)

    # get the BUSCO process script name
    busco_process_script = get_busco_process_script()

    # write the BUSCO process script
    try:
        os.makedirs(os.path.dirname(busco_process_script), exist_ok=True)
        with open(busco_process_script, mode='w', encoding='utf8', newline='\n') as file_id:
            file_id.write('{0}\n'.format('#!/bin/bash'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('BUSCO_PATH={0}/{1}/envs/{2}/bin'.format(xlib.get_cluster_app_dir(), xlib.get_miniconda3_name(), xlib.get_busco_bioconda_code())))
            file_id.write('{0}\n'.format('export PATH=$BUSCO_PATH:$PATH'))
            file_id.write('{0}\n'.format('SEP="#########################################"'))
            file_id.write('{0}\n'.format('cd {0}/{1}/bin'.format(xlib.get_cluster_app_dir(), xlib.get_miniconda3_name())))
            file_id.write('{0}\n'.format('source activate {0}'.format(xlib.get_busco_bioconda_code())))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function init'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    INIT_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format('    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'.format(cluster_name)))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function download_lineage_data'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    cd {0}'.format(current_run_dir)))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "Downloading lineage data ..."'))
            file_id.write('{0}\n'.format('    wget --quiet --output-document ./{0} {1}'.format(lineage_data_file, lineage_data_url)))
            file_id.write('{0}\n'.format('    tar -xzvf ./{0}'.format(lineage_data_file)))
            file_id.write('{0}\n'.format('    rm ./{0}'.format(lineage_data_file)))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function run_busco_process'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    cd {0}'.format(current_run_dir)))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    run_BUSCO.py --version'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    /usr/bin/time \\'))
            file_id.write('{0}\n'.format('        --format="$SEP\\nElapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'))
            file_id.write('{0}\n'.format('        run_BUSCO.py \\'))
            file_id.write('{0}\n'.format('            --cpu={0} \\'.format(ncpu)))
            file_id.write('{0}\n'.format('            --lineage_path=./{0} \\'.format(lineage_data)))
            file_id.write('{0}\n'.format('            --mode={0} \\'.format(mode)))
            file_id.write('{0}\n'.format('            --evalue={0} \\'.format(evalue)))
            file_id.write('{0}\n'.format('            --limit={0} \\'.format(limit)))
            if species.upper() != 'NONE':
                file_id.write('{0}\n'.format('            --species={0} \\'.format(species)))
            if long_mode == 'YES':
                file_id.write('{0}\n'.format('            --long \\'))
            if augustus_options.upper() != 'NONE':
                # NOTE(review): the flag name "--august_options" is kept as it was;
                # confirm it against the options of the installed run_BUSCO.py
                file_id.write('{0}\n'.format("            --august_options='{0}' \\".format(augustus_options)))
            file_id.write('{0}\n'.format('            --in={0} \\'.format(transcriptome_file)))
            file_id.write('{0}\n'.format('            --out={0}'.format(os.path.basename(current_run_dir))))
            file_id.write('{0}\n'.format('    RC=$?'))
            file_id.write('{0}\n'.format('    if [ $RC -ne 0 ]; then manage_error run_BUSCO.py $RC; fi'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function end'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    END_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format('    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'))
            file_id.write('{0}\n'.format('    calculate_duration'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    RECIPIENT={0}'.format(xconfiguration.get_contact_data())))
            file_id.write('{0}\n'.format('    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), xlib.get_busco_name())))
            file_id.write('{0}\n'.format('    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(xlib.get_busco_name(), cluster_name)))
            file_id.write('{0}\n'.format('    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'))
            file_id.write('{0}\n'.format('    exit 0'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function manage_error'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    END_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format('    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'))
            file_id.write('{0}\n'.format('    calculate_duration'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "ERROR: $1 returned error $2"'))
            file_id.write('{0}\n'.format('    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    RECIPIENT={0}'.format(xconfiguration.get_contact_data())))
            file_id.write('{0}\n'.format('    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), xlib.get_busco_name())))
            file_id.write('{0}\n'.format('    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(xlib.get_busco_name(), cluster_name)))
            file_id.write('{0}\n'.format('    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'))
            file_id.write('{0}\n'.format('    exit 3'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function calculate_duration'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    DURATION=`expr $END_DATETIME - $INIT_DATETIME`'))
            file_id.write('{0}\n'.format('    HH=`expr $DURATION / 3600`'))
            file_id.write('{0}\n'.format('    MM=`expr $DURATION % 3600 / 60`'))
            file_id.write('{0}\n'.format('    SS=`expr $DURATION % 60`'))
            file_id.write('{0}\n'.format('    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('init'))
            file_id.write('{0}\n'.format('download_lineage_data'))
            file_id.write('{0}\n'.format('run_busco_process'))
            file_id.write('{0}\n'.format('end'))
    except Exception as e:
        # keep the original message first and add the real cause after it, so the
        # failure can be diagnosed; KeyboardInterrupt/SystemExit are not trapped
        error_list.append('*** ERROR: The file {0} can not be created'.format(busco_process_script))
        error_list.append('*** EXCEPTION: "{0}".'.format(e))
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Beispiel #9
0
    def execute(self, event=None):
        '''
        List the result datasets of the selected experiment in the cluster and
        show them in a table dialog.

        Parameters:
            event: not used by this method (presumably it is present so that the
                method can be bound as an event handler -- TODO confirm).
        '''

        # validate inputs
        OK = self.validate_inputs()
        if not OK:
            message = 'Some input values are not OK.'
            tkinter.messagebox.showerror('{0} - {1}'.format(xlib.get_project_name(), self.head), message)

        # get the result dataset dictionary of the experiment
        if OK:
            experiment_id = self.wrapper_experiment_id.get()

            # the command echoes every entry of the experiment directory that is
            # not a regular file (those whose "ls -ld" line does not start with "-")
            command = 'cd  {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'.format(xlib.get_cluster_result_dir(), experiment_id)
            (OK, stdout, stderr) = xssh.execute_cluster_command(self.ssh_client, command)
            if OK:

                # build once the (code, name) table of the known applications; the
                # order is significant because the first matching prefix wins
                app_table = (
                    (xlib.get_bedtools_code(), xlib.get_bedtools_name()),
                    (xlib.get_blastplus_code(), xlib.get_blastplus_name()),
                    (xlib.get_bowtie2_code(), xlib.get_bowtie2_name()),
                    (xlib.get_busco_code(), xlib.get_busco_name()),
                    (xlib.get_cd_hit_code(), xlib.get_cd_hit_name()),
                    (xlib.get_cd_hit_est_code(), xlib.get_cd_hit_est_name()),
                    (xlib.get_detonate_code(), xlib.get_detonate_name()),
                    (xlib.get_emboss_code(), xlib.get_emboss_name()),
                    (xlib.get_fastqc_code(), xlib.get_fastqc_name()),
                    (xlib.get_gmap_code(), xlib.get_gmap_name()),
                    (xlib.get_gmap_gsnap_code(), xlib.get_gmap_gsnap_name()),
                    (xlib.get_gzip_code(), xlib.get_gzip_name()),
                    (xlib.get_insilico_read_normalization_code(), xlib.get_insilico_read_normalization_name()),
                    (xlib.get_miniconda3_code(), xlib.get_miniconda3_name()),
                    (xlib.get_ngshelper_code(), xlib.get_ngshelper_name()),
                    (xlib.get_quast_code(), xlib.get_quast_name()),
                    (xlib.get_r_code(), xlib.get_r_name()),
                    (xlib.get_ref_eval_code(), xlib.get_ref_eval_name()),
                    (xlib.get_rnaquast_code(), xlib.get_rnaquast_name()),
                    (xlib.get_rsem_code(), xlib.get_rsem_name()),
                    (xlib.get_rsem_eval_code(), xlib.get_rsem_eval_name()),
                    (xlib.get_samtools_code(), xlib.get_samtools_name()),
                    (xlib.get_soapdenovotrans_code(), xlib.get_soapdenovotrans_name()),
                    (xlib.get_star_code(), xlib.get_star_name()),
                    (xlib.get_transabyss_code(), xlib.get_transabyss_name()),
                    (xlib.get_transcript_filter_code(), xlib.get_transcript_filter_name()),
                    (xlib.get_transcriptome_blastx_code(), xlib.get_transcriptome_blastx_name()),
                    (xlib.get_transrate_code(), xlib.get_transrate_name()),
                    (xlib.get_trimmomatic_code(), xlib.get_trimmomatic_name()),
                    (xlib.get_trinity_code(), xlib.get_trinity_name()),
                )

                result_dataset_dict = {}
                for line in stdout:
                    result_dataset_id = line.rstrip('\n')
                    if result_dataset_id == 'lost+found':
                        continue

                    # the identification is expected to end in "-yymmdd-hhmmss";
                    # placeholders are used when it does not have that layout
                    mo = re.search(r'^(.+)\-(.+)\-(.+)$', result_dataset_id)
                    if mo is None:
                        run_date = '0000-00-00'
                        run_time = '00:00:00'
                    else:
                        yymmdd = mo.group(2)
                        hhmmss = mo.group(3)
                        run_date = '20{0}-{1}-{2}'.format(yymmdd[:2], yymmdd[2:4], yymmdd[4:])
                        run_time = '{0}:{1}:{2}'.format(hhmmss[:2], hhmmss[2:4], hhmmss[4:])

                    # get the application name from the identification prefix
                    bioinfo_app_name = 'xxx'
                    for (app_code, app_name) in app_table:
                        if result_dataset_id.startswith(app_code + '-'):
                            bioinfo_app_name = app_name
                            break

                    result_dataset_dict[result_dataset_id] = {'experiment_id': experiment_id, 'result_dataset_id': result_dataset_id, 'bioinfo_app': bioinfo_app_name, 'date': run_date, 'time': run_time}

        if OK:

            # warn if there is not any result dataset (the table is shown anyway)
            if result_dataset_dict == {}:
                message = 'There is not any run.'
                tkinter.messagebox.showwarning('{0} - {1}'.format(xlib.get_project_name(), self.head), message)

            # build the data list
            data_list = ['experiment_id', 'result_dataset_id', 'bioinfo_app', 'date', 'time']

            # build the data dictionary
            data_dict = {}
            data_dict['experiment_id'] = {'text': 'Experiment id. / Process', 'width': 200, 'aligment': 'left'}
            data_dict['result_dataset_id'] = {'text': 'Result dataset', 'width': 200, 'aligment': 'left'}
            data_dict['bioinfo_app'] = {'text': 'Bioinfo app / Utility', 'width': 200, 'aligment': 'left'}
            data_dict['date'] = {'text': 'Date', 'width': 80, 'aligment': 'right'}
            data_dict['time'] = {'text': 'Time', 'width': 80, 'aligment': 'right'}

            # create the dialog Table to show the result datasets and wait until it is closed
            dialog_table = gdialogs.DialogTable(self, 'Experiment runs in {0}/{1}'.format(xlib.get_cluster_result_dir(), experiment_id), 400, 900, data_list, data_dict, result_dataset_dict, 'view_result_logs', [self.wrapper_cluster_name.get()])
            self.wait_window(dialog_table)

            # close the form
            self.close()
Beispiel #10
0
def form_install_bioinfo_app(app_code):
    '''
    Install the bioinfo application software in the cluster.

    The header is printed, the user is asked to confirm the installation and,
    when it is confirmed, the installation function of "xbioinfoapp" that
    corresponds to the application is called.

    Parameters:
        app_code: code of the application to install. The supported codes are
            the ones returned by xlib.get_blastplus_code,
            xlib.get_diamond_code, xlib.get_entrez_direct_code,
            xlib.get_miniconda3_code, xlib.get_r_code and
            xlib.get_transdecoder_code. Any other code is reported as not
            supported and nothing is installed.

    Returns:
        None.
    '''

    # initialize the control variable
    OK = True

    # supported applications: (code getter, name getter, Conda package code
    # getter, Conda package version); Miniconda3 and R are installed by their
    # own functions, so they have no Conda package data; BLAST+ and DIAMOND
    # are pinned to a fixed version while the other packages use 'last'
    supported_app_list = (
        (xlib.get_blastplus_code, xlib.get_blastplus_name,
         xlib.get_blastplus_conda_code, '2.9.0'),
        (xlib.get_diamond_code, xlib.get_diamond_name,
         xlib.get_diamond_conda_code, '0.9.34'),
        (xlib.get_entrez_direct_code, xlib.get_entrez_direct_name,
         xlib.get_entrez_direct_conda_code, 'last'),
        (xlib.get_miniconda3_code, xlib.get_miniconda3_name, None, None),
        (xlib.get_r_code, xlib.get_r_name, None, None),
        (xlib.get_transdecoder_code, xlib.get_transdecoder_name,
         xlib.get_transdecoder_conda_code, 'last'),
    )

    # set the bioinfo application name and its Conda package data; the name
    # stays None when the code does not correspond to a supported application
    app_name = None
    get_conda_package_code = None
    conda_package_version = None
    for (get_code, get_name, get_package_code,
         package_version) in supported_app_list:
        if app_code == get_code():
            app_name = get_name()
            get_conda_package_code = get_package_code
            conda_package_version = package_version
            break

    # print the header
    clib.clear_screen()
    if app_name is None:
        # an unknown code used to fail here with an unbound "app_name";
        # report it and skip the confirmation and the installation
        clib.print_headers_with_environment('Install software')
        print(xlib.get_separator())
        print(
            f'*** ERROR: The application code {app_code} is not supported.')
        OK = False
    else:
        clib.print_headers_with_environment(f'{app_name} - Install software')

    # confirm the software installation
    if OK:
        print(xlib.get_separator())
        if app_code == xlib.get_miniconda3_code():
            confirmation_text = f'{app_name} (Conda infrastructure) is going to be installed. All Conda packages previously installed will be lost and they have to be reinstalled.'
        elif app_code == xlib.get_r_code():
            confirmation_text = f'{app_name} and analysis packages are going to be installed. The previous version will be lost, if it exists.'
        else:
            confirmation_text = f'The {app_name} Conda package is going to be installed. The previous version will be lost, if it exists.'
        OK = clib.confirm_action(confirmation_text)

    # install the software
    # NOTE(review): the value returned by the installation functions is
    # stored in OK but it is not checked afterwards, as in the previous code
    if OK:

        # install the Miniconda3 software
        if app_code == xlib.get_miniconda3_code():
            devstdout = xlib.DevStdOut(xbioinfoapp.install_miniconda3.__name__)
            OK = xbioinfoapp.install_miniconda3(devstdout, function=None)

        # install R and analysis packages
        elif app_code == xlib.get_r_code():
            devstdout = xlib.DevStdOut(xbioinfoapp.install_r.__name__)
            OK = xbioinfoapp.install_r(devstdout, function=None)

        # install the software distributed as a single Conda package
        # (BLAST+, DIAMOND, Entrez Direct and TransDecoder)
        else:
            package_code_list = [(get_conda_package_code(),
                                  conda_package_version)]
            devstdout = xlib.DevStdOut(
                xbioinfoapp.install_conda_package_list.__name__)
            OK = xbioinfoapp.install_conda_package_list(app_code,
                                                        app_name,
                                                        package_code_list,
                                                        devstdout,
                                                        function=None)

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
Beispiel #11
0
def form_list_cluster_experiment_processes():
    '''
    List the processes of an experiment in the cluster.

    The cluster name and the experiment identification are requested to the
    user, the subdirectories of the experiment result directory are read from
    the cluster through SSH and every one of them is printed with the name of
    the bioinfo application or utility whose code is the prefix of the
    directory name ("<code>-..."). "xxx" is printed when no code matches.

    The function has no parameters and returns None.
    '''

    # initialize the control variables; the state of the SSH connection is
    # tracked apart from OK so that the connection is closed even when a later
    # step fails
    OK = True
    is_ssh_client_connected = False

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(
        'Logs - List experiment processes in the cluster')

    # get the cluster name
    print(xlib.get_separator())
    if xec2.get_running_cluster_list(only_environment_cluster=True,
                                     volume_creator_included=False) != []:
        cluster_name = cinputs.input_cluster_name(
            volume_creator_included=False, help=True)
    else:
        print('WARNING: There is not any running cluster.')
        OK = False

    # create the SSH client connection
    if OK:
        (OK, error_list,
         ssh_client) = xssh.create_ssh_client_connection(cluster_name)
        for error in error_list:
            print(error)
        is_ssh_client_connected = OK

    # get experiment identification
    if OK:
        experiment_id = cinputs.input_experiment_id(ssh_client, help=True)
        if experiment_id == '':
            print(
                f'WARNING: The cluster {cluster_name} does not have experiment data.'
            )
            OK = False

    # get the result dataset list of the experiment: the remote command echoes
    # the entries of the experiment directory that are not regular files
    if OK:
        command = f'cd  {xlib.get_cluster_result_dir()}/{experiment_id}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'
        (OK, stdout, _) = xssh.execute_cluster_command(ssh_client, command)
        if OK:
            result_dataset_id_list = [
                dataset_id
                for dataset_id in (line.rstrip('\n') for line in stdout)
                if dataset_id != 'lost+found'
            ]

    # print the result dataset identification list of the experiment
    if OK:
        print(xlib.get_separator())
        if result_dataset_id_list == []:
            print(
                f'*** WARNING: There is not any result dataset of the experiment {experiment_id}.'
            )
        else:
            result_dataset_id_list.sort()
            # set data width
            result_dataset_width = 30
            bioinfo_app_width = 25
            # set line template
            line_template = f'{{0:{result_dataset_width}}}   {{1:{bioinfo_app_width}}}'
            # print header
            print(
                line_template.format('Result dataset',
                                     'Bioinfo app / Utility'))
            print(
                line_template.format('=' * result_dataset_width,
                                     '=' * bioinfo_app_width))
            # set the applications whose code can be the prefix of a result
            # dataset; every item is the <app> part of the "xlib" functions
            # "get_<app>_code" and "get_<app>_name" and the order is the one
            # in which the codes are checked
            # NOTE(review): the names of the rebuild and recreate TOA database
            # processes were got with functions prefixed by "get_get_"; it is
            # assumed to be a typo because every other name is got with
            # "get_<app>_name" -- confirm against xlib
            app_list = (
                'bedtools', 'blastplus', 'bcftools', 'bowtie2', 'busco',
                'cd_hit', 'cd_hit_est', 'cuffdiff', 'cufflinks',
                'cufflinks_cuffmerge', 'cuffnorm', 'cuffquant', 'cutadapt',
                'ddradseq_simulation', 'ddradseqtools', 'detonate', 'diamond',
                'emboss', 'entrez_direct', 'express', 'fastqc', 'ggtrinity',
                'gmap_gsnap', 'gmap', 'gsnap', 'gzip', 'hisat2', 'htseq',
                'htseq_count', 'insilico_read_normalization', 'ipyrad',
                'kallisto', 'miniconda3', 'ngshelper', 'quast', 'r',
                'raddesigner', 'ref_eval', 'rnaquast', 'rsem', 'rsem_eval',
                'rsitesearch', 'samtools', 'soapdenovo2', 'soapdenovotrans',
                'star', 'starcode', 'toa',
                'toa_process_download_basic_data',
                'toa_process_download_dicots_04',
                'toa_process_download_gene',
                'toa_process_download_go',
                'toa_process_download_gymno_01',
                'toa_process_download_interpro',
                'toa_process_download_monocots_04',
                'toa_process_download_taxonomy',
                'toa_process_gilist_viridiplantae_nucleotide_gi',
                'toa_process_gilist_viridiplantae_protein_gi',
                'toa_process_load_basic_data',
                'toa_process_load_dicots_04',
                'toa_process_load_gene',
                'toa_process_load_go',
                'toa_process_load_gymno_01',
                'toa_process_load_interpro',
                'toa_process_load_monocots_04',
                'toa_process_merge_annotations',
                'toa_process_nr_blastplus_db',
                'toa_process_nr_diamond_db',
                'toa_process_nt_blastplus_db',
                'toa_process_pipeline_aminoacid',
                'toa_process_pipeline_nucleotide',
                'toa_process_proteome_dicots_04',
                'toa_process_proteome_gymno_01',
                'toa_process_proteome_monocots_04',
                'toa_process_proteome_refseq_plant',
                'toa_process_rebuild_toa_database',
                'toa_process_recreate_toa_database',
                'tophat', 'transabyss', 'transcript_filter',
                'transcriptome_blastx', 'transdecoder', 'transrate',
                'trimmomatic', 'trinity', 'variant_calling', 'vcftools',
                'vcftools_perl_libraries', 'vsearch',
            )
            # print detail lines
            for result_dataset_id in result_dataset_id_list:

                # look for the first application whose code followed by "-"
                # is the prefix of the result dataset identification; the
                # "xlib" functions are got one by one, so only the ones
                # required until the match are used
                bioinfo_app_name = 'xxx'
                for app in app_list:
                    app_code = getattr(xlib, f'get_{app}_code')()
                    if result_dataset_id.startswith(app_code + '-'):
                        bioinfo_app_name = getattr(xlib, f'get_{app}_name')()
                        break

                print(line_template.format(result_dataset_id,
                                           bioinfo_app_name))

    # close the SSH client connection whenever it was created, even though a
    # later step has failed
    if is_ssh_client_connected:
        xssh.close_ssh_client_connection(ssh_client)

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
Beispiel #12
0
def form_list_results_logs():
    '''
    List the processes of an experiment in the cluster.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment('Logs - List result logs')

    # get experiment identification
    experiment_id = cinputs.input_experiment_id()
    if experiment_id == '':
        print('WARNING: There is not any experiment/process run.')
        OK = False

    # get the dictionary of TOA configuration.
    if OK:
        toa_config_dict = xtoa.get_toa_config_dict()

    # get the result dataset list of the experiment
    if OK:
        experiment_dir = f'{toa_config_dict["RESULT_DIR"]}/{experiment_id}'
        subdir_list = [
            subdir for subdir in os.listdir(experiment_dir)
            if os.path.isdir(os.path.join(experiment_dir, subdir))
        ]
        result_dataset_id_list = []
        for subdir in subdir_list:
            result_dataset_id_list.append(subdir)

    # print the result dataset identification list of the experiment
    if OK:
        print(xlib.get_separator())
        if result_dataset_id_list == []:
            print(
                f'*** WARNING: There is not any result dataset of the experiment/process {experiment_id}.'
            )
        else:
            result_dataset_id_list.sort()
            # set data width
            result_dataset_width = 25
            bioinfo_app_width = 25
            # set line template
            line_template = '{0:' + str(
                result_dataset_width) + '}   {1:' + str(
                    bioinfo_app_width) + '}'
            # print header
            print(
                line_template.format('Result dataset',
                                     'Bioinfo app / Utility'))
            print(
                line_template.format('=' * result_dataset_width,
                                     '=' * bioinfo_app_width))
            # print detail lines
            for result_dataset_id in result_dataset_id_list:

                if result_dataset_id.startswith(xlib.get_blastplus_code() +
                                                '-'):
                    bioinfo_app_name = xlib.get_blastplus_name()

                elif result_dataset_id.startswith(xlib.get_diamond_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_diamond_name()

                elif result_dataset_id.startswith(
                        xlib.get_entrez_direct_code() + '-'):
                    bioinfo_app_name = xlib.get_entrez_direct_name()

                elif result_dataset_id.startswith(xlib.get_miniconda3_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_miniconda3_name()

                elif result_dataset_id.startswith(xlib.get_r_code() + '-'):
                    bioinfo_app_name = xlib.get_r_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_basic_data_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gene_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_go_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_interpro_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_monocots_04_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_taxonomy_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_taxonomy_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.
                        get_toa_process_gilist_viridiplantae_nucleotide_gi_code(
                        ) + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.
                        get_toa_process_gilist_viridiplantae_protein_gi_code()
                        + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_protein_gi_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_basic_data_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gene_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_go_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_interpro_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_monocots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_merge_annotations_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_merge_annotations_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nr_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_blastplus_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nr_diamond_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_diamond_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nt_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nt_blastplus_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_pipeline_aminoacid_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_aminoacid_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_pipeline_nucleotide_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_nucleotide_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_monocots_04_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_refseq_plant_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_refseq_plant_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_rebuild_toa_database_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_get_toa_process_rebuild_toa_database_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_recreate_toa_database_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_get_toa_process_recreate_toa_database_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_transdecoder_code() + '-'):
                    bioinfo_app_name = xlib.get_transdecoder_name()

                else:
                    bioinfo_app_name = 'xxx'

                print(line_template.format(result_dataset_id,
                                           bioinfo_app_name))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')