Esempio n. 1
0
    def populate_combobox_experiment_id(self):
        '''
        Load the experiment identifications of the cluster in "combobox_experiment_id".

        The cluster result directory is listed through the SSH client of the
        form; every listed name except "lost+found" is taken as an experiment
        identification. If no identification is obtained (the listing failed
        or there is none), a warning is shown and the combobox values are not
        modified.
        '''

        # reset the value currently selected in the combobox
        self.wrapper_experiment_id.set('')

        # list the content of the cluster result directory
        listing_command = 'ls {0}'.format(xlib.get_cluster_result_dir())
        (succeeded, output_lines, _) = xssh.execute_cluster_command(self.ssh_client, listing_command)

        # keep every listed name but "lost+found"
        experiment_ids = []
        if succeeded:
            listed_names = (raw_line.rstrip('\n') for raw_line in output_lines)
            experiment_ids = [name for name in listed_names if name != 'lost+found']

        # load the experiment identifications in the combobox when there are any
        if experiment_ids:
            self.combobox_experiment_id['values'] = experiment_ids
            return

        # otherwise, warn the user that there is nothing to show
        title = '{0} - {1}'.format(xlib.get_project_name(), self.head)
        tkinter.messagebox.showwarning(title, 'The cluster has not experiment data.')
Esempio n. 2
0
    def combobox_cluster_name_selected_item(self, event=None):
        '''
        Process the event when an item of "combobox_cluster_name" has been selected.

        When the selected cluster differs from the previously selected one,
        the SSH client connection of the previous cluster (if any) is closed
        and a new one is created to the node "master" of the selected cluster.
        Then "combobox_experiment_id" is populated.

        If the connection can not be created, the errors are shown,
        self.close() is called and nothing else is done: the experiment
        combobox is not populated with an unusable SSH client.

        event: event object received from the widget binding; it is not used.
        '''

        # set cursor to show busy status
        self.main.config(cursor='watch')
        self.main.update()

        try:

            # verify if the cluster name selected is different to the previous cluster name
            selected_cluster_name = self.wrapper_cluster_name.get()
            if selected_cluster_name != self.cluster_name_ant:

                # close SSH client connection (a previous cluster name means a connection was opened)
                if self.cluster_name_ant is not None:
                    xssh.close_ssh_client_connection(self.ssh_client)

                # create the SSH client connection
                (OK, error_list, self.ssh_client) = xssh.create_ssh_client_connection(selected_cluster_name, 'master')
                if not OK:
                    message = ''.join('{0}\n'.format(error) for error in error_list)
                    tkinter.messagebox.showerror('{0} - {1}'.format(xlib.get_project_name(), self.head), message)
                    # there is no open connection now: forget the previous
                    # cluster so that a later selection neither closes a dead
                    # connection nor skips the reconnection
                    self.cluster_name_ant = None
                    self.close()
                    return

                # save current cluster name as previous cluster name
                self.cluster_name_ant = selected_cluster_name

            # load data in "combobox_experiment_id"
            self.populate_combobox_experiment_id()

        finally:

            # set cursor to show normal status (also on the early return and on errors)
            self.main.config(cursor='')
            self.main.update()
Esempio n. 3
0
def print_headers_without_environment(process_name):
    '''
    Print the headers of a screen without the environment information.

    The header is a box drawn with "+", "-" and "|" around a title made of
    the project name, the project version and "process_name", followed by an
    empty line.
    '''

    # build the title with the project name, the version and the process name
    project_name = xlib.get_project_name()
    project_version = xlib.get_project_version()
    title = '{0} v {1} - {2}'.format(project_name, project_version, process_name)

    # the top and bottom borders are as wide as the title row
    border = '+-{0}-+'.format('-' * len(title))

    # print the box and a trailing empty line
    for row in (border, '| {0} |'.format(title), border):
        print(row)
    print()
Esempio n. 4
0
    def populate_combobox_cluster_name(self):
        '''
        Load the names of the running clusters in "combobox_cluster_name".

        The volume creator is excluded from the list. If the list is empty, a
        warning is shown and the combobox values are not modified.
        '''

        # reset the value currently selected in the combobox
        self.wrapper_cluster_name.set('')

        # get the running clusters, leaving the volume creator out
        cluster_names = xec2.get_running_cluster_list(volume_creator_included=False)

        # load the names in the combobox when there is at least one
        if cluster_names != []:
            self.combobox_cluster_name['values'] = cluster_names
            return

        # otherwise, warn the user that nothing is running
        title = '{0} - {1}'.format(xlib.get_project_name(), self.head)
        tkinter.messagebox.showwarning(title, 'There is not any running cluster.')
Esempio n. 5
0
def build_parser():
    '''
    Build the parser with the available arguments.

    Returns an argparse.ArgumentParser with a custom usage text (project name,
    version, program file name and description) and the single optional
    argument "--mode".
    '''

    # the module xlib is imported here, when the parser is built
    import xlib

    # name of this program file, shown twice in the usage text
    program_name = os.path.basename(__file__)

    # build the heading of the usage text
    description = 'Description: This program start NGScloud2 both console mode and gui mode.'
    heading = '{0} v{1} - {2}\n\n{3}\n'.format(xlib.get_project_name(), xlib.get_project_version(), program_name, description)

    # NOTE(review): the leading '\r' presumably makes the heading overwrite the
    # "usage: " prefix that argparse prints -- confirm
    padded_heading = heading.ljust(len('usage:'))
    usage = '\r{0}\nUsage: {1} arguments'.format(padded_heading, program_name)

    # create the parser, rename the optional arguments group and add the arguments
    parser = argparse.ArgumentParser(usage=usage)
    parser._optionals.title = 'Arguments'
    parser.add_argument('--mode', dest='mode', help='Mode: console or gui')

    # return the parser
    return parser
Esempio n. 6
0
def print_headers_with_environment(process_name):
    '''
    Print the headers of a screen with environment information.

    A boxed title (project name, project version and "process_name") is
    printed first; then a line with the environment and the current region
    and zone names. Each part is followed by an empty line.
    '''

    # build the title and the border of its box
    title = '{0} v {1} - {2}'.format(xlib.get_project_name(), xlib.get_project_version(), process_name)
    border = '+-{0}-+'.format('-' * len(title))

    # print the boxed title
    print(border, '| {0} |'.format(title), border, sep='\n')
    print()

    # get current region and zone names
    current_region = xconfiguration.get_current_region_name()
    current_zone = xconfiguration.get_current_zone_name()

    # print the environment and the current region and zone names
    environment_line = 'Environment: {0} - Region: {1} - Zone: {2}'.format(xconfiguration.environment, current_region, current_zone)
    print(environment_line)
    print()
Esempio n. 7
0
def form_create_ngscloud_config_file(is_menu_call):
    '''
    Create the NGScloud config file corresponding to the environment.

    The stored AWS data and contact e-mail address are offered to the user to
    be confirmed or changed, the AWS credentials are verified and, if they are
    right (and, in a menu call, the user confirms the action), the config file
    is created. A continuation message is always shown at the end.

    is_menu_call: True when the form is opened from a menu; then the screen
        header is printed and a confirmation is asked before creating the
        file. When False, wrong credentials raise an exception.

    Raises xlib.ProgramException: 'EXIT' if the credentials are wrong and it
        is not a menu call; 'C001' if the config file can not be created.
    '''

    # print the header (the project name is not hard-coded, as in the other messages)
    if is_menu_call:
        clib.clear_screen()
        clib.print_headers_with_environment('Configuration - Recreate {0} config file'.format(xlib.get_project_name()))

    # get current region and zone names
    # NOTE(review): these values are not used below; the calls are kept in
    # case they have side effects -- confirm and remove
    region_name = xconfiguration.get_current_region_name()
    zone_name = xconfiguration.get_current_zone_name()

    # get basic AWS data and contact e-mail address from NGScloud config file
    (user_id, access_key_id, secret_access_key) = xconfiguration.get_basic_aws_data()
    email = xconfiguration.get_contact_data()

    # confirm or change the AWS data and contact e-mail address
    print(xlib.get_separator())
    user_id = cinputs.input_user_id(user_id)
    access_key_id = cinputs.input_access_key_id(access_key_id)
    secret_access_key = cinputs.input_secret_access_key(secret_access_key)
    email = cinputs.input_email(email)

    # verify the AWS access key identification and the AWS secret access key
    print(xlib.get_separator())
    print('Verifying the AWS access key identification and the AWS secret access key')
    OK = xec2.verify_aws_credentials(access_key_id, secret_access_key)
    if OK:
        print('The credentials are OK.')
    else:
        print('ERROR: The credentials are wrong. Please review your access key identification and secret access key in the AWS web.')
        if not is_menu_call:
            raise xlib.ProgramException('EXIT')

    # confirm the creation of the NGScloud config file (only asked in a menu call)
    if OK and is_menu_call:
        print(xlib.get_separator())
        OK = clib.confirm_action('The {0} config file is going to be created. The previous files will be lost.'.format(xlib.get_project_name()))

    # create the NGScloud config file corresponding to the environment
    if OK:
        print(xlib.get_separator())
        print('The file {0} is being created ...'.format(xconfiguration.get_ngscloud_config_file()))
        (OK, error_list) = xconfiguration.create_ngscloud_config_file(user_id, access_key_id, secret_access_key, email)
        if OK:
            print('The config file is created with default values.')
            print()
            print('You can modify the conection data and contact e-mail address in:')
            print('    "Cloud control" -> "Configuration" -> "Update connection data and contact e-mail"')
            print('The assigned region and zone are {0} and {1}, respectively. You can modify them in:'.format(xconfiguration.get_default_region_name(), xconfiguration.get_default_zone_name()))
            print('    "Cloud control" -> "Configuration" -> "Update region and zone data"')
        else:
            for error in error_list:
                print(error)
            raise xlib.ProgramException('C001')

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
Esempio n. 8
0
def build_gmap_process_script(cluster_name, current_run_dir):
    '''
    Build the current GMAP process script.

    The options are read from the GMAP config file and a Bash script is
    written to the path given by get_gmap_process_script(). The script defines
    the functions init, build_gmap_database, run_gmap_process, end,
    manage_error and calculate_duration, and finally calls init,
    build_gmap_database, run_gmap_process and end.

    cluster_name: name of the cluster; it is only written in the messages of
        the script.
    current_run_dir: directory where the script functions change to before
        running gmap_build and gmap.

    Returns the tuple (OK, error_list): OK is False and error_list holds the
    error messages when the transcriptome file can not be determined from the
    assembly software and type, when an item of "other_parameters" is not
    like --name or --name=value, or when the script can not be written. In
    the first two cases no file is written.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the GMAP option dictionary
    gmap_option_dict = xlib.get_option_dict(get_gmap_config_file())

    # get the options
    experiment_id = gmap_option_dict['identification']['experiment_id']
    reference_dataset_id = gmap_option_dict['identification']['reference_dataset_id']
    reference_file = gmap_option_dict['identification']['reference_file']
    assembly_software = gmap_option_dict['identification']['assembly_software']
    assembly_dataset_id = gmap_option_dict['identification']['assembly_dataset_id']
    assembly_type = gmap_option_dict['identification']['assembly_type']
    threads = gmap_option_dict['GMAP parameters']['threads']
    kmer = gmap_option_dict['GMAP parameters']['kmer']
    sampling = gmap_option_dict['GMAP parameters']['sampling']
    input_buffer_size = gmap_option_dict['GMAP parameters']['input-buffer-size']
    output_buffer_size = gmap_option_dict['GMAP parameters']['output-buffer-size']
    prunelevel = gmap_option_dict['GMAP parameters']['prunelevel']
    # named "output_format" so that the builtin "format" is not shadowed
    output_format = gmap_option_dict['GMAP parameters']['format']
    other_parameters = gmap_option_dict['GMAP parameters']['other_parameters']

    # set the cluster reference dataset directory
    cluster_reference_dataset_dir = xlib.get_cluster_reference_dataset_dir(reference_dataset_id)

    # set the cluster reference file
    cluster_reference_file = xlib.get_cluster_reference_file(reference_dataset_id, reference_file)

    # set the GMAP database name
    reference_file_name, _ = os.path.splitext(reference_file)
    gmap_database = '{0}-gmap_database'.format(reference_file_name)

    # set the transcriptome file path; it stays None when the assembly
    # software or the assembly type is not one of the expected values
    result_dataset_dir = xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)
    transcriptome_file = None
    if assembly_software == xlib.get_soapdenovotrans_code():
        if assembly_type.upper() == 'CONTIGS':
            transcriptome_file = '{0}/{1}-{2}.contig'.format(result_dataset_dir, experiment_id, assembly_dataset_id)
        elif assembly_type.upper() == 'SCAFFOLDS':
            transcriptome_file = '{0}/{1}-{2}.scafSeq'.format(result_dataset_dir, experiment_id, assembly_dataset_id)
    elif assembly_software == xlib.get_transabyss_code():
        transcriptome_file = '{0}/transabyss-final.fa'.format(result_dataset_dir)
    elif assembly_software == xlib.get_trinity_code():
        transcriptome_file = '{0}/Trinity.fasta'.format(result_dataset_dir)
    elif assembly_software == xlib.get_star_code():
        transcriptome_file = '{0}/Trinity-GG.fasta'.format(result_dataset_dir)
    elif assembly_software == xlib.get_cd_hit_est_code():
        transcriptome_file = '{0}/clustered-transcriptome.fasta'.format(result_dataset_dir)
    elif assembly_software == xlib.get_transcript_filter_code():
        transcriptome_file = '{0}/filtered-transcriptome.fasta'.format(result_dataset_dir)
    if transcriptome_file is None:
        error_list.append('*** ERROR: The transcriptome file can not be determined with assembly software {0} and assembly type {1}.'.format(assembly_software, assembly_type))
        OK = False

    # build the script lines of the other parameters before writing anything,
    # so that a malformed item is reported as such and not as a file error
    other_parameter_lines = []
    if other_parameters.upper() != 'NONE':
        for parameter in (x.strip() for x in other_parameters.split(';')):
            if parameter.find('=') > 0:
                mo = re.search(r'^--(.+)=(.+)$', parameter)
                if mo is not None:
                    other_parameter_lines.append('            --{0}={1} \\'.format(mo.group(1).strip(), mo.group(2).strip()))
            else:
                mo = re.search(r'^--(.+)$', parameter)
                if mo is not None:
                    other_parameter_lines.append('            --{0} \\'.format(mo.group(1).strip()))
            if mo is None:
                error_list.append('*** ERROR: The item "{0}" of other_parameters is not like --name or --name=value.'.format(parameter))
                OK = False

    # set the output file path
    output_file = 'gmap_output_{0}.txt'.format(output_format.lower())

    # get the GMAP process script name
    gmap_process_script = get_gmap_process_script()

    # do not write a script that could not run
    if not OK:
        return (OK, error_list)

    # write the GMAP process script
    try:
        # a script path without directory part needs no directory creation
        script_dir = os.path.dirname(gmap_process_script)
        if script_dir:
            os.makedirs(script_dir, exist_ok=True)
        with open(gmap_process_script, mode='w', encoding='utf8', newline='\n') as file_id:
            file_id.write('{0}\n'.format('#!/bin/bash'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('GMAP_GSNAP_PATH={0}/{1}/envs/{2}/bin'.format(xlib.get_cluster_app_dir(), xlib.get_miniconda3_name(), xlib.get_gmap_gsnap_bioconda_code())))
            file_id.write('{0}\n'.format('PATH=$GMAP_GSNAP_PATH:$PATH'))
            file_id.write('{0}\n'.format('SEP="#########################################"'))
            file_id.write('{0}\n'.format('cd {0}/{1}/bin'.format(xlib.get_cluster_app_dir(), xlib.get_miniconda3_name())))
            file_id.write('{0}\n'.format('source activate {0}'.format(xlib.get_gmap_gsnap_bioconda_code())))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function init'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    INIT_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format('    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'.format(cluster_name)))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function build_gmap_database'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    cd {0}'.format(current_run_dir)))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    /usr/bin/time \\'))
            file_id.write('{0}\n'.format('        --format="$SEP\\nElapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'))
            file_id.write('{0}\n'.format('        gmap_build \\'))
            file_id.write('{0}\n'.format('            --dir={0}\\'.format(cluster_reference_dataset_dir)))
            file_id.write('{0}\n'.format('            --db={0}\\'.format(gmap_database)))
            if kmer.upper() != 'NONE':
                file_id.write('{0}\n'.format('            --kmer={0} \\'.format(kmer)))
            file_id.write('{0}\n'.format('            {0}'.format(cluster_reference_file)))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function run_gmap_process'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    cd {0}'.format(current_run_dir)))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    gmap --version'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    /usr/bin/time \\'))
            file_id.write('{0}\n'.format('        --format="$SEP\\nElapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'))
            file_id.write('{0}\n'.format('        gmap \\'))
            file_id.write('{0}\n'.format('            --nthreads={0} \\'.format(threads)))
            file_id.write('{0}\n'.format('            --dir={0} \\'.format(cluster_reference_dataset_dir)))
            file_id.write('{0}\n'.format('            --db={0} \\'.format(gmap_database)))
            if kmer.upper() != 'NONE':
                file_id.write('{0}\n'.format('            --kmer={0} \\'.format(kmer)))
            if sampling.upper() != 'NONE':
                file_id.write('{0}\n'.format('            --sampling={0} \\'.format(sampling)))
            file_id.write('{0}\n'.format('            --input-buffer-size={0} \\'.format(input_buffer_size)))
            file_id.write('{0}\n'.format('            --output-buffer-size={0} \\'.format(output_buffer_size)))
            file_id.write('{0}\n'.format('            --prunelevel={0} \\'.format(prunelevel)))
            # COMPRESS, SUMMARY and ALIGN have their own flag; any other value goes through --format
            if output_format.upper() == 'COMPRESS':
                file_id.write('{0}\n'.format('            --compress \\'))
            elif output_format.upper() == 'SUMMARY':
                file_id.write('{0}\n'.format('            --summary \\'))
            elif output_format.upper() == 'ALIGN':
                file_id.write('{0}\n'.format('            --align \\'))
            else:
                file_id.write('{0}\n'.format('            --format={0} \\'.format(output_format.lower())))
            file_id.write('{0}\n'.format('            --ordered \\'))
            file_id.write('{0}\n'.format('            --nofails \\'))
            for other_parameter_line in other_parameter_lines:
                file_id.write('{0}\n'.format(other_parameter_line))
            file_id.write('{0}\n'.format('            {0} \\'.format(transcriptome_file)))
            file_id.write('{0}\n'.format('            > {0}'.format(output_file)))
            file_id.write('{0}\n'.format('    RC=$?'))
            file_id.write('{0}\n'.format('    if [ $RC -ne 0 ]; then manage_error gmap $RC; fi'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function end'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    END_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format('    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'))
            file_id.write('{0}\n'.format('    calculate_duration'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    RECIPIENT={0}'.format(xconfiguration.get_contact_data())))
            file_id.write('{0}\n'.format('    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), xlib.get_gmap_name())))
            file_id.write('{0}\n'.format('    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(xlib.get_gmap_name(), cluster_name)))
            file_id.write('{0}\n'.format('    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'))
            file_id.write('{0}\n'.format('    exit 0'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function manage_error'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    END_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format('    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'))
            file_id.write('{0}\n'.format('    calculate_duration'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    echo "ERROR: $1 returned error $2"'))
            file_id.write('{0}\n'.format('    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    RECIPIENT={0}'.format(xconfiguration.get_contact_data())))
            file_id.write('{0}\n'.format('    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), xlib.get_gmap_name())))
            file_id.write('{0}\n'.format('    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(xlib.get_gmap_name(), cluster_name)))
            file_id.write('{0}\n'.format('    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'))
            file_id.write('{0}\n'.format('    exit 3'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('function calculate_duration'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    DURATION=`expr $END_DATETIME - $INIT_DATETIME`'))
            file_id.write('{0}\n'.format('    HH=`expr $DURATION / 3600`'))
            file_id.write('{0}\n'.format('    MM=`expr $DURATION % 3600 / 60`'))
            file_id.write('{0}\n'.format('    SS=`expr $DURATION % 60`'))
            file_id.write('{0}\n'.format('    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format('#-------------------------------------------------------------------------------'))
            file_id.write('{0}\n'.format('init'))
            file_id.write('{0}\n'.format('build_gmap_database'))
            file_id.write('{0}\n'.format('run_gmap_process'))
            file_id.write('{0}\n'.format('end'))
    except Exception as e:
        # KeyboardInterrupt and SystemExit are no longer trapped here; the
        # cause is reported next to the generic message
        error_list.append('*** ERROR: The file {0} can not be created'.format(gmap_process_script))
        error_list.append('*** {0}'.format(e))
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 9
0
def create_cluster(template_name,
                   cluster_name,
                   log,
                   function=None,
                   is_menu_call=True):
    '''
    Create a cluster from a template name.

    The requirements are checked first (the template is defined in the config
    file, the cluster mode is None and the current zone is available). Then
    the cluster is started with StarCluster and, unless it is the volume
    creator, the infrastructure software is installed in every node.

    template_name: name of the cluster template passed to StarCluster.
    cluster_name: name of the cluster to be created.
    log: object whose method write() receives the messages; the messages about
        the log window are only written when it is not an xlib.DevStdOut
        instance and "is_menu_call" is True.
    function: callable without arguments run at the end (also when the
        process has failed); None to run nothing.
    is_menu_call: True when the process is started from a menu.

    Returns the tuple (OK, master_state_code, master_state_name). The state
    values are empty strings unless the StarCluster command ended with return
    code 0. OK is False if any requirement is not met, if the StarCluster
    command fails or if the software installation fails in any node.
    '''

    # initialize the control variable
    OK = True

    # initialize the state variables
    master_state_code = ''
    master_state_name = ''

    # get current region and zone names
    region_name = xconfiguration.get_current_region_name()
    zone_name = xconfiguration.get_current_zone_name()

    # warn that the log window does not have to be closed
    if not isinstance(log, xlib.DevStdOut) and is_menu_call:
        log.write(
            'This process might take several minutes. Do not close this window, please wait!\n'
        )

    # warn that the requirements are being verified
    log.write(f'{xlib.get_separator()}\n')
    log.write('Checking process requirements ...\n')

    # check that the cluster is defined in the NGScloud config file
    if OK:
        if not xconfiguration.is_template_defined(template_name):
            log.write(
                '*** ERROR: The cluster {0} is not defined in the {1} config file.\n'
                .format(cluster_name, xlib.get_project_name()))
            OK = False

    # check that the cluster mode is None
    if OK:
        if xec2.get_cluster_mode(cluster_name) is not None:
            log.write('*** ERROR: There is a cluster or a instance running.\n')
            OK = False

    # check that the zone is available
    if OK:
        if not xec2.is_zone_available(region_name, zone_name):
            log.write(
                '*** ERROR: The zone name {0} is not available.\n'.format(
                    zone_name))
            OK = False

    # warn that the requirements are OK
    if OK:
        log.write('Process requirements are OK.\n')

    # create the cluster
    if OK:
        log.write(f'{xlib.get_separator()}\n')
        if cluster_name == xlib.get_volume_creator_name():
            log.write('Creating the volume creator using StarCluster ...\n')
        else:
            log.write(
                'Creating the cluster {0} using StarCluster ...\n'.format(
                    cluster_name))
        log.write('\n')
        # the volume creator template is started with the option --disable-queue
        if template_name == xlib.get_volume_creator_name():
            command = '{0} --region={1} start --availability-zone={2} --cluster-template={3} --disable-queue {4}'.format(
                xlib.get_starcluster(), region_name, zone_name, template_name,
                cluster_name)
        else:
            command = '{0} --region={1} start --availability-zone={2} --cluster-template={3} {4}'.format(
                xlib.get_starcluster(), region_name, zone_name, template_name,
                cluster_name)
        rc = xlib.run_command(command, log)
        log.write('\n')
        if rc == 0:
            (master_state_code,
             master_state_name) = xec2.get_node_state(cluster_name,
                                                      node_name='master')
            if cluster_name == xlib.get_volume_creator_name():
                log.write('The volume creator is created.\n')
            else:
                log.write('The cluster is created.\n')
        else:
            log.write('*** ERROR: Return code {0} in command -> {1}\n'.format(
                rc, command))
            # this separator needs its own newline so that it does not run into the next message
            log.write('***\n')
            log.write(
                '*** You have to terminate {0} (option "Force termination of a cluster")\n'
                .format(cluster_name))
            log.write('*** and create it again.\n')
            OK = False

    # install infrastructure  software in every node of the cluster
    if OK:
        if cluster_name != xlib.get_volume_creator_name():
            cluster_node_list = xec2.get_cluster_node_list(cluster_name)
            for node_name in cluster_node_list:
                # every node is processed, but a failure in any of them is
                # kept: a later success must not hide it
                if not xnode.install_node_infrastructure_software(
                        cluster_name, node_name, log):
                    OK = False

    # warn that the log window can be closed
    if not isinstance(log, xlib.DevStdOut) and is_menu_call:
        log.write(f'{xlib.get_separator()}\n')
        log.write('You can close this window now.\n')

    # execute final function
    if function is not None:
        function()

    # return the control variable and the state
    return (OK, master_state_code, master_state_name)
Esempio n. 10
0
def build_fastqc_process_script(cluster_name, current_run_dir):
    '''
    Build the current FastQC process script.

    The options are read from the FastQC config file and a Bash script is
    written to the path returned by get_fastqc_process_script(). The script
    runs "fastqc" once for every read file configured in a "file-n" section
    and sends a mail to the recipient returned by
    xconfiguration.get_contact_data() when it ends.

    Parameters:
        cluster_name: cluster name written in the log and mail texts.
        current_run_dir: run directory; the script changes to it and passes
            it to "fastqc" as "--outdir".

    Returns:
        The tuple (OK, error_list). OK is False and error_list holds one
        message when the script can not be written.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the FastQC option dictionary
    fastqc_option_dict = xlib.get_option_dict(get_fastqc_config_file())

    # get the options
    experiment_id = fastqc_option_dict['identification']['experiment_id']
    read_dataset_id = fastqc_option_dict['identification']['read_dataset_id']
    threads = fastqc_option_dict['FastQC parameters']['threads']

    # build the file name list from the sections whose identification is
    # like file-n, following the sorted order of the section names
    file_name_list = [
        fastqc_option_dict[section]['file_name']
        for section in sorted(fastqc_option_dict.keys())
        if re.match('^file-[0-9]+$', section)
    ]

    # set the texts that are written several times in the script
    separator = '#-------------------------------------------------------------------------------'
    echo_sep = '    echo "$SEP"'
    time_format = '        --format="$SEP\\nElapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'
    mail_command = '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'

    # write the FastQC process script
    try:
        script_dir = os.path.dirname(get_fastqc_process_script())
        if not os.path.exists(script_dir):
            os.makedirs(script_dir)
        with open(get_fastqc_process_script(),
                  mode='w',
                  encoding='utf8',
                  newline='\n') as file_id:

            def write_line(text):
                # write a script line ended with a line feed
                file_id.write('{0}\n'.format(text))

            # environment set-up
            write_line('#!/bin/bash')
            write_line(separator)
            write_line('FASTQC_PATH={0}/{1}/envs/{2}/bin'.format(
                xlib.get_cluster_app_dir(), xlib.get_miniconda3_name(),
                xlib.get_fastqc_bioconda_code()))
            write_line('PATH=$FASTQC_PATH:$PATH')
            write_line('SEP="#########################################"')
            write_line('cd {0}/{1}/bin'.format(
                xlib.get_cluster_app_dir(), xlib.get_miniconda3_name()))
            write_line('source activate {0}'.format(
                xlib.get_fastqc_bioconda_code()))
            write_line(separator)

            # function "init"
            write_line('function init')
            write_line('{')
            write_line('    INIT_DATETIME=`date --utc +%s`')
            write_line(
                '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`'
            )
            write_line(echo_sep)
            write_line(
                '    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'
                .format(cluster_name))
            write_line('}')
            write_line(separator)

            # function "run_fastqc_process": one timed fastqc run per file
            write_line('function run_fastqc_process')
            write_line('{')
            write_line('    cd {0}'.format(current_run_dir))
            write_line(echo_sep)
            write_line('    fastqc --version')
            for file_name in file_name_list:
                write_line(echo_sep)
                write_line('    /usr/bin/time \\')
                write_line(time_format)
                write_line('        fastqc \\')
                write_line('            {0} \\'.format(
                    xlib.get_cluster_read_file(experiment_id, read_dataset_id,
                                               file_name)))
                write_line('            --threads={0} \\'.format(threads))
                write_line('            --outdir={0}'.format(current_run_dir))
                write_line('    RC=$?')
                write_line(
                    '    if [ $RC -ne 0 ]; then manage_error fastqc $RC; fi')
            write_line('}')
            write_line(separator)

            # function "end"
            write_line('function end')
            write_line('{')
            write_line('    END_DATETIME=`date --utc +%s`')
            write_line(
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'
            )
            write_line('    calculate_duration')
            write_line(echo_sep)
            write_line(
                '    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'
            )
            write_line(echo_sep)
            write_line('    RECIPIENT={0}'.format(
                xconfiguration.get_contact_data()))
            write_line('    SUBJECT="{0}: {1} process"'.format(
                xlib.get_project_name(), xlib.get_fastqc_name()))
            write_line(
                '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
                .format(xlib.get_fastqc_name(), cluster_name))
            write_line(mail_command)
            write_line('    exit 0')
            write_line('}')
            write_line(separator)

            # function "manage_error"
            write_line('function manage_error')
            write_line('{')
            write_line('    END_DATETIME=`date --utc +%s`')
            write_line(
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'
            )
            write_line('    calculate_duration')
            write_line(echo_sep)
            write_line('    echo "ERROR: $1 returned error $2"')
            write_line(
                '    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'
            )
            write_line(echo_sep)
            write_line('    RECIPIENT={0}'.format(
                xconfiguration.get_contact_data()))
            write_line('    SUBJECT="{0}: {1} process"'.format(
                xlib.get_project_name(), xlib.get_fastqc_name()))
            write_line(
                '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
                .format(xlib.get_fastqc_name(), cluster_name))
            write_line(mail_command)
            write_line('    exit 3')
            write_line('}')
            write_line(separator)

            # function "calculate_duration"
            write_line('function calculate_duration')
            write_line('{')
            write_line('    DURATION=`expr $END_DATETIME - $INIT_DATETIME`')
            write_line('    HH=`expr $DURATION / 3600`')
            write_line('    MM=`expr $DURATION % 3600 / 60`')
            write_line('    SS=`expr $DURATION % 60`')
            write_line(
                '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`'
            )
            write_line('}')
            write_line(separator)

            # main sequence of the script
            write_line('init')
            write_line('run_fastqc_process')
            write_line('end')
    except Exception:
        # any failure while building the script is reported to the caller;
        # KeyboardInterrupt and SystemExit are deliberately not trapped
        error_list.append('*** ERROR: The file {0} can not be created'.format(
            get_fastqc_process_script()))
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 11
0
def build_cd_hit_est_process_script(cluster_name, current_run_dir):
    '''
    Build the current CD-HIT-EST process script.

    The options are read from the CD-HIT-EST config file and a Bash script is
    written to the path returned by get_cd_hit_est_process_script(). The
    script runs "cd-hit-est" on the transcriptome file of the configured
    assembly dataset and sends a mail to the recipient returned by
    xconfiguration.get_contact_data() when it ends.

    Parameters:
        cluster_name: cluster name written in the log and mail texts.
        current_run_dir: run directory; the script changes to it and the
            clustered transcriptome is written there.

    Returns:
        The tuple (OK, error_list). OK is False and error_list holds one
        message when the transcriptome file can not be determined from the
        options or when the script can not be written.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the option dictionary
    cd_hit_est_option_dict = xlib.get_option_dict(get_cd_hit_est_config_file())

    # get the options
    identification = cd_hit_est_option_dict['identification']
    experiment_id = identification['experiment_id']
    assembly_software = identification['assembly_software']
    assembly_dataset_id = identification['assembly_dataset_id']
    assembly_type = identification['assembly_type']
    parameters = cd_hit_est_option_dict['CD-HIT-EST parameters']
    threads = parameters['threads']
    memory_limit = parameters['memory_limit']
    seq_identity_threshold = parameters['seq_identity_threshold']
    word_length = parameters['word_length']
    mask = parameters['mask']
    match = parameters['match']
    mismatch = parameters['mismatch']
    other_parameters = parameters['other_parameters']

    # set the transcriptome file name; the assembly type is compared in upper
    # case because the config file validation accepts it in any case
    transcriptome_file_name = None
    if assembly_software == xlib.get_soapdenovotrans_code():
        if assembly_type.upper() == 'CONTIGS':
            transcriptome_file_name = '{0}-{1}.contig'.format(
                experiment_id, assembly_dataset_id)
        elif assembly_type.upper() == 'SCAFFOLDS':
            transcriptome_file_name = '{0}-{1}.scafSeq'.format(
                experiment_id, assembly_dataset_id)
    elif assembly_software == xlib.get_transabyss_code():
        transcriptome_file_name = 'transabyss-final.fa'
    elif assembly_software == xlib.get_trinity_code():
        transcriptome_file_name = 'Trinity.fasta'
    elif assembly_software == xlib.get_star_code():
        transcriptome_file_name = 'Trinity-GG.fasta'
    elif assembly_software == xlib.get_cd_hit_est_code():
        transcriptome_file_name = 'clustered-transcriptome.fasta'
    elif assembly_software == xlib.get_transcript_filter_code():
        transcriptome_file_name = 'filtered-transcriptome.fasta'

    # report the options that do not identify a transcriptome file instead of
    # failing later with an unbound variable
    if transcriptome_file_name is None:
        error_list.append(
            '*** ERROR: The transcriptome file can not be determined with assembly software "{0}" and assembly type "{1}".'
            .format(assembly_software, assembly_type))
        OK = False
        return (OK, error_list)

    # set the transcriptome file path
    transcriptome_file = '{0}/{1}'.format(
        xlib.get_cluster_experiment_result_dataset_dir(
            experiment_id, assembly_dataset_id), transcriptome_file_name)

    # set the output file path
    output_file = '{0}/clustered-transcriptome.fasta'.format(current_run_dir)

    # set the texts that are written several times in the script
    separator = '#-------------------------------------------------------------------------------'
    echo_sep = '    echo "$SEP"'
    mail_command = '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'

    # write the CD-HIT-EST process script
    try:
        script_dir = os.path.dirname(get_cd_hit_est_process_script())
        if not os.path.exists(script_dir):
            os.makedirs(script_dir)
        with open(get_cd_hit_est_process_script(),
                  mode='w',
                  encoding='utf8',
                  newline='\n') as file_id:

            def write_line(text):
                # write a script line ended with a line feed
                file_id.write('{0}\n'.format(text))

            # environment set-up
            write_line('#!/bin/bash')
            write_line(separator)
            write_line('CDHIT_PATH={0}/{1}/envs/{2}/bin'.format(
                xlib.get_cluster_app_dir(), xlib.get_miniconda3_name(),
                xlib.get_cd_hit_bioconda_code()))
            write_line('PATH=$CDHIT_PATH:$PATH')
            write_line('SEP="#########################################"')
            write_line('cd {0}/{1}/bin'.format(
                xlib.get_cluster_app_dir(), xlib.get_miniconda3_name()))
            write_line('source activate {0}'.format(
                xlib.get_cd_hit_bioconda_code()))
            write_line(separator)

            # function "init"
            write_line('function init')
            write_line('{')
            write_line('    INIT_DATETIME=`date --utc +%s`')
            write_line(
                '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`'
            )
            write_line(echo_sep)
            write_line(
                '    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'
                .format(cluster_name))
            write_line('}')
            write_line(separator)

            # function "run_cd_hit_est_process"
            write_line('function run_cd_hit_est_process')
            write_line('{')
            write_line('    cd {0}'.format(current_run_dir))
            write_line(echo_sep)
            write_line('    echo "Running {0} process ..."'.format(
                xlib.get_cd_hit_est_name()))

            # gather the arguments of the cd-hit-est command
            argument_list = [
                '-T {0}'.format(threads),
                '-M {0}'.format(memory_limit),
                '-i {0}'.format(transcriptome_file),
                '-c {0}'.format(seq_identity_threshold),
                '-n {0}'.format(word_length),
                '-mask {0}'.format(mask),
                '-match {0}'.format(match),
                '-mismatch {0}'.format(mismatch),
                '-o {0}'.format(output_file),
            ]

            # add the other parameters: "--name=value" becomes "-name value"
            # and "--name" becomes "-name"
            if other_parameters.upper() != 'NONE':
                for parameter in [
                        x.strip() for x in other_parameters.split(';')
                ]:
                    if parameter.find('=') > 0:
                        mo = re.search(r'^--(.+)=(.+)$', parameter)
                        argument_list.append('-{0} {1}'.format(
                            mo.group(1).strip(),
                            mo.group(2).strip()))
                    else:
                        mo = re.search(r'^--(.+)$', parameter)
                        argument_list.append('-{0}'.format(
                            mo.group(1).strip()))

            # write the command; every argument line but the last one ends
            # with a line continuation, so the command never absorbs the
            # "RC=$?" line that follows it
            write_line('    /usr/bin/time \\')
            write_line(
                '        --format="$SEP\\nElapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'
            )
            write_line('        cd-hit-est \\')
            last_index = len(argument_list) - 1
            for index, argument in enumerate(argument_list):
                continuation = ' \\' if index < last_index else ''
                write_line('            {0}{1}'.format(
                    argument, continuation))
            write_line('    RC=$?')
            write_line(
                '    if [ $RC -ne 0 ]; then manage_error cd-hit-est $RC; fi')
            write_line('}')
            write_line(separator)

            # function "end"
            write_line('function end')
            write_line('{')
            write_line('    END_DATETIME=`date --utc +%s`')
            write_line(
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'
            )
            write_line('    calculate_duration')
            write_line(echo_sep)
            write_line(
                '    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'
            )
            write_line(echo_sep)
            write_line('    RECIPIENT={0}'.format(
                xconfiguration.get_contact_data()))
            write_line('    SUBJECT="{0}: {1} process"'.format(
                xlib.get_project_name(), xlib.get_cd_hit_est_name()))
            write_line(
                '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
                .format(xlib.get_cd_hit_est_name(), cluster_name))
            write_line(mail_command)
            write_line('    exit 0')
            write_line('}')
            write_line(separator)

            # function "manage_error"
            write_line('function manage_error')
            write_line('{')
            write_line('    END_DATETIME=`date --utc +%s`')
            write_line(
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'
            )
            write_line('    calculate_duration')
            write_line(echo_sep)
            write_line('    echo "ERROR: $1 returned error $2"')
            write_line(
                '    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'
            )
            write_line(echo_sep)
            write_line('    RECIPIENT={0}'.format(
                xconfiguration.get_contact_data()))
            write_line('    SUBJECT="{0}: {1} process"'.format(
                xlib.get_project_name(), xlib.get_cd_hit_est_name()))
            write_line(
                '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
                .format(xlib.get_cd_hit_est_name(), cluster_name))
            write_line(mail_command)
            write_line('    exit 3')
            write_line('}')
            write_line(separator)

            # function "calculate_duration"
            write_line('function calculate_duration')
            write_line('{')
            write_line('    DURATION=`expr $END_DATETIME - $INIT_DATETIME`')
            write_line('    HH=`expr $DURATION / 3600`')
            write_line('    MM=`expr $DURATION % 3600 / 60`')
            write_line('    SS=`expr $DURATION % 60`')
            write_line(
                '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`'
            )
            write_line('}')
            write_line(separator)

            # main sequence of the script
            write_line('init')
            write_line('run_cd_hit_est_process')
            write_line('end')
    except Exception:
        # any failure while building the script (including an other parameter
        # that does not look like "--name" or "--name=value") is reported to
        # the caller; KeyboardInterrupt and SystemExit are not trapped
        error_list.append('*** ERROR: The file {0} can not be created'.format(
            get_cd_hit_est_process_script()))
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 12
0
def validate_cd_hit_est_config_file(strict):
    '''
    Validate the CD-HIT-EST config file of a run.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # intitialize variable used when value is not found
    not_found = '***NOTFOUND***'.upper()

    # get the option dictionary
    try:
        cd_hit_est_option_dict = xlib.get_option_dict(
            get_cd_hit_est_config_file())
    except:
        error_list.append('*** ERROR: The syntax is WRONG.')
        OK = False
    else:

        # get the sections list
        sections_list = []
        for section in cd_hit_est_option_dict.keys():
            sections_list.append(section)
        sections_list.sort()

        # check section "identification"
        if 'identification' not in sections_list:
            error_list.append(
                '*** ERROR: the section "identification" is not found.')
            OK = False
        else:

            # check section "identification" - key "experiment_id"
            experiment_id = cd_hit_est_option_dict.get(
                'identification', {}).get('experiment_id', not_found)
            if experiment_id == not_found:
                error_list.append(
                    '*** ERROR: the key "experiment_id" is not found in the section "identification".'
                )
                OK = False

            # check section "identification" - key "assembly_software"
            assembly_software = cd_hit_est_option_dict.get(
                'identification', {}).get('assembly_software', not_found)
            if assembly_software == not_found:
                error_list.append(
                    '*** ERROR: the key "assembly_software" is not found in the section "identification".'
                )
                OK = False
            elif assembly_software not in [
                    xlib.get_soapdenovotrans_code(),
                    xlib.get_transabyss_code(),
                    xlib.get_trinity_code(),
                    xlib.get_star_code(),
                    xlib.get_cd_hit_est_code(),
                    xlib.get_transcript_filter_code()
            ]:
                error_list.append(
                    '*** ERROR: the key "assembly_software" value in the section "identification" must be {0} or {1} or {2} or {3} or {4} OR {5}.'
                    .format(xlib.get_soapdenovotrans_code(),
                            xlib.get_transabyss_code(),
                            xlib.get_trinity_code(), xlib.get_star_code(),
                            xlib.get_cd_hit_est_code(),
                            xlib.get_transcript_filter_code()))
                OK = False

            # check section "identification" - key "assembly_dataset_id"
            assembly_dataset_id = cd_hit_est_option_dict.get(
                'identification', {}).get('assembly_dataset_id', not_found)
            if assembly_dataset_id == not_found:
                error_list.append(
                    '*** ERROR: the key "assembly_dataset_id" is not found in the section "identification".'
                )
                OK = False
            elif not assembly_dataset_id.startswith(
                    xlib.get_soapdenovotrans_code()
            ) and not assembly_dataset_id.startswith(xlib.get_transabyss_code(
            )) and not assembly_dataset_id.startswith(xlib.get_trinity_code(
            )) and not assembly_dataset_id.startswith(xlib.get_star_code(
            )) and not assembly_dataset_id.startswith(xlib.get_cd_hit_est_code(
            )) and not assembly_dataset_id.startswith(
                    xlib.get_transcript_filter_code()):
                error_list.append(
                    '*** ERROR: the key "assembly_dataset_id" value is not a {0} nor {1} nor {2} nor {3} nor {4} nor {5} assembly.'
                    .format(xlib.get_soapdenovotrans_name(),
                            xlib.get_transabyss_name(),
                            xlib.get_trinity_name(), xlib.get_star_name(),
                            xlib.get_cd_hit_est_name(),
                            xlib.get_transcript_filter_code()))
                OK = False

            # check section "identification" - key "assembly_type"
            assembly_type = cd_hit_est_option_dict.get(
                'identification', {}).get('assembly_type', not_found)
            if assembly_type == not_found:
                error_list.append(
                    '*** ERROR: the key "assembly_type" is not found in the section "identification".'
                )
                OK = False
            elif assembly_dataset_id.startswith(
                    xlib.get_soapdenovotrans_code()):
                if assembly_type.upper() not in ['CONTIGS', 'SCAFFOLDS']:
                    error_list.append(
                        '*** ERROR: the key "assembly_type" must be "CONTIGS" or "SCAFFOLDS" when {0} is the assembly software.'
                        .format(xlib.get_soapdenovotrans_name()))
                    OK = False
            elif assembly_dataset_id.startswith(xlib.get_transabyss_code(
            )) or assembly_dataset_id.startswith(xlib.get_trinity_code(
            )) or assembly_dataset_id.startswith(
                    xlib.get_star_code()) or assembly_dataset_id.startswith(
                        xlib.get_cd_hit_est_code(
                        )) or assembly_dataset_id.startswith(
                            xlib.get_transcript_filter_code()):
                if assembly_type.upper() != 'NONE':
                    error_list.append(
                        '*** ERROR: the key "assembly_type" must be "NONE" when {0} or {1} or {2} or {3} or {4} is the assembly software.'
                        .format(xlib.get_transabyss_name(),
                                xlib.get_trinity_name(), xlib.get_star_name(),
                                xlib.get_cd_hit_est_name(),
                                xlib.get_transcript_filter_name()))
                    OK = False

        # check section "CD-HIT-EST parameters"
        if 'CD-HIT-EST parameters' not in sections_list:
            error_list.append(
                '*** ERROR: the section "CD-HIT-EST parameters" is not found.')
            OK = False
        else:

            # check section "CD-HIT-EST parameters" - key "threads"
            threads = cd_hit_est_option_dict.get('CD-HIT-EST parameters',
                                                 {}).get('threads', not_found)
            if threads == not_found:
                error_list.append(
                    '*** ERROR: the key "threads" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            else:
                try:
                    if int(threads) < 0:
                        error_list.append(
                            '*** ERROR: the key "threads" in the section "CD-HIT-EST parameters" must be an integer value greater or equal to 0.'
                        )
                        OK = False
                except:
                    error_list.append(
                        '*** ERROR: the key "threads" in the section "CD-HIT-EST parameters" must be an integer value greater or equal to 0.'
                    )
                    OK = False

            # check section "CD-HIT-EST parameters" - key "memory_limit"
            memory_limit = cd_hit_est_option_dict.get(
                'CD-HIT-EST parameters', {}).get('memory_limit', not_found)
            if memory_limit == not_found:
                error_list.append(
                    '*** ERROR: the key "memory_limit" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            else:
                try:
                    if int(memory_limit) < 0:
                        error_list.append(
                            '*** ERROR: the key "memory_limit" in the section "CD-HIT-EST parameters" must be an integer value greater or equal to 0.'
                        )
                        OK = False
                except:
                    error_list.append(
                        '*** ERROR: the key "memory_limit" in the section "CD-HIT-EST parameters" must be an integer value greater or equal to 0.'
                    )
                    OK = False

            # check section "CD-HIT-EST parameters" - key "seq_identity_threshold"
            seq_identity_threshold = cd_hit_est_option_dict.get(
                'CD-HIT-EST parameters', {}).get('seq_identity_threshold',
                                                 not_found)
            if seq_identity_threshold == not_found:
                error_list.append(
                    '*** ERROR: the key "seq_identity_threshold" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            else:
                try:
                    if float(seq_identity_threshold) < 0.0 or float(
                            seq_identity_threshold) > 1.0:
                        error_list.append(
                            '*** ERROR: the key "seq_identity_threshold" in the section "CD-HIT-EST parameters" must be a float value between 0.0 and 1.0.'
                        )
                        OK = False
                except:
                    error_list.append(
                        '*** ERROR: the key "seq_identity_threshold" in the section "CD-HIT-EST parameters" must be a float value between 0.0 and 1.0.'
                    )
                    OK = False

            # check section "CD-HIT-EST parameters" - key "word_length"
            word_length = cd_hit_est_option_dict.get(
                'CD-HIT-EST parameters', {}).get('word_length', not_found)
            if word_length == not_found:
                error_list.append(
                    '*** ERROR: the key "word_length" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            else:
                try:
                    if int(word_length) < 1:
                        error_list.append(
                            '*** ERROR: the key "word_length" in the section "CD-HIT-EST parameters" must be an integer value greater or equal to 1.'
                        )
                        OK = False
                except:
                    error_list.append(
                        '*** ERROR: the key "word_length" in the section "CD-HIT-EST parameters" must be an integer value greater or equal to 1.'
                    )
                    OK = False

            # check section "CD-HIT-EST parameters" - key "mask"
            mask = cd_hit_est_option_dict.get('CD-HIT-EST parameters',
                                              {}).get('mask',
                                                      not_found).upper()
            if mask == not_found:
                error_list.append(
                    '*** ERROR: the key "mask" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False

            # check section "CD-HIT-EST parameters" - key "match"
            match = cd_hit_est_option_dict.get('CD-HIT-EST parameters',
                                               {}).get('match', not_found)
            if match == not_found:
                error_list.append(
                    '*** ERROR: the key "match" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            else:
                try:
                    int(match)
                except:
                    error_list.append(
                        '*** ERROR: the key "match" in the section "CD-HIT-EST parameters" must be an integer value.'
                    )
                    OK = False

            # check section "CD-HIT-EST parameters" - key "mismatch"
            mismatch = cd_hit_est_option_dict.get('CD-HIT-EST parameters',
                                                  {}).get(
                                                      'mismatch', not_found)
            if mismatch == not_found:
                error_list.append(
                    '*** ERROR: the key "mismatch" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            else:
                try:
                    int(mismatch)
                except:
                    error_list.append(
                        '*** ERROR: the key "match" in the section "CD-HIT-EST parameters" must be an integer value.'
                    )
                    OK = False

            # check section "CD-HIT-EST parameters" - key "other_parameters"
            not_allowed_parameters_list = [
                'T', 'M', 'c', 'n', 'mask', 'match', 'mismatch'
            ]
            other_parameters = cd_hit_est_option_dict.get(
                'CD-HIT-EST parameters', {}).get('other_parameters', not_found)
            if other_parameters == not_found:
                error_list.append(
                    '*** ERROR: the key "other_parameters" is not found in the section "CD-HIT-EST parameters".'
                )
                OK = False
            else:
                if other_parameters.upper() != 'NONE':
                    parameter_list = [
                        x.strip() for x in other_parameters.split(';')
                    ]
                    for parameter in parameter_list:
                        try:
                            if parameter.find('=') > 0:
                                pattern = r'^--(.+)=(.+)$'
                                mo = re.search(pattern, parameter)
                                parameter_name = mo.group(1).strip()
                                parameter_value = mo.group(2).strip()
                            else:
                                pattern = r'^--(.+)$'
                                mo = re.search(pattern, parameter)
                                parameter_name = mo.group(1).strip()
                        except:
                            error_list.append(
                                '*** ERROR: the value of the key "other_parameters" in the section "CD-HIT-EST parameters" must be NONE or a valid parameter list.'
                            )
                            OK = False
                            break
                        if parameter_name in not_allowed_parameters_list:
                            error_list.append(
                                '*** ERROR: the parameter {0} is not allowed in the key "other_parameters" of the section "CD-HIT-EST parameters" because it is controled by {1}.'
                                .format(parameter_name,
                                        xlib.get_project_name()))
                            OK = False

    # warn that the results config file is not valid if there are any errors
    if not OK:
        error_list.append(
            '\nThe {0} config file is not valid. Please, correct this file or recreate it.'
            .format(xlib.get_cd_hit_est_name()))

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 13
0
    def execute(self, event=None):
        '''
        List the submission logs of the local host in a table dialog.

        The log file names are obtained by running the command built by
        "xlib.list_log_files_command". Each name is expected to look like
        "<environment>-<process id>-<yymmdd>-<hhmmss>.txt"; a name that does
        not follow this layout, or whose process id is not a key of the local
        process dictionary, is listed as "unknown process" with a zeroed date
        and time.

        The parameter "event" is accepted but not used.
        '''

        # validate inputs
        OK = self.validate_inputs()
        if not OK:
            message = 'Some input values are not OK.'
            tkinter.messagebox.showerror('{0} - {1}'.format(xlib.get_project_name(), self.head), message)

        # build the log dictionary
        if OK:

            # get the local process dictionary (it is only needed to translate process ids to texts)
            local_process_dict = xlib.get_local_process_dict()

            # get the command that lists the log files of all processes or of the selected one
            if self.wrapper_local_process_text.get() == ' all':
                command = xlib.list_log_files_command('all')
            else:
                command = xlib.list_log_files_command(self.local_process_id)

            # the command is a string built by xlib and it is run through the shell
            output = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)

            # the dot of the extension is escaped so that only names really ending in ".txt" match
            log_name_pattern = re.compile(r'^(.+)\-(.+)\-(.+)\-(.+)\.txt$')

            log_dict = {}
            for line in output.stdout.split('\n'):
                if line == '':
                    continue
                run_id = os.path.basename(line)

                # placeholders used when the name can not be parsed or the process is unknown
                process_text = 'unknown process'
                log_date = '0000-00-00'
                log_time = '00:00:00'

                mo = log_name_pattern.search(run_id)
                if mo is not None:
                    try:
                        process_text = local_process_dict[mo.group(2).strip()]['text']
                    except (KeyError, TypeError):
                        # process id without a usable entry: keep the placeholders
                        pass
                    else:
                        yymmdd = mo.group(3)
                        hhmmss = mo.group(4)
                        log_date = '20{0}-{1}-{2}'.format(yymmdd[:2], yymmdd[2:4], yymmdd[4:])
                        log_time = '{0}:{1}:{2}'.format(hhmmss[:2], hhmmss[2:4], hhmmss[4:])

                log_dict[run_id] = {'run_id': run_id, 'process_text': process_text, 'date': log_date, 'time': log_time}

        # verify if there are any logs; the form is kept open when there are none
        if OK:
            if log_dict == {}:
                message = 'There is not any local process log.'
                tkinter.messagebox.showwarning('{0} - {1}'.format(xlib.get_project_name(), self.head), message)
                OK = False

        # build the data list
        if OK:
            data_list = ['run_id', 'process_text', 'date', 'time']

        # build the data dictionary
        if OK:
            data_dict = {}
            data_dict['run_id'] = {'text': 'Run id', 'width': 300, 'aligment': 'left'}
            data_dict['process_text'] = {'text': 'Process', 'width': 300, 'aligment': 'left'}
            data_dict['date'] = {'text': 'Date', 'width': 80, 'aligment': 'right'}
            data_dict['time'] = {'text': 'Time', 'width': 80, 'aligment': 'right'}

        # create the dialog Table to list the local process logs
        if OK:
            dialog_table = gdialogs.DialogTable(self, 'Local process log', 400, 900, data_list, data_dict, log_dict, 'view_submission_logs')
            self.wait_window(dialog_table)

        # close the form
        if OK:
            self.close()
Esempio n. 14
0
def build_busco_process_script(cluster_name, current_run_dir):
    '''
    Build the current BUSCO process script.

    The options are read from the BUSCO config file and a Bash script is
    written in the path returned by "get_busco_process_script()".

    Parameters:
        cluster_name: cluster name written in the log and mail messages of
            the script.
        current_run_dir: directory where the script changes to before
            downloading the lineage data and running BUSCO; its base name is
            passed to BUSCO as "--out".

    Returns:
        A tuple (OK, error_list): OK is False and error_list holds a message
        when the transcriptome file can not be determined from the assembly
        software and type, or when the script can not be written.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the BUSCO option dictionary
    busco_option_dict = xlib.get_option_dict(get_busco_config_file())

    # get the options
    identification_dict = busco_option_dict['identification']
    parameters_dict = busco_option_dict['BUSCO parameters']
    experiment_id = identification_dict['experiment_id']
    assembly_software = identification_dict['assembly_software']
    assembly_dataset_id = identification_dict['assembly_dataset_id']
    assembly_type = identification_dict['assembly_type']
    ncpu = parameters_dict['ncpu']
    lineage_data = parameters_dict['lineage_data']
    lineage_data_file = '{0}.tar.gz'.format(lineage_data)
    lineage_data_url = 'http://busco.ezlab.org/v2/datasets/{0}'.format(lineage_data_file)
    busco_mode = parameters_dict['mode'].lower()
    evalue = parameters_dict['evalue']
    limit = parameters_dict['limit']
    species = parameters_dict['species']
    long_option = parameters_dict['long'].upper()
    # the original case is kept because this value is written verbatim in the script;
    # it is only upper-cased when it is compared with "NONE"
    augustus_options = parameters_dict['augustus_options']

    # set the transcriptome file path
    dataset_dir = xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)
    transcriptome_file = None
    if assembly_software == xlib.get_soapdenovotrans_code():
        if assembly_type.upper() == 'CONTIGS':
            transcriptome_file = '{0}/{1}-{2}.contig'.format(dataset_dir, experiment_id, assembly_dataset_id)
        elif assembly_type.upper() == 'SCAFFOLDS':
            transcriptome_file = '{0}/{1}-{2}.scafSeq'.format(dataset_dir, experiment_id, assembly_dataset_id)
    elif assembly_software == xlib.get_transabyss_code():
        transcriptome_file = '{0}/transabyss-final.fa'.format(dataset_dir)
    elif assembly_software == xlib.get_trinity_code():
        transcriptome_file = '{0}/Trinity.fasta'.format(dataset_dir)
    elif assembly_software == xlib.get_star_code():
        transcriptome_file = '{0}/Trinity-GG.fasta'.format(dataset_dir)
    elif assembly_software == xlib.get_cd_hit_est_code():
        transcriptome_file = '{0}/clustered-transcriptome.fasta'.format(dataset_dir)
    elif assembly_software == xlib.get_transcript_filter_code():
        transcriptome_file = '{0}/filtered-transcriptome.fasta'.format(dataset_dir)

    # stop before writing anything when the assembly software/type is not supported
    # (otherwise the script would be left half written and the real cause hidden)
    if transcriptome_file is None:
        error_list.append('*** ERROR: the transcriptome file can not be determined for the assembly software "{0}" and the assembly type "{1}".'.format(assembly_software, assembly_type))
        OK = False
        return (OK, error_list)

    # write the BUSCO process script
    script_path = get_busco_process_script()
    try:

        # build all the script lines before opening the file
        separator = '#-------------------------------------------------------------------------------'
        script_lines = [
            '#!/bin/bash',
            separator,
            'BUSCO_PATH={0}/{1}/envs/{2}/bin'.format(xlib.get_cluster_app_dir(), xlib.get_miniconda3_name(), xlib.get_busco_bioconda_code()),
            'export PATH=$BUSCO_PATH:$PATH',
            'SEP="#########################################"',
            'cd {0}/{1}/bin'.format(xlib.get_cluster_app_dir(), xlib.get_miniconda3_name()),
            'source activate {0}'.format(xlib.get_busco_bioconda_code()),
            separator,
            'function init',
            '{',
            '    INIT_DATETIME=`date --utc +%s`',
            '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
            '    echo "$SEP"',
            '    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'.format(cluster_name),
            '}',
            separator,
            'function download_lineage_data',
            '{',
            '    cd {0}'.format(current_run_dir),
            '    echo "$SEP"',
            '    echo "Downloading lineage data ..."',
            '    wget --quiet --output-document ./{0} {1}'.format(lineage_data_file, lineage_data_url),
            '    tar -xzvf ./{0}'.format(lineage_data_file),
            '    rm ./{0}'.format(lineage_data_file),
            '}',
            separator,
            'function run_busco_process',
            '{',
            '    cd {0}'.format(current_run_dir),
            '    echo "$SEP"',
            '    run_BUSCO.py --version',
            '    echo "$SEP"',
            '    /usr/bin/time \\',
            '        --format="$SEP\\nElapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\',
            '        run_BUSCO.py \\',
            '            --cpu={0} \\'.format(ncpu),
            '            --lineage_path=./{0} \\'.format(lineage_data),
            '            --mode={0} \\'.format(busco_mode),
            '            --evalue={0} \\'.format(evalue),
            '            --limit={0} \\'.format(limit),
        ]

        # optional BUSCO parameters
        if species.upper() != 'NONE':
            script_lines.append('            --species={0} \\'.format(species))
        if long_option == 'YES':
            script_lines.append('            --long \\')
        if augustus_options.upper() != 'NONE':
            # NOTE(review): the BUSCO flag is believed to be "--augustus_parameters";
            # confirm against the BUSCO version installed in the cluster
            script_lines.append("            --august_options='{0}' \\".format(augustus_options))

        script_lines.extend([
            '            --in={0} \\'.format(transcriptome_file),
            '            --out={0}'.format(os.path.basename(current_run_dir)),
            '    RC=$?',
            '    if [ $RC -ne 0 ]; then manage_error run_BUSCO.py $RC; fi',
            '}',
            separator,
            'function end',
            '{',
            '    END_DATETIME=`date --utc +%s`',
            '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
            '    calculate_duration',
            '    echo "$SEP"',
            '    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."',
            '    echo "$SEP"',
            '    RECIPIENT={0}'.format(xconfiguration.get_contact_data()),
            '    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), xlib.get_busco_name()),
            '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(xlib.get_busco_name(), cluster_name),
            '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"',
            '    exit 0',
            '}',
            separator,
            'function manage_error',
            '{',
            '    END_DATETIME=`date --utc +%s`',
            '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
            '    calculate_duration',
            '    echo "$SEP"',
            '    echo "ERROR: $1 returned error $2"',
            '    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."',
            '    echo "$SEP"',
            '    RECIPIENT={0}'.format(xconfiguration.get_contact_data()),
            '    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), xlib.get_busco_name()),
            '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(xlib.get_busco_name(), cluster_name),
            '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"',
            '    exit 3',
            '}',
            separator,
            'function calculate_duration',
            '{',
            '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`',
            '    HH=`expr $DURATION / 3600`',
            '    MM=`expr $DURATION % 3600 / 60`',
            '    SS=`expr $DURATION % 60`',
            '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`',
            '}',
            separator,
            'init',
            'download_lineage_data',
            'run_busco_process',
            'end',
        ])

        # create the script directory if necessary and write the script with Unix line ends
        script_dir = os.path.dirname(script_path)
        if not os.path.exists(script_dir):
            os.makedirs(script_dir)
        with open(script_path, mode='w', encoding='utf8', newline='\n') as file_id:
            file_id.writelines('{0}\n'.format(script_line) for script_line in script_lines)

    except Exception:
        # any failure is still reported through the error list, as callers expect,
        # but KeyboardInterrupt and SystemExit are no longer swallowed
        error_list.append('*** ERROR: The file {0} can not be created'.format(script_path))
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 15
0
    def execute(self, event=None):
        '''
        List the result datasets of the selected experiment in the cluster.

        The entries of the experiment result directory are read through the
        SSH client and shown in a table dialog with the bioinfo application
        and the date and time taken from each result dataset identification,
        which is expected to look like "<app code>-<yymmdd>-<hhmmss>".

        The parameter "event" is accepted but not used.
        '''

        # validate inputs
        OK = self.validate_inputs()
        if not OK:
            message = 'Some input values are not OK.'
            tkinter.messagebox.showerror('{0} - {1}'.format(xlib.get_project_name(), self.head), message)

        # get the result dataset dictionary of the experiment
        if OK:

            # the command echoes the entries of the experiment directory whose "ls -ld" line
            # does not start with "-", i.e. regular files are skipped
            command = 'cd  {0}/{1}; for list in `ls`; do ls -ld $list | grep -v ^- > /dev/null && echo $list; done;'.format(xlib.get_cluster_result_dir(), self.wrapper_experiment_id.get())
            (OK, stdout, stderr) = xssh.execute_cluster_command(self.ssh_client, command)

        if OK:

            # (code getter, name getter) pairs checked in this order; the first code whose
            # "<code>-" prefix matches the result dataset identification gives the name
            bioinfo_app_getter_list = [
                (xlib.get_bedtools_code, xlib.get_bedtools_name),
                (xlib.get_blastplus_code, xlib.get_blastplus_name),
                (xlib.get_bowtie2_code, xlib.get_bowtie2_name),
                (xlib.get_busco_code, xlib.get_busco_name),
                (xlib.get_cd_hit_code, xlib.get_cd_hit_name),
                (xlib.get_cd_hit_est_code, xlib.get_cd_hit_est_name),
                (xlib.get_detonate_code, xlib.get_detonate_name),
                (xlib.get_emboss_code, xlib.get_emboss_name),
                (xlib.get_fastqc_code, xlib.get_fastqc_name),
                (xlib.get_gmap_code, xlib.get_gmap_name),
                (xlib.get_gmap_gsnap_code, xlib.get_gmap_gsnap_name),
                (xlib.get_gzip_code, xlib.get_gzip_name),
                (xlib.get_insilico_read_normalization_code, xlib.get_insilico_read_normalization_name),
                (xlib.get_miniconda3_code, xlib.get_miniconda3_name),
                (xlib.get_ngshelper_code, xlib.get_ngshelper_name),
                (xlib.get_quast_code, xlib.get_quast_name),
                (xlib.get_r_code, xlib.get_r_name),
                (xlib.get_ref_eval_code, xlib.get_ref_eval_name),
                (xlib.get_rnaquast_code, xlib.get_rnaquast_name),
                (xlib.get_rsem_code, xlib.get_rsem_name),
                (xlib.get_rsem_eval_code, xlib.get_rsem_eval_name),
                (xlib.get_samtools_code, xlib.get_samtools_name),
                (xlib.get_soapdenovotrans_code, xlib.get_soapdenovotrans_name),
                (xlib.get_star_code, xlib.get_star_name),
                (xlib.get_transabyss_code, xlib.get_transabyss_name),
                (xlib.get_transcript_filter_code, xlib.get_transcript_filter_name),
                (xlib.get_transcriptome_blastx_code, xlib.get_transcriptome_blastx_name),
                (xlib.get_transrate_code, xlib.get_transrate_name),
                (xlib.get_trimmomatic_code, xlib.get_trimmomatic_name),
                (xlib.get_trinity_code, xlib.get_trinity_name),
            ]
            dataset_id_pattern = re.compile(r'^(.+)\-(.+)\-(.+)$')

            result_dataset_dict = {}
            for line in stdout:
                result_dataset_id = line.rstrip('\n')
                if result_dataset_id == 'lost+found':
                    continue

                # get the date and time from the last two "-" separated fields;
                # zeroed values are used when the identification has another layout
                mo = dataset_id_pattern.search(result_dataset_id)
                if mo is None:
                    date = '0000-00-00'
                    time = '00:00:00'
                else:
                    yymmdd = mo.group(2)
                    hhmmss = mo.group(3)
                    date = '20{0}-{1}-{2}'.format(yymmdd[:2], yymmdd[2:4], yymmdd[4:])
                    time = '{0}:{1}:{2}'.format(hhmmss[:2], hhmmss[2:4], hhmmss[4:])

                # get the bioinfo application name ("xxx" when no code matches)
                bioinfo_app_name = 'xxx'
                for (get_code, get_name) in bioinfo_app_getter_list:
                    if result_dataset_id.startswith(get_code() + '-'):
                        bioinfo_app_name = get_name()
                        break

                result_dataset_dict[result_dataset_id] = {'experiment_id': self.wrapper_experiment_id.get(), 'result_dataset_id': result_dataset_id, 'bioinfo_app': bioinfo_app_name, 'date': date, 'time': time}

        # verify if there are any result datasets
        # NOTE(review): OK is left True here, so the (empty) table dialog is still opened
        # after the warning -- confirm this is intended
        if OK:
            if result_dataset_dict == {}:
                message = 'There is not any run.'
                tkinter.messagebox.showwarning('{0} - {1}'.format(xlib.get_project_name(), self.head), message)

        # build the data list
        if OK:
            data_list = ['experiment_id', 'result_dataset_id', 'bioinfo_app', 'date', 'time']

        # build the data dictionary
        if OK:
            data_dict = {}
            data_dict['experiment_id'] = {'text': 'Experiment id. / Process', 'width': 200, 'aligment': 'left'}
            data_dict['result_dataset_id'] = {'text': 'Result dataset', 'width': 200, 'aligment': 'left'}
            data_dict['bioinfo_app'] = {'text': 'Bioinfo app / Utility', 'width': 200, 'aligment': 'left'}
            data_dict['date'] = {'text': 'Date', 'width': 80, 'aligment': 'right'}
            data_dict['time'] = {'text': 'Time', 'width': 80, 'aligment': 'right'}

        # create the dialog Table to show the result datasets
        if OK:
            dialog_table = gdialogs.DialogTable(self, 'Experiment runs in {0}/{1}'.format(xlib.get_cluster_result_dir(), self.wrapper_experiment_id.get()), 400, 900, data_list, data_dict, result_dataset_dict, 'view_result_logs', [self.wrapper_cluster_name.get()])
            self.wait_window(dialog_table)

        # close the form
        if OK:
            self.close()
Esempio n. 16
0
def build_infrastructure_software_installation_script(cluster_name):
    '''
    Build the infrastructure software installation script.

    The script built is a Bash program that fixes the file
    "/etc/apt/sources.list", installs the packages libtbb2 and mailutils and
    mails the final status (OK or WRONG) to the contact data recipient.

    Parameters:
        cluster_name: cluster name written in the log and mail messages.

    Returns:
        The tuple (OK, error_list). OK is False and error_list holds a message
        when the script file can not be created.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get infrastructure software installation script path
    infrastructure_software_installation_script = get_infrastructure_software_installation_script()

    # set the rule used to split the script sections
    rule = '#-------------------------------------------------------------------------------'

    # build and write the infrastructure software installation script
    try:

        # build the lines shared by the functions "end" and "manage_error"
        recipient_line = '    RECIPIENT={0}'.format(xconfiguration.get_contact_data())
        subject_line = '    SUBJECT="{0}: Infrastructure software installation"'.format(xlib.get_project_name())
        mail_line = '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'

        # build every line of the script before opening the file, so a failure
        # here does not leave a half-written script behind
        script_lines = [
            '#!/bin/bash',
            rule,
            'export DEBIAN_FRONTEND=noninteractive',
            'SEP="#########################################"',
            rule,
            'function init',
            '{',
            '    INIT_DATETIME=`date --utc +%s`',
            '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
            '    echo "$SEP"',
            '    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'.format(cluster_name),
            '}',
            rule,
            'function fix_source_list',
            '{',
            '    echo "$SEP"',
            '    echo "Fixing file /etc/apt/sources.list ..."',
            '    sed -i "s/us-east-1.ec2.archive.ubuntu.com/old-releases.ubuntu.com/g" /etc/apt/sources.list',
            '    RC=$?',
            '    if [ $RC -ne 0 ]; then manage_error sed $RC; fi',
            # the backslash is doubled because "\." is not a valid Python escape
            # sequence; the text written in the script is still "ubuntu\.com"
            '    sed -i "s/security.ubuntu.com/old-releases.ubuntu\\.com/g" /etc/apt/sources.list',
            '    RC=$?',
            '    if [ $RC -ne 0 ]; then manage_error sed $RC; fi',
            '    apt-get update',
            '    RC=$?',
            '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi',
            '    echo',
            '    echo "The file is fixed."',
            '}',
            rule,
            'function install_libtbb2',
            '{',
            '    echo "$SEP"',
            '    echo "Installing the package libtbb2 ..."',
            '    echo',
            '    apt-get --assume-yes install libtbb2',
            '    RC=$?',
            '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi',
            '    echo',
            '    echo "The package is installed."',
            '}',
            rule,
            'function install_mailutils',
            '{',
            '    echo "$SEP"',
            '    echo "Installing the package mailutils ..."',
            '    echo',
            '    HOST_IP=`curl checkip.amazonaws.com`',
            '    HOST_IP2=`echo "${HOST_IP//./-}"`',
            '    HOST_ADDRESS="ec2-${HOST_IP2}-compute-1.amazonaws.com"',
            '    echo "HOST_IP: $HOST_IP   HOST_ADDRESS: $HOST_ADDRESS"',
            '    debconf-set-selections <<< "postfix postfix/mailname string $HOST_ADDRESS"',
            '    RC=$?',
            '    if [ $RC -ne 0 ]; then manage_error debconf-set-selections $RC; fi',
            '    debconf-set-selections <<< "postfix postfix/main_mailer_type string \'Internet Site\'"',
            '    RC=$?',
            '    if [ $RC -ne 0 ]; then manage_error debconf-set-selections $RC; fi',
            '    apt-get --assume-yes install mailutils',
            '    RC=$?',
            '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi',
            '    echo',
            '    echo "The package is installed."',
            '}',
            rule,
            'function end',
            '{',
            '    END_DATETIME=`date --utc +%s`',
            '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
            '    calculate_duration',
            '    echo "$SEP"',
            '    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."',
            '    echo "$SEP"',
            recipient_line,
            subject_line,
            '    MESSAGE="The infrastructure software installation in node $HOSTNAME of cluster {0} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION).<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(cluster_name),
            mail_line,
            '    exit 0',
            '}',
            rule,
            'function manage_error',
            '{',
            '    END_DATETIME=`date --utc +%s`',
            '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
            '    calculate_duration',
            '    echo "$SEP"',
            '    echo "ERROR: $1 returned error $2"',
            '    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."',
            '    echo "$SEP"',
            recipient_line,
            subject_line,
            '    MESSAGE="The infrastructure software installation in node $HOSTNAME of cluster {0} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION).<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(cluster_name),
            mail_line,
            '    exit 3',
            '}',
            rule,
            'function calculate_duration',
            '{',
            '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`',
            '    HH=`expr $DURATION / 3600`',
            '    MM=`expr $DURATION % 3600 / 60`',
            '    SS=`expr $DURATION % 60`',
            '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`',
            '}',
            rule,
            'init',
            'fix_source_list',
            'install_libtbb2',
            'install_mailutils',
            'end',
        ]

        # create the script directory if it does not exist; a path without
        # directory part has nothing to create
        script_dir = os.path.dirname(infrastructure_software_installation_script)
        if script_dir:
            os.makedirs(script_dir, exist_ok=True)

        # write the script with Unix line ends
        with open(infrastructure_software_installation_script, mode='w', encoding='utf8', newline='\n') as file_id:
            file_id.writelines('{0}\n'.format(line) for line in script_lines)

    # "Exception" is caught instead of using a bare "except" so that
    # KeyboardInterrupt and SystemExit are not swallowed
    except Exception:
        error_list.append('*** ERROR: The file {0} can not be created'.format(infrastructure_software_installation_script))
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 17
0
def build_quast_process_script(cluster_name, current_run_dir):
    '''
    Build the current QUAST process script.

    The options are read from the QUAST config file. The script built is a
    Bash program that runs "quast.py" on the transcriptome file of the
    assembly dataset and mails the final status (OK or WRONG) to the contact
    data recipient.

    Parameters:
        cluster_name: cluster name written in the log and mail messages.
        current_run_dir: directory where the script changes to and that is
            passed to "quast.py" as output directory.

    Returns:
        The tuple (OK, error_list). OK is False and error_list holds a message
        when the assembly software/type combination has no known transcriptome
        file or when the script file can not be created.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the QUAST option dictionary
    quast_option_dict = xlib.get_option_dict(get_quast_config_file())

    # get the options
    identification_dict = quast_option_dict['identification']
    experiment_id = identification_dict['experiment_id']
    reference_dataset_id = identification_dict['reference_dataset_id']
    reference_file = identification_dict['reference_file']
    assembly_software = identification_dict['assembly_software']
    assembly_dataset_id = identification_dict['assembly_dataset_id']
    assembly_type = identification_dict['assembly_type']
    threads = quast_option_dict['QUAST parameters']['threads']

    # set the reference file path (only when a reference dataset is used)
    has_reference = reference_dataset_id.upper() != 'NONE'
    if has_reference:
        reference_file_path = xlib.get_cluster_reference_file(reference_dataset_id, reference_file)

    # set the transcriptome file path; it stays None when the assembly
    # software/type combination is unknown
    assembly_dataset_dir = xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)
    transcriptome_file = None
    if assembly_software == xlib.get_soapdenovotrans_code():
        if assembly_type.upper() == 'CONTIGS':
            transcriptome_file = '{0}/{1}-{2}.contig'.format(assembly_dataset_dir, experiment_id, assembly_dataset_id)
        elif assembly_type.upper() == 'SCAFFOLDS':
            transcriptome_file = '{0}/{1}-{2}.scafSeq'.format(assembly_dataset_dir, experiment_id, assembly_dataset_id)
    elif assembly_software == xlib.get_transabyss_code():
        transcriptome_file = '{0}/transabyss-final.fa'.format(assembly_dataset_dir)
    elif assembly_software == xlib.get_trinity_code():
        transcriptome_file = '{0}/Trinity.fasta'.format(assembly_dataset_dir)
    elif assembly_software == xlib.get_star_code():
        transcriptome_file = '{0}/Trinity-GG.fasta'.format(assembly_dataset_dir)
    elif assembly_software == xlib.get_cd_hit_est_code():
        transcriptome_file = '{0}/clustered-transcriptome.fasta'.format(assembly_dataset_dir)
    elif assembly_software == xlib.get_transcript_filter_code():
        transcriptome_file = '{0}/filtered-transcriptome.fasta'.format(assembly_dataset_dir)

    # verify the transcriptome file path was set; without this check the
    # failure would happen while the script is being written and it would be
    # reported as a file creation error
    if transcriptome_file is None:
        error_list.append('*** ERROR: The assembly software {0} with the assembly type {1} is not supported.'.format(assembly_software, assembly_type))
        OK = False

    # get the QUAST process script name
    quast_process_script = get_quast_process_script()

    # build and write the QUAST process script
    if OK:
        try:

            # get the data used several times in the script
            cluster_app_dir = xlib.get_cluster_app_dir()
            miniconda3_name = xlib.get_miniconda3_name()
            quast_bioconda_code = xlib.get_quast_bioconda_code()
            quast_name = xlib.get_quast_name()
            rule = '#-------------------------------------------------------------------------------'

            # build the lines shared by the functions "end" and "manage_error"
            recipient_line = '    RECIPIENT={0}'.format(xconfiguration.get_contact_data())
            subject_line = '    SUBJECT="{0}: {1} process"'.format(xlib.get_project_name(), quast_name)
            mail_line = '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'

            # build every line of the script before opening the file, so a
            # failure here does not leave a half-written script behind
            script_lines = [
                '#!/bin/bash',
                rule,
                'QUAST_PATH={0}/{1}/envs/{2}/bin'.format(cluster_app_dir, miniconda3_name, quast_bioconda_code),
                'PATH=$QUAST_PATH:$PATH',
                'SEP="#########################################"',
                'cd {0}/{1}/bin'.format(cluster_app_dir, miniconda3_name),
                'source activate {0}'.format(quast_bioconda_code),
                rule,
                'function init',
                '{',
                '    INIT_DATETIME=`date --utc +%s`',
                '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
                '    echo "$SEP"',
                '    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'.format(cluster_name),
                '}',
                rule,
                'function run_quast_process',
                '{',
                '    cd {0}'.format(current_run_dir),
                '    echo "$SEP"',
                '    quast.py --version',
                '    echo "$SEP"',
                '    /usr/bin/time \\',
                '        --format="$SEP\\nElapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\',
                '        quast.py \\',
                '            --threads {0} \\'.format(threads),
                '            --output-dir {0} \\'.format(current_run_dir),
            ]
            if has_reference:
                script_lines.append('            -R {0} \\'.format(reference_file_path))
            if assembly_type.upper() == 'SCAFFOLDS':
                script_lines.append('            --scaffolds \\')
            script_lines.extend([
                '            {0}'.format(transcriptome_file),
                '    RC=$?',
                '    if [ $RC -ne 0 ]; then manage_error quast.py $RC; fi',
                '}',
                rule,
                'function end',
                '{',
                '    END_DATETIME=`date --utc +%s`',
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
                '    calculate_duration',
                '    echo "$SEP"',
                '    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."',
                '    echo "$SEP"',
                recipient_line,
                subject_line,
                '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(quast_name, cluster_name),
                mail_line,
                '    exit 0',
                '}',
                rule,
                'function manage_error',
                '{',
                '    END_DATETIME=`date --utc +%s`',
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`',
                '    calculate_duration',
                '    echo "$SEP"',
                '    echo "ERROR: $1 returned error $2"',
                '    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."',
                '    echo "$SEP"',
                recipient_line,
                subject_line,
                '    MESSAGE="The {0} process in node $HOSTNAME of cluster {1} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'.format(quast_name, cluster_name),
                mail_line,
                '    exit 3',
                '}',
                rule,
                'function calculate_duration',
                '{',
                '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`',
                '    HH=`expr $DURATION / 3600`',
                '    MM=`expr $DURATION % 3600 / 60`',
                '    SS=`expr $DURATION % 60`',
                '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`',
                '}',
                rule,
                'init',
                'run_quast_process',
                'end',
            ])

            # create the script directory if it does not exist; a path
            # without directory part has nothing to create
            script_dir = os.path.dirname(quast_process_script)
            if script_dir:
                os.makedirs(script_dir, exist_ok=True)

            # write the script with Unix line ends
            with open(quast_process_script, mode='w', encoding='utf8', newline='\n') as file_id:
                file_id.writelines('{0}\n'.format(line) for line in script_lines)

        # "Exception" is caught instead of using a bare "except" so that
        # KeyboardInterrupt and SystemExit are not swallowed
        except Exception:
            error_list.append('*** ERROR: The file {0} can not be created'.format(quast_process_script))
            OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 18
0
def build_gzip_process_script(cluster_name, dataset_type, current_run_dir):
    '''
    Build the current gzip process script.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the gzip option dictionary
    gzip_option_dict = xlib.get_option_dict(get_gzip_config_file(dataset_type))

    # get the options
    experiment_id = gzip_option_dict['identification']['experiment_id']
    dataset_type_2 = gzip_option_dict['identification']['dataset_type']
    dataset_id = gzip_option_dict['identification']['dataset_id']
    action = gzip_option_dict['gzip parameters']['action']

    # get the sections list
    sections_list = []
    for section in gzip_option_dict.keys():
        sections_list.append(section)
    sections_list.sort()

    # build the dataset subdirectory and file name lists
    dataset_subdirectory_list = []
    file_name_list = []
    for section in sections_list:
        # if the section identification is like library-n
        if re.match('^file-[0-9]+$', section):
            dataset_subdirectory = gzip_option_dict[section][
                'dataset_subdirectory']
            dataset_subdirectory_list.append(dataset_subdirectory)
            file_name = gzip_option_dict[section]['file_name']
            file_name_list.append(file_name)

    # get the dataset directory
    if dataset_type_2 == 'reference':
        dataset_dir = xlib.get_cluster_reference_dataset_dir(dataset_id)
    elif dataset_type_2 == 'database':
        dataset_dir = xlib.get_cluster_database_dataset_dir(dataset_id)
    elif dataset_type_2 == 'read':
        dataset_dir = xlib.get_cluster_experiment_read_dataset_dir(
            experiment_id, dataset_id)
    elif dataset_type_2 == 'result':
        dataset_dir = xlib.get_cluster_experiment_result_dataset_dir(
            experiment_id, dataset_id)
    elif dataset_type_2 == 'whole-result':
        dataset_dir = xlib.get_cluster_experiment_result_dataset_dir(
            experiment_id, dataset_id)

    # write the gzip process script
    try:
        if not os.path.exists(
                os.path.dirname(get_gzip_process_script(dataset_type_2))):
            os.makedirs(
                os.path.dirname(get_gzip_process_script(dataset_type_2)))
        with open(get_gzip_process_script(dataset_type_2),
                  mode='w',
                  encoding='utf8',
                  newline='\n') as file_id:
            file_id.write('{0}\n'.format('#!/bin/bash'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format(
                'SEP="#########################################"'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format('function init'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    INIT_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format(
                '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`'
            ))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format(
                '    echo "Script started in node $HOSTNAME of cluster {0} at $FORMATTED_INIT_DATETIME UTC."'
                .format(cluster_name)))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format('function run_gzip_process'))
            file_id.write('{0}\n'.format('{'))
            if dataset_type_2 in ['reference', 'database', 'read', 'result']:
                file_id.write('{0}\n'.format(
                    '    cd {0}'.format(current_run_dir)))
                for i in range(len(dataset_subdirectory_list)):
                    file_id.write('{0}\n'.format('    echo "$SEP"'))
                    file_id.write('{0}\n'.format(
                        '    echo "Compressing/decompressing {0}/{1}/{2} ..."'.
                        format(dataset_dir, dataset_subdirectory_list[i],
                               file_name_list[i])))
                    file_id.write('{0}\n'.format('    /usr/bin/time \\'))
                    file_id.write('{0}\n'.format(
                        '        --format="Elapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'
                    ))
                    if action == 'compress':
                        file_id.write('{0}\n'.format(
                            '        gzip {0}/{1}/{2}'.format(
                                dataset_dir, dataset_subdirectory_list[i],
                                file_name_list[i])))
                    elif action == 'decompress':
                        file_id.write('{0}\n'.format(
                            '        gzip --decompress {0}/{1}/{2}'.format(
                                dataset_dir, dataset_subdirectory_list[i],
                                file_name_list[i])))
                    file_id.write('{0}\n'.format('    RC=$?'))
                    file_id.write('{0}\n'.format(
                        '    if [ $RC -ne 0 ]; then manage_error gzip $RC; fi')
                                  )
            elif dataset_type_2 == 'whole-result':
                file_id.write('{0}\n'.format(
                    '    cd {0}'.format(current_run_dir)))
                file_id.write('{0}\n'.format('    echo "$SEP"'))
                file_id.write('{0}\n'.format(
                    '    echo "Compressing/decompressing {0} ..."'.format(
                        dataset_dir)))
                file_id.write('{0}\n'.format('    /usr/bin/time \\'))
                file_id.write('{0}\n'.format(
                    '        --format="Elapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'
                ))
                if action == 'compress':
                    file_id.write('{0}\n'.format(
                        '        tar --create --gzip --verbose --file={0}.tar.gz {0}'
                        .format(dataset_dir)))
                elif action == 'decompress':
                    file_id.write('{0}\n'.format(
                        '        tar --extract --gzip --verbose --file={0} --directory=/'
                        .format(dataset_dir)))
                file_id.write('{0}\n'.format('    RC=$?'))
                file_id.write('{0}\n'.format(
                    '    if [ $RC -ne 0 ]; then manage_error tar $RC; fi'))
                file_id.write('{0}\n'.format('    echo "$SEP"'))
                file_id.write('{0}\n'.format(
                    '    echo "Removing {0} ..."'.format(dataset_dir)))
                file_id.write('{0}\n'.format('    /usr/bin/time \\'))
                file_id.write('{0}\n'.format(
                    '        --format="Elapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\'
                ))
                file_id.write('{0}\n'.format(
                    '        rm -rf {0}'.format(dataset_dir)))
                file_id.write('{0}\n'.format('    RC=$?'))
                file_id.write('{0}\n'.format(
                    '    if [ $RC -ne 0 ]; then manage_error rm $RC; fi'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format('function end'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    END_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format(
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'
            ))
            file_id.write('{0}\n'.format('    calculate_duration'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format(
                '    echo "Script ended OK at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'
            ))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    RECIPIENT={0}'.format(
                xconfiguration.get_contact_data())))
            file_id.write('{0}\n'.format(
                '    SUBJECT="{0}: {1} process"'.format(
                    xlib.get_project_name(), xlib.get_gzip_name())))
            file_id.write('{0}\n'.format(
                '    MESSAGE="The {0} process in node $HOSTNAME of cluster {0} ended OK at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
                .format(xlib.get_gzip_name(), cluster_name)))
            file_id.write('{0}\n'.format(
                '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'
            ))
            file_id.write('{0}\n'.format('    exit 0'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format('function manage_error'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format('    END_DATETIME=`date --utc +%s`'))
            file_id.write('{0}\n'.format(
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`'
            ))
            file_id.write('{0}\n'.format('    calculate_duration'))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write(
                '{0}\n'.format('    echo "ERROR: $1 returned error $2"'))
            file_id.write('{0}\n'.format(
                '    echo "Script ended WRONG at $FORMATTED_END_DATETIME UTC with a run duration of $DURATION s ($FORMATTED_DURATION)."'
            ))
            file_id.write('{0}\n'.format('    echo "$SEP"'))
            file_id.write('{0}\n'.format('    RECIPIENT={0}'.format(
                xconfiguration.get_contact_data())))
            file_id.write('{0}\n'.format(
                '    SUBJECT="{0}: {1} process"'.format(
                    xlib.get_project_name(), xlib.get_gzip_name())))
            file_id.write('{0}\n'.format(
                '    MESSAGE="The {0} process in node $HOSTNAME of cluster {0} ended WRONG at $FORMATTED_END_DATETIME with a run duration of $DURATION s ($FORMATTED_DURATION). Please review its log.<br/><br/>Regards,<br/>GI Genetica, Fisiologia e Historia Forestal<br/>Dpto. Sistemas y Recursos Naturales<br/>ETSI Montes, Forestal y del Medio Natural<br/>Universidad Politecnica de Madrid<br/>https://github.com/ggfhf/"'
                .format(xlib.get_gzip_name(), cluster_name)))
            file_id.write('{0}\n'.format(
                '    mail --append "Content-type: text/html;"  --subject "$SUBJECT" "$RECIPIENT" <<< "$MESSAGE"'
            ))
            file_id.write('{0}\n'.format('    exit 3'))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format('function calculate_duration'))
            file_id.write('{0}\n'.format('{'))
            file_id.write('{0}\n'.format(
                '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`'))
            file_id.write('{0}\n'.format('    HH=`expr $DURATION / 3600`'))
            file_id.write(
                '{0}\n'.format('    MM=`expr $DURATION % 3600 / 60`'))
            file_id.write('{0}\n'.format('    SS=`expr $DURATION % 60`'))
            file_id.write('{0}\n'.format(
                '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`'
            ))
            file_id.write('{0}\n'.format('}'))
            file_id.write('{0}\n'.format(
                '#-------------------------------------------------------------------------------'
            ))
            file_id.write('{0}\n'.format('init'))
            file_id.write('{0}\n'.format('run_gzip_process'))
            file_id.write('{0}\n'.format('end'))
    except:
        error_list.append('*** ERROR: The file {0} can not be created'.format(
            get_gzip_process_script(dataset_type_2)))
        OK = False

    # return the control variable and the error list
    return (OK, error_list)