Esempio n. 1
0
def build_htseq_count_process_script(cluster_name, current_run_dir):
    '''
    Build the current htseq-count process script.

    The options are read from the htseq-count config file and turned into a
    bash script that runs htseq-count on the "*.sorted.bam" files of every
    "alignment-dataset-n" section, redirecting its output to "read-count.txt"
    in the current run directory.

    Parameters:
        cluster_name: cluster name echoed by the script and passed to the
            mail message builders.
        current_run_dir: run directory written as CURRENT_DIR in the script
            and used to derive the status directory and status files.

    Returns:
        A tuple (OK, error_list): OK is False when "other_parameters" is
        malformed or when the script can not be created; error_list holds
        the corresponding messages.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the htseq-count option dictionary
    htseq_count_option_dict = xlib.get_option_dict(
        get_htseq_count_config_file())

    # get the options
    identification_dict = htseq_count_option_dict['identification']
    parameter_dict = htseq_count_option_dict['htseq-count parameters']
    experiment_id = identification_dict['experiment_id']
    reference_dataset_id = identification_dict['reference_dataset_id']
    annotation_file = identification_dict['annotation_file']
    nprocesses = parameter_dict['nprocesses']
    stranded = parameter_dict['stranded']
    minaqual = parameter_dict['minaqual']
    # named feature_type so the builtin "type" is not shadowed
    feature_type = parameter_dict['type']
    idattr = parameter_dict['idattr']
    mode = parameter_dict['mode']
    nonunique = parameter_dict['nonunique']
    other_parameters = parameter_dict['other_parameters']

    # build the alignment dataset identification list from the sections whose
    # identification is like alignment-dataset-n (sections in sorted order)
    alignment_dataset_id_list = [
        htseq_count_option_dict[section]['alignment_dataset_id']
        for section in sorted(htseq_count_option_dict.keys())
        if re.match('^alignment-dataset-[0-9]+$', section)
    ]

    # parse the other parameters before anything is written, so a malformed
    # entry is reported as such instead of surfacing as a file creation error
    other_option_list = []
    if other_parameters.upper() != 'NONE':
        for parameter in other_parameters.split(';'):
            parameter = parameter.strip()
            # skip empty entries (e.g. the one produced by a trailing ";")
            if not parameter:
                continue
            if parameter.find('=') > 0:
                mo = re.search(r'^--(.+)=(.+)$', parameter)
            else:
                mo = re.search(r'^--(.+)$', parameter)
            if mo is None:
                error_list.append(
                    f'*** ERROR: The value "{parameter}" in other_parameters is not like --name or --name=value.'
                )
                OK = False
                continue
            # "name" or "name=value", every part stripped
            other_option_list.append(
                '='.join(group.strip() for group in mo.groups()))
    if not OK:
        return (OK, error_list)

    # set the annotation file path
    annotation_file = xlib.get_cluster_reference_file(reference_dataset_id,
                                                      annotation_file)

    # get the process script path
    script_path = get_htseq_count_process_script()

    # write the htseq-count process script
    try:

        # rule line that separates the script parts
        rule = '#-------------------------------------------------------------------------------\n'

        # get the data used by the mail function
        process_name = f'{xlib.get_htseq_count_name()} process'
        mail_message_ok = xlib.get_mail_message_ok(process_name,
                                                   cluster_name)
        mail_message_wrong = xlib.get_mail_message_wrong(
            process_name, cluster_name)

        # the whole script is assembled in memory and written in one go, so
        # a failure while building it never leaves a truncated script
        script_lines = [
            '#!/bin/bash\n',
            rule,
            'SEP="#########################################"\n',
            'export HOST_IP=`curl --silent checkip.amazonaws.com`\n',
            'export HOST_ADDRESS="ec2-${HOST_IP//./-}-compute-1.amazonaws.com"\n',
            'export AWS_CONFIG_FILE=/home/ubuntu/.aws/config\n',
            'export AWS_SHARED_CREDENTIALS_FILE=/home/ubuntu/.aws/credentials\n',
            rule,
            f'MINICONDA3_BIN_PATH={xlib.get_cluster_app_dir()}/{xlib.get_miniconda3_name()}/bin\n',
            'export PATH=$MINICONDA3_BIN_PATH:$PATH\n',
            rule,
            f'STATUS_DIR={xlib.get_status_dir(current_run_dir)}\n',
            f'SCRIPT_STATUS_OK={xlib.get_status_ok(current_run_dir)}\n',
            f'SCRIPT_STATUS_WRONG={xlib.get_status_wrong(current_run_dir)}\n',
            'mkdir --parents $STATUS_DIR\n',
            'if [ -f $SCRIPT_STATUS_OK ]; then rm $SCRIPT_STATUS_OK; fi\n',
            'if [ -f $SCRIPT_STATUS_WRONG ]; then rm $SCRIPT_STATUS_WRONG; fi\n',
            rule,
            f'CURRENT_DIR={current_run_dir}\n',
            rule,
            'function init\n',
            '{\n',
            '    INIT_DATETIME=`date --utc +%s`\n',
            '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n',
            '    echo "$SEP"\n',
            '    echo "Script started at $FORMATTED_INIT_DATETIME+00:00."\n',
            '    echo "$SEP"\n',
            f'    echo "CLUSTER: {cluster_name}"\n',
            '    echo "HOST NAME: $HOSTNAME"\n',
            '    echo "HOST IP: $HOST_IP"\n',
            '    echo "HOST ADDRESS: $HOST_ADDRESS"\n',
            '}\n',
            rule,
            # the version command is emitted commented out, so this function
            # only activates and deactivates the environment
            'function print_htseq_count_version\n',
            '{\n',
            f'    source activate {xlib.get_htseq_anaconda_code()}\n',
            '    echo "$SEP"\n',
            '    # -- htseq-count --version\n',
            '    conda deactivate\n',
            '}\n',
            rule,
            'function run_htseq_count_process\n',
            '{\n',
            f'    source activate {xlib.get_htseq_anaconda_code()}\n',
            '    cd $CURRENT_DIR\n',
            '    echo "$SEP"\n',
            '    echo "Counting reads ..."\n',
            '    /usr/bin/time \\\n',
            f'        --format="{xlib.get_time_output_format(separator=False)}" \\\n',
            '        htseq-count \\\n',
            f'            --nprocesses={nprocesses} \\\n',
            '            --format=bam \\\n',
            f'            --stranded={stranded.lower()} \\\n',
            f'            --minaqual={minaqual} \\\n',
            f'            --type={feature_type} \\\n',
            f'            --idattr={idattr} \\\n',
            f'            --mode={mode.lower()} \\\n',
            f'            --nonunique={nonunique.lower()} \\\n',
            '            --quiet \\\n',
        ]

        # add the other parameters, one option per line
        script_lines.extend(f'            --{other_option} \\\n'
                            for other_option in other_option_list)

        # add the alignment files of every alignment dataset
        for alignment_dataset_id in alignment_dataset_id_list:
            alignment_files = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, alignment_dataset_id)}/*.sorted.bam'
            script_lines.append(f'            {alignment_files} \\\n')

        script_lines.extend([
            f'            {annotation_file} \\\n',
            '            > read-count.txt\n',
            '    RC=$?\n',
            '    if [ $RC -ne 0 ]; then manage_error htseq-count $RC; fi\n',
            '    echo "Reads are counted."\n',
            '    conda deactivate\n',
            '}\n',
            rule,
            'function end\n',
            '{\n',
            '    END_DATETIME=`date --utc +%s`\n',
            '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n',
            '    calculate_duration\n',
            '    echo "$SEP"\n',
            '    echo "Script ended OK at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n',
            '    echo "$SEP"\n',
            '    send_mail ok\n',
            '    touch $SCRIPT_STATUS_OK\n',
            '    exit 0\n',
            '}\n',
            rule,
            'function manage_error\n',
            '{\n',
            '    END_DATETIME=`date --utc +%s`\n',
            '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n',
            '    calculate_duration\n',
            '    echo "$SEP"\n',
            '    echo "ERROR: $1 returned error $2"\n',
            '    echo "Script ended WRONG at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n',
            '    echo "$SEP"\n',
            '    send_mail wrong\n',
            '    touch $SCRIPT_STATUS_WRONG\n',
            '    exit 3\n',
            '}\n',
            rule,
            'function send_mail\n',
            '{\n',
            f'    SUBJECT="{xlib.get_project_name()}: {process_name}"\n',
            '    if [ "$1" == "ok" ]; then\n',
            f'        MESSAGE="{mail_message_ok}"\n',
            '    elif [ "$1" == "wrong" ]; then\n',
            f'        MESSAGE="{mail_message_wrong}"\n',
            '    else\n',
            '         MESSAGE=""\n',
            '    fi\n',
            '    DESTINATION_FILE=mail-destination.json\n',
            '    echo "{" > $DESTINATION_FILE\n',
            f'    echo "    \\\"ToAddresses\\\":  [\\\"{xconfiguration.get_contact_data()}\\\"]," >> $DESTINATION_FILE\n',
            '    echo "    \\\"CcAddresses\\\":  []," >> $DESTINATION_FILE\n',
            '    echo "    \\\"BccAddresses\\\":  []" >> $DESTINATION_FILE\n',
            '    echo "}" >> $DESTINATION_FILE\n',
            '    MESSAGE_FILE=mail-message.json\n',
            '    echo "{" > $MESSAGE_FILE\n',
            '    echo "    \\\"Subject\\\": {" >> $MESSAGE_FILE\n',
            '    echo "        \\\"Data\\\":  \\\"$SUBJECT\\\"," >> $MESSAGE_FILE\n',
            '    echo "        \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n',
            '    echo "    }," >> $MESSAGE_FILE\n',
            '    echo "    \\\"Body\\\": {" >> $MESSAGE_FILE\n',
            '    echo "        \\\"Html\\\": {" >> $MESSAGE_FILE\n',
            '    echo "            \\\"Data\\\":  \\\"$MESSAGE\\\"," >> $MESSAGE_FILE\n',
            '    echo "            \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n',
            '    echo "        }" >> $MESSAGE_FILE\n',
            '    echo "    }" >> $MESSAGE_FILE\n',
            '    echo "}" >> $MESSAGE_FILE\n',
            f'    aws ses send-email --from {xconfiguration.get_contact_data()} --destination file://$DESTINATION_FILE --message file://$MESSAGE_FILE\n',
            '}\n',
            rule,
            'function calculate_duration\n',
            '{\n',
            '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`\n',
            '    HH=`expr $DURATION / 3600`\n',
            '    MM=`expr $DURATION % 3600 / 60`\n',
            '    SS=`expr $DURATION % 60`\n',
            '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`\n',
            '}\n',
            rule,
            'init\n',
            'print_htseq_count_version\n',
            'run_htseq_count_process\n',
            'end\n',
        ])

        # create the script directory if needed (exist_ok avoids the race
        # between checking for the directory and creating it)
        os.makedirs(os.path.dirname(script_path), exist_ok=True)

        # write the script with Unix line ends
        with open(script_path,
                  mode='w',
                  encoding='iso-8859-1',
                  newline='\n') as script_file_id:
            script_file_id.writelines(script_lines)

    except Exception as e:
        # any failure is reported through the error list, not raised
        error_list.append(f'*** EXCEPTION: "{e}".')
        error_list.append(
            f'*** ERROR: The file {script_path} can not be created')
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 2
0
def build_busco_process_script(cluster_name, current_run_dir):
    '''
    Build the current BUSCO process script.

    The options are read from the BUSCO config file and turned into a bash
    script that downloads and unpacks the lineage data and then runs busco
    on the transcriptome file of the selected assembly dataset.

    Parameters:
        cluster_name: cluster name echoed by the script and passed to the
            mail message builders.
        current_run_dir: run directory written as CURRENT_DIR in the script;
            its base name is passed to busco as the output name.

    Returns:
        A tuple (OK, error_list): OK is False when the transcriptome file
        can not be determined from the assembly software and type, or when
        the script can not be created; error_list holds the messages.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the BUSCO option dictionary
    busco_option_dict = xlib.get_option_dict(get_busco_config_file())

    # get the options
    identification_dict = busco_option_dict['identification']
    parameter_dict = busco_option_dict['BUSCO parameters']
    experiment_id = identification_dict['experiment_id']
    assembly_software = identification_dict['assembly_software']
    assembly_dataset_id = identification_dict['assembly_dataset_id']
    assembly_type = identification_dict['assembly_type']
    ncpu = parameter_dict['ncpu']
    lineage_data_url = parameter_dict['lineage_data_url']
    mode = parameter_dict['mode'].lower()
    evalue = parameter_dict['evalue']
    limit = parameter_dict['limit']
    species = parameter_dict['species']
    long_option = parameter_dict['long'].upper()
    # the original case is kept because the value is copied to the command
    # line; upper case is only used to compare against NONE
    augustus_options = parameter_dict['augustus_options']

    # get the file and name from the lineage data url; the name is the text
    # before the first point (the whole file name when there is no point)
    lineage_data_file = lineage_data_url.split("/")[-1]
    lineage_data = lineage_data_file.split('.', 1)[0]

    # get the transcriptome file name corresponding to the assembly software
    transcriptome_file_name = None
    if assembly_software == xlib.get_soapdenovotrans_code():
        if assembly_type == 'CONTIGS':
            transcriptome_file_name = f'{experiment_id}-{assembly_dataset_id}.contig'
        elif assembly_type == 'SCAFFOLDS':
            transcriptome_file_name = f'{experiment_id}-{assembly_dataset_id}.scafSeq'
    elif assembly_software == xlib.get_transabyss_code():
        transcriptome_file_name = 'transabyss-final.fa'
    elif assembly_software == xlib.get_trinity_code():
        transcriptome_file_name = 'Trinity.fasta'
    elif assembly_software == xlib.get_ggtrinity_code():
        transcriptome_file_name = 'Trinity-GG.fasta'
    elif assembly_software == xlib.get_cd_hit_est_code():
        transcriptome_file_name = 'clustered-transcriptome.fasta'
    elif assembly_software == xlib.get_transcript_filter_code():
        transcriptome_file_name = 'filtered-transcriptome.fasta'

    # an unknown assembly software or type is reported explicitly instead of
    # surfacing later as an unbound variable while the script is written
    if transcriptome_file_name is None:
        error_list.append(
            f'*** ERROR: The transcriptome file can not be determined for the assembly software "{assembly_software}" and the assembly type "{assembly_type}".'
        )
        OK = False
        return (OK, error_list)

    # set the transcriptome file path
    transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/{transcriptome_file_name}'

    # get the process script path
    script_path = get_busco_process_script()

    # write the BUSCO process script
    try:

        # rule line that separates the script parts
        rule = '#-------------------------------------------------------------------------------\n'

        # Python one-liner run by the script to download the lineage data
        download_script = f"import requests; r = requests.get('{lineage_data_url}') ; open('{lineage_data_file}' , 'wb').write(r.content)"

        # get the data used by the mail function
        process_name = f'{xlib.get_busco_name()} process'
        mail_message_ok = xlib.get_mail_message_ok(process_name,
                                                   cluster_name)
        mail_message_wrong = xlib.get_mail_message_wrong(
            process_name, cluster_name)

        # the whole script is assembled in memory and written in one go, so
        # a failure while building it never leaves a truncated script
        script_lines = [
            '#!/bin/bash\n',
            rule,
            'SEP="#########################################"\n',
            'export HOST_IP=`curl --silent checkip.amazonaws.com`\n',
            'export HOST_ADDRESS="ec2-${HOST_IP//./-}-compute-1.amazonaws.com"\n',
            'export AWS_CONFIG_FILE=/home/ubuntu/.aws/config\n',
            'export AWS_SHARED_CREDENTIALS_FILE=/home/ubuntu/.aws/credentials\n',
            rule,
            f'MINICONDA3_BIN_PATH={xlib.get_cluster_app_dir()}/{xlib.get_miniconda3_name()}/bin\n',
            'export PATH=$MINICONDA3_BIN_PATH:$PATH\n',
            rule,
            f'STATUS_DIR={xlib.get_status_dir(current_run_dir)}\n',
            f'SCRIPT_STATUS_OK={xlib.get_status_ok(current_run_dir)}\n',
            f'SCRIPT_STATUS_WRONG={xlib.get_status_wrong(current_run_dir)}\n',
            'mkdir --parents $STATUS_DIR\n',
            'if [ -f $SCRIPT_STATUS_OK ]; then rm $SCRIPT_STATUS_OK; fi\n',
            'if [ -f $SCRIPT_STATUS_WRONG ]; then rm $SCRIPT_STATUS_WRONG; fi\n',
            rule,
            f'CURRENT_DIR={current_run_dir}\n',
            rule,
            'function init\n',
            '{\n',
            '    INIT_DATETIME=`date --utc +%s`\n',
            '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n',
            '    echo "$SEP"\n',
            '    echo "Script started at $FORMATTED_INIT_DATETIME+00:00."\n',
            '    echo "$SEP"\n',
            f'    echo "CLUSTER: {cluster_name}"\n',
            '    echo "HOST NAME: $HOSTNAME"\n',
            '    echo "HOST IP: $HOST_IP"\n',
            '    echo "HOST ADDRESS: $HOST_ADDRESS"\n',
            '}\n',
            rule,
            'function download_lineage_data\n',
            '{\n',
            '    cd $CURRENT_DIR\n',
            '    echo "$SEP"\n',
            '    echo "Downloading lineage data ..."\n',
            f'    $MINICONDA3_BIN_PATH/python3 -c "{download_script}"\n',
            '    RC=$?\n',
            '    if [ $RC -ne 0 ]; then manage_error download_script $RC; fi\n',
            f'    tar -xzvf ./{lineage_data_file}\n',
            '    RC=$?\n',
            '    if [ $RC -ne 0 ]; then manage_error tar $RC; fi\n',
            f'    rm ./{lineage_data_file}\n',
            '    RC=$?\n',
            '    if [ $RC -ne 0 ]; then manage_error rm $RC; fi\n',
            '    echo "Lineage data are downloaded."\n',
            '}\n',
            rule,
            'function run_busco_process\n',
            '{\n',
            f'    source activate {xlib.get_busco_anaconda_code()}\n',
            '    cd $CURRENT_DIR\n',
            '    echo "$SEP"\n',
            '    echo "Assessing the transcriptome quality ..."\n',
            '    /usr/bin/time \\\n',
            f'        --format="{xlib.get_time_output_format(separator=False)}" \\\n',
            '        busco \\\n',
            f'            --cpu={ncpu} \\\n',
            f'            --lineage_dataset=./{lineage_data} \\\n',
            f'            --mode={mode} \\\n',
            f'            --evalue={evalue} \\\n',
            f'            --limit={limit} \\\n',
        ]

        # add the optional parameters
        if species.upper() != 'NONE':
            script_lines.append(f'            --species={species} \\\n')
        if long_option == 'YES':
            script_lines.append('            --long \\\n')
        if augustus_options.upper() != 'NONE':
            # NOTE(review): the option name is kept as it was; confirm that
            # "--august_options" is accepted by the installed BUSCO version
            script_lines.append(
                f'            --august_options="{augustus_options}" \\\n')

        script_lines.extend([
            f'            --in={transcriptome_file} \\\n',
            f'            --out={os.path.basename(current_run_dir)}\n',
            '    RC=$?\n',
            '    if [ $RC -ne 0 ]; then manage_error run_BUSCO.py $RC; fi\n',
            '    echo "The assessment is done."\n',
            '    conda deactivate\n',
            '}\n',
            rule,
            'function end\n',
            '{\n',
            '    END_DATETIME=`date --utc +%s`\n',
            '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n',
            '    calculate_duration\n',
            '    echo "$SEP"\n',
            '    echo "Script ended OK at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n',
            '    echo "$SEP"\n',
            '    send_mail ok\n',
            '    touch $SCRIPT_STATUS_OK\n',
            '    exit 0\n',
            '}\n',
            rule,
            'function manage_error\n',
            '{\n',
            '    END_DATETIME=`date --utc +%s`\n',
            '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n',
            '    calculate_duration\n',
            '    echo "$SEP"\n',
            '    echo "ERROR: $1 returned error $2"\n',
            '    echo "Script ended WRONG at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n',
            '    echo "$SEP"\n',
            '    send_mail wrong\n',
            '    touch $SCRIPT_STATUS_WRONG\n',
            '    exit 3\n',
            '}\n',
            rule,
            'function send_mail\n',
            '{\n',
            f'    SUBJECT="{xlib.get_project_name()}: {process_name}"\n',
            '    if [ "$1" == "ok" ]; then\n',
            f'        MESSAGE="{mail_message_ok}"\n',
            '    elif [ "$1" == "wrong" ]; then\n',
            f'        MESSAGE="{mail_message_wrong}"\n',
            '    else\n',
            '         MESSAGE=""\n',
            '    fi\n',
            '    DESTINATION_FILE=mail-destination.json\n',
            '    echo "{" > $DESTINATION_FILE\n',
            f'    echo "    \\\"ToAddresses\\\":  [\\\"{xconfiguration.get_contact_data()}\\\"]," >> $DESTINATION_FILE\n',
            '    echo "    \\\"CcAddresses\\\":  []," >> $DESTINATION_FILE\n',
            '    echo "    \\\"BccAddresses\\\":  []" >> $DESTINATION_FILE\n',
            '    echo "}" >> $DESTINATION_FILE\n',
            '    MESSAGE_FILE=mail-message.json\n',
            '    echo "{" > $MESSAGE_FILE\n',
            '    echo "    \\\"Subject\\\": {" >> $MESSAGE_FILE\n',
            '    echo "        \\\"Data\\\":  \\\"$SUBJECT\\\"," >> $MESSAGE_FILE\n',
            '    echo "        \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n',
            '    echo "    }," >> $MESSAGE_FILE\n',
            '    echo "    \\\"Body\\\": {" >> $MESSAGE_FILE\n',
            '    echo "        \\\"Html\\\": {" >> $MESSAGE_FILE\n',
            '    echo "            \\\"Data\\\":  \\\"$MESSAGE\\\"," >> $MESSAGE_FILE\n',
            '    echo "            \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n',
            '    echo "        }" >> $MESSAGE_FILE\n',
            '    echo "    }" >> $MESSAGE_FILE\n',
            '    echo "}" >> $MESSAGE_FILE\n',
            f'    aws ses send-email --from {xconfiguration.get_contact_data()} --destination file://$DESTINATION_FILE --message file://$MESSAGE_FILE\n',
            '}\n',
            rule,
            'function calculate_duration\n',
            '{\n',
            '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`\n',
            '    HH=`expr $DURATION / 3600`\n',
            '    MM=`expr $DURATION % 3600 / 60`\n',
            '    SS=`expr $DURATION % 60`\n',
            '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`\n',
            '}\n',
            rule,
            'init\n',
            'download_lineage_data\n',
            'run_busco_process\n',
            'end\n',
        ])

        # create the script directory if needed (exist_ok avoids the race
        # between checking for the directory and creating it)
        os.makedirs(os.path.dirname(script_path), exist_ok=True)

        # write the script with Unix line ends
        with open(script_path,
                  mode='w',
                  encoding='iso-8859-1',
                  newline='\n') as script_file_id:
            script_file_id.writelines(script_lines)

    except Exception as e:
        # any failure is reported through the error list, not raised
        error_list.append(f'*** EXCEPTION: "{e}".')
        error_list.append(
            f'*** ERROR: The file {script_path} can not be created')
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 3
0
def build_gzip_process_script(cluster_name, dataset_type, current_run_dir):
    '''
    Build the current gzip process script.

    The options are read from the gzip config file of "dataset_type"; the
    dataset type stored in that config file decides which dataset directory is
    used and where the script is written.

    Parameters:
        cluster_name: cluster name echoed by the script and passed to the mail
            message builders.
        dataset_type: dataset type used to locate the gzip config file.
        current_run_dir: run directory where the script changes to and from
            which the status directory/file paths are derived.

    Returns:
        A tuple (OK, error_list): OK is False and error_list holds the error
        messages when the options are not valid or the script file can not be
        written.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the gzip option dictionary
    gzip_option_dict = xlib.get_option_dict(get_gzip_config_file(dataset_type))

    # get the options
    experiment_id = gzip_option_dict['identification']['experiment_id']
    dataset_type_2 = gzip_option_dict['identification']['dataset_type']
    dataset_id = gzip_option_dict['identification']['dataset_id']
    action = gzip_option_dict['gzip parameters']['action']

    # build the dataset subdirectory and file name lists
    # (sections are visited in sorted order)
    dataset_subdirectory_list = []
    file_name_list = []
    for section in sorted(gzip_option_dict.keys()):
        # if the section identification is like file-n
        if re.match('^file-[0-9]+$', section):
            dataset_subdirectory = gzip_option_dict[section]['dataset_subdirectory']
            dataset_subdirectory_list.append(dataset_subdirectory)
            file_name = gzip_option_dict[section]['file_name']
            file_name_list.append(file_name)

    # get the dataset directory
    dataset_dir = None
    if dataset_type_2 == 'reference':
        dataset_dir = xlib.get_cluster_reference_dataset_dir(dataset_id)
    elif dataset_type_2 == 'database':
        dataset_dir = xlib.get_cluster_database_dataset_dir(dataset_id)
    elif dataset_type_2 == 'read':
        dataset_dir = xlib.get_cluster_experiment_read_dataset_dir(experiment_id, dataset_id)
    elif dataset_type_2 == 'result':
        dataset_dir = xlib.get_cluster_experiment_result_dataset_dir(experiment_id, dataset_id)
    elif dataset_type_2 == 'whole-result':
        dataset_dir = xlib.get_cluster_experiment_result_dataset_dir(experiment_id, dataset_id)
    else:
        # an unknown dataset type would yield an empty bash function body,
        # which is a syntax error in the generated script
        error_list.append(f'*** ERROR: The dataset type "{dataset_type_2}" is not valid.')
        OK = False

    # check the action
    if action not in ('compress', 'decompress'):
        # an unknown action would leave "/usr/bin/time" without a command to run
        error_list.append(f'*** ERROR: The action "{action}" is not valid.')
        OK = False

    # do not write a script that can not run
    if not OK:
        return (OK, error_list)

    # set the script path and the lines used several times in the script
    script_path = get_gzip_process_script(dataset_type_2)
    separator_line = '#-------------------------------------------------------------------------------\n'
    time_format_line = '        --format="Elapsed real time (s): %e\\nCPU time in kernel mode (s): %S\\nCPU time in user mode (s): %U\\nPercentage of CPU: %P\\nMaximum resident set size(Kb): %M\\nAverage total memory use (Kb):%K" \\\n'

    # write the gzip process script
    try:
        os.makedirs(os.path.dirname(script_path), exist_ok=True)
        with open(script_path, mode='w', encoding='iso-8859-1', newline='\n') as script_file_id:
            script_file_id.write('#!/bin/bash\n')
            script_file_id.write(separator_line)
            script_file_id.write('SEP="#########################################"\n')
            script_file_id.write('export HOST_IP=`curl --silent checkip.amazonaws.com`\n')
            script_file_id.write('export HOST_ADDRESS="ec2-${HOST_IP//./-}-compute-1.amazonaws.com"\n')
            script_file_id.write('export AWS_CONFIG_FILE=/home/ubuntu/.aws/config\n')
            script_file_id.write('export AWS_SHARED_CREDENTIALS_FILE=/home/ubuntu/.aws/credentials\n')
            script_file_id.write(separator_line)
            script_file_id.write(f'STATUS_DIR={xlib.get_status_dir(current_run_dir)}\n')
            script_file_id.write(f'SCRIPT_STATUS_OK={xlib.get_status_ok(current_run_dir)}\n')
            script_file_id.write(f'SCRIPT_STATUS_WRONG={xlib.get_status_wrong(current_run_dir)}\n')
            script_file_id.write('mkdir --parents $STATUS_DIR\n')
            script_file_id.write('if [ -f $SCRIPT_STATUS_OK ]; then rm $SCRIPT_STATUS_OK; fi\n')
            script_file_id.write('if [ -f $SCRIPT_STATUS_WRONG ]; then rm $SCRIPT_STATUS_WRONG; fi\n')
            script_file_id.write(separator_line)
            script_file_id.write('function init\n')
            script_file_id.write('{\n')
            script_file_id.write('    INIT_DATETIME=`date --utc +%s`\n')
            script_file_id.write('    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    echo "Script started at $FORMATTED_INIT_DATETIME+00:00."\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(f'    echo "CLUSTER: {cluster_name}"\n')
            script_file_id.write('    echo "HOST NAME: $HOSTNAME"\n')
            script_file_id.write('    echo "HOST IP: $HOST_IP"\n')
            script_file_id.write('    echo "HOST ADDRESS: $HOST_ADDRESS"\n')
            script_file_id.write('}\n')
            script_file_id.write(separator_line)
            script_file_id.write('function run_gzip_process\n')
            script_file_id.write('{\n')
            if dataset_type_2 in ['reference', 'database', 'read', 'result']:
                # every selected file is compressed/decompressed on its own
                script_file_id.write(f'    cd {current_run_dir}\n')
                for dataset_subdirectory, file_name in zip(dataset_subdirectory_list, file_name_list):
                    file_path = f'{dataset_dir}/{dataset_subdirectory}/{file_name}'
                    script_file_id.write('    echo "$SEP"\n')
                    script_file_id.write(f'    echo "Compressing/decompressing {file_path} ..."\n')
                    script_file_id.write('    /usr/bin/time \\\n')
                    script_file_id.write(time_format_line)
                    if action == 'compress':
                        script_file_id.write(f'        gzip {file_path}\n')
                    elif action == 'decompress':
                        script_file_id.write(f'        gzip --decompress {file_path}\n')
                    script_file_id.write('    RC=$?\n')
                    script_file_id.write('    if [ $RC -ne 0 ]; then manage_error gzip $RC; fi\n')
            elif dataset_type_2 == 'whole-result':
                # the whole dataset is packed/unpacked with tar and then the source is removed
                script_file_id.write(f'    cd {current_run_dir}\n')
                script_file_id.write('    echo "$SEP"\n')
                script_file_id.write(f'    echo "Compressing/decompressing {dataset_dir} ..."\n')
                script_file_id.write('    /usr/bin/time \\\n')
                script_file_id.write(time_format_line)
                if action == 'compress':
                    script_file_id.write(f'        tar --create --gzip --verbose --file={dataset_dir}.tar.gz {dataset_dir}\n')
                elif action == 'decompress':
                    script_file_id.write(f'        tar --extract --gzip --verbose --file={dataset_dir} --directory=/\n')
                script_file_id.write('    RC=$?\n')
                script_file_id.write('    if [ $RC -ne 0 ]; then manage_error tar $RC; fi\n')
                script_file_id.write('    echo "$SEP"\n')
                script_file_id.write(f'    echo "Removing {dataset_dir} ..."\n')
                script_file_id.write('    /usr/bin/time \\\n')
                script_file_id.write(time_format_line)
                script_file_id.write(f'        rm -rf {dataset_dir}\n')
                script_file_id.write('    RC=$?\n')
                script_file_id.write('    if [ $RC -ne 0 ]; then manage_error rm $RC; fi\n')
            script_file_id.write('}\n')
            script_file_id.write(separator_line)
            script_file_id.write('function end\n')
            script_file_id.write('{\n')
            script_file_id.write('    END_DATETIME=`date --utc +%s`\n')
            script_file_id.write('    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n')
            script_file_id.write('    calculate_duration\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    echo "Script ended OK at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    send_mail ok\n')
            script_file_id.write('    touch $SCRIPT_STATUS_OK\n')
            script_file_id.write('    exit 0\n')
            script_file_id.write('}\n')
            script_file_id.write(separator_line)
            script_file_id.write('function manage_error\n')
            script_file_id.write('{\n')
            script_file_id.write('    END_DATETIME=`date --utc +%s`\n')
            script_file_id.write('    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n')
            script_file_id.write('    calculate_duration\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    echo "ERROR: $1 returned error $2"\n')
            script_file_id.write('    echo "Script ended WRONG at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    send_mail wrong\n')
            script_file_id.write('    touch $SCRIPT_STATUS_WRONG\n')
            script_file_id.write('    exit 3\n')
            script_file_id.write('}\n')
            script_file_id.write(separator_line)
            process_name = f'{xlib.get_gzip_name()} process'
            mail_message_ok = xlib.get_mail_message_ok(process_name, cluster_name)
            mail_message_wrong = xlib.get_mail_message_wrong(process_name, cluster_name)
            script_file_id.write('function send_mail\n')
            script_file_id.write('{\n')
            script_file_id.write(f'    SUBJECT="{xlib.get_project_name()}: {process_name}"\n')
            script_file_id.write('    if [ "$1" == "ok" ]; then\n')
            script_file_id.write(f'        MESSAGE="{mail_message_ok}"\n')
            script_file_id.write('    elif [ "$1" == "wrong" ]; then\n')
            script_file_id.write(f'        MESSAGE="{mail_message_wrong}"\n')
            script_file_id.write('    else\n')
            script_file_id.write('         MESSAGE=""\n')
            script_file_id.write('    fi\n')
            script_file_id.write('    DESTINATION_FILE=mail-destination.json\n')
            script_file_id.write('    echo "{" > $DESTINATION_FILE\n')
            script_file_id.write(f'    echo "    \\\"ToAddresses\\\":  [\\\"{xconfiguration.get_contact_data()}\\\"]," >> $DESTINATION_FILE\n')
            script_file_id.write('    echo "    \\\"CcAddresses\\\":  []," >> $DESTINATION_FILE\n')
            script_file_id.write('    echo "    \\\"BccAddresses\\\":  []" >> $DESTINATION_FILE\n')
            script_file_id.write('    echo "}" >> $DESTINATION_FILE\n')
            script_file_id.write('    MESSAGE_FILE=mail-message.json\n')
            script_file_id.write('    echo "{" > $MESSAGE_FILE\n')
            script_file_id.write('    echo "    \\\"Subject\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "        \\\"Data\\\":  \\\"$SUBJECT\\\"," >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "        \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "    }," >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "    \\\"Body\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "        \\\"Html\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "            \\\"Data\\\":  \\\"$MESSAGE\\\"," >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "            \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "        }" >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "    }" >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "}" >> $MESSAGE_FILE\n')
            script_file_id.write(f'    aws ses send-email --from {xconfiguration.get_contact_data()} --destination file://$DESTINATION_FILE --message file://$MESSAGE_FILE\n')
            script_file_id.write('}\n')
            script_file_id.write(separator_line)
            script_file_id.write('function calculate_duration\n')
            script_file_id.write('{\n')
            script_file_id.write('    DURATION=`expr $END_DATETIME - $INIT_DATETIME`\n')
            script_file_id.write('    HH=`expr $DURATION / 3600`\n')
            script_file_id.write('    MM=`expr $DURATION % 3600 / 60`\n')
            script_file_id.write('    SS=`expr $DURATION % 60`\n')
            script_file_id.write('    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`\n')
            script_file_id.write('}\n')
            script_file_id.write(separator_line)
            script_file_id.write('init\n')
            script_file_id.write('run_gzip_process\n')
            script_file_id.write('end\n')
    except Exception as e:
        error_list.append(f'*** EXCEPTION: "{e}".')
        error_list.append(f'*** ERROR: The file {script_path} can not be created')
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 4
0
def build_infrastructure_software_installation_script(cluster_name):
    '''
    Build the infrastructure software installation script.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the connetion data
    (user_id, access_key_id,
     secret_access_key) = xconfiguration.get_basic_aws_data()

    # get the old region and user identification
    current_region_name = xconfiguration.get_current_region_name()

    # get the NGScloud config file
    ngscloud_config_file = xconfiguration.get_ngscloud_config_file()

    # get the option dictionary corresponding to the NGScloud config file
    ngscloud_options_dict = xlib.get_option_dict(ngscloud_config_file)

    # get the dataset structure and NGScloud_volume
    dataset_structure = ngscloud_options_dict['dataset info'][
        'dataset_structure']

    # write the infrastructure software installation script
    try:
        if not os.path.exists(
                os.path.dirname(
                    get_infrastructure_software_installation_script())):
            os.makedirs(
                os.path.dirname(
                    get_infrastructure_software_installation_script()))
        with open(get_infrastructure_software_installation_script(),
                  mode='w',
                  encoding='iso-8859-1',
                  newline='\n') as script_file_id:
            script_file_id.write('#!/bin/bash\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write(
                'SEP="#########################################"\n')
            script_file_id.write(
                'export HOST_IP=`curl --silent checkip.amazonaws.com`\n')
            script_file_id.write(
                'export HOST_ADDRESS="ec2-${HOST_IP//./-}-compute-1.amazonaws.com"\n'
            )
            script_file_id.write(
                'export AWS_CONFIG_FILE=/home/ubuntu/.aws/config\n')
            script_file_id.write(
                'export AWS_SHARED_CREDENTIALS_FILE=/home/ubuntu/.aws/credentials\n'
            )
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function init\n')
            script_file_id.write('{\n')
            script_file_id.write('    INIT_DATETIME=`date --utc +%s`\n')
            script_file_id.write(
                '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n'
            )
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(
                '    echo "Script started at $FORMATTED_INIT_DATETIME+00:00."\n'
            )
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(f'    echo "CLUSTER: {cluster_name}"\n')
            script_file_id.write('    echo "HOST NAME: $HOSTNAME"\n')
            script_file_id.write('    echo "HOST IP: $HOST_IP"\n')
            script_file_id.write('    echo "HOST ADDRESS: $HOST_ADDRESS"\n')
            script_file_id.write('}\n')
            if dataset_structure in [
                    xconfiguration.get_dataset_structure_singlevolume(),
                    xconfiguration.get_dataset_structure_none()
            ]:
                script_file_id.write(
                    '#-------------------------------------------------------------------------------\n'
                )
                script_file_id.write('function create_dataset_structure\n')
                script_file_id.write('{\n')
                script_file_id.write('    echo "$SEP"\n')
                script_file_id.write(
                    '    echo "Creating the dataset structure ..."\n')
                script_file_id.write(
                    f'    sudo mkdir --parents {xlib.get_cluster_app_dir()}\n')
                script_file_id.write(
                    f'    sudo mkdir --parents {xlib.get_cluster_database_dir()}\n'
                )
                script_file_id.write(
                    f'    sudo mkdir --parents {xlib.get_cluster_read_dir()}\n'
                )
                script_file_id.write(
                    f'    sudo mkdir --parents {xlib.get_cluster_reference_dir()}\n'
                )
                script_file_id.write(
                    f'    sudo mkdir --parents {xlib.get_cluster_result_dir()}\n'
                )
                script_file_id.write(
                    '    echo "The dataset structure is created."\n')
                script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function install_awscli\n')
            script_file_id.write('{\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    echo "Installing the AWS CLI ..."\n')
            script_file_id.write(f'    unzip {xlib.get_awscli_name()}.zip\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write('    if [ $RC -ne 0 ]; then unzip $RC; fi\n')
            script_file_id.write('    sudo ./aws/install\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then install $RC; fi\n')
            script_file_id.write('    rm -rf aws\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write('    if [ $RC -ne 0 ]; then rm $RC; fi\n')
            script_file_id.write(f'    rm {xlib.get_awscli_name()}.zip\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write('    if [ $RC -ne 0 ]; then rm $RC; fi\n')
            script_file_id.write('    echo "The package is installed."\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function setup_aws\n')
            script_file_id.write('{\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    echo "Setting up AWS ..."\n')
            script_file_id.write('    UBUNTU_AWS_DIR=/home/ubuntu/.aws\n')
            script_file_id.write('    mkdir --parents $UBUNTU_AWS_DIR\n')
            script_file_id.write(f'    CONFIG_FILE=$UBUNTU_AWS_DIR/config\n')
            script_file_id.write('    echo "[default]" > $CONFIG_FILE\n')
            script_file_id.write(
                f'    echo "region = {current_region_name}" >> $CONFIG_FILE\n')
            script_file_id.write(
                '    CREDENTIALS_FILE=$UBUNTU_AWS_DIR/credentials\n')
            script_file_id.write('    echo "[default]" > $CREDENTIALS_FILE\n')
            script_file_id.write(
                f'    echo "aws_access_key_id = {access_key_id}" >> $CREDENTIALS_FILE\n'
            )
            script_file_id.write(
                f'    echo "aws_secret_access_key = {secret_access_key}" >> $CREDENTIALS_FILE\n'
            )
            script_file_id.write('    sudo echo "AWS is set up."\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function fix_source_list\n')
            script_file_id.write('{\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(
                '    echo "Fixing file /etc/apt/sources.list ..."\n')
            script_file_id.write(
                '    sed -i "s/us-east-1.ec2.archive.ubuntu.com/old-releases.ubuntu.com/g" /etc/apt/sources.list\n'
            )
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error sed $RC; fi\n')
            script_file_id.write(
                '    sed -i "s/security.ubuntu.com/old-releases.ubuntu\.com/g" /etc/apt/sources.list\n'
            )
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error sed $RC; fi\n')
            script_file_id.write('    apt-get update\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi\n')
            script_file_id.write('    echo\n')
            script_file_id.write('    echo "The file is fixed."\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function install_xorg\n')
            script_file_id.write('{\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(
                '    echo "Installing the package xorg ..."\n')
            script_file_id.write(
                '    sudo apt-get --assume-yes install xorg\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi\n')
            script_file_id.write('    echo "The package is installed."\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function install_libtbb2\n')
            script_file_id.write('{\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(
                '    echo "Installing the package libtbb2 ..."\n')
            script_file_id.write('    echo\n')
            script_file_id.write('    apt-get --assume-yes install libtbb2\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi\n')
            script_file_id.write('    echo\n')
            script_file_id.write('    echo "The package is installed."\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function install_libxt6\n')
            script_file_id.write('{\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(
                '    echo "Installing the package libxt6 ..."\n')
            script_file_id.write(
                '    sudo apt-get --assume-yes install libxt6\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi\n')
            script_file_id.write('    echo "The package is installed."\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function install_parallel\n')
            script_file_id.write('{\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(
                '    echo "Installing the package parallel ..."\n')
            script_file_id.write(
                '    sudo apt-get --assume-yes install parallel\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi\n')
            script_file_id.write('    echo "The package is installed."\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function install_texlive\n')
            script_file_id.write('{\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(
                '    echo "Installing the package texlive ..."\n')
            script_file_id.write(
                '    sudo apt-get --assume-yes install texlive-latex-base\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi\n')
            script_file_id.write(
                '    sudo apt-get --assume-yes install texlive-fonts-recommended\n'
            )
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi\n')
            script_file_id.write(
                '    sudo apt-get --assume-yes install texlive-fonts-extra\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi\n')
            script_file_id.write(
                '    sudo apt-get --assume-yes install texlive-latex-extra\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi\n')
            script_file_id.write('    echo "The package is installed."\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function uninstall_mysql\n')
            script_file_id.write('{\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    echo "Uninstalling MySQL ..."\n')
            script_file_id.write(
                '    sudo apt-get purge --auto-remove --assume-yes mysql-client mysql-client-5.5 mysql-client-core-5.5 mysql-common mysql-server mysql-server-5.5 mysql-server-core-5.5\n'
            )
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error apt-get $RC; fi\n')
            script_file_id.write('    echo "MySQL is uninstalled."\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function create_swapfile\n')
            script_file_id.write('{\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(
                '    echo "Creating a file which will be used for swap ..."\n')
            script_file_id.write(
                '    sudo dd if=/dev/zero of=/swapfile bs=1024 count=2097152\n'
            )
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error dd $RC; fi\n')
            script_file_id.write('    sudo chmod 600 /swapfile\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error chmod $RC; fi\n')
            script_file_id.write('    sudo mkswap /swapfile\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error mkswap $RC; fi\n')
            script_file_id.write('    sudo swapon /swapfile\n')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error swapon $RC; fi\n')
            script_file_id.write(
                '    sudo echo "/swapfile swap swap defaults 0 0" >> /etc/fstab\n'
            )
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error echo $RC; fi\n')
            script_file_id.write('    echo\n')
            script_file_id.write('    echo "The file is created."\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function end\n')
            script_file_id.write('{\n')
            script_file_id.write('    END_DATETIME=`date --utc +%s`\n')
            script_file_id.write(
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n'
            )
            script_file_id.write('    calculate_duration\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(
                '    echo "Script ended OK at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n'
            )
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    send_mail ok\n')
            script_file_id.write('    exit 0\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function manage_error\n')
            script_file_id.write('{\n')
            script_file_id.write('    END_DATETIME=`date --utc +%s`\n')
            script_file_id.write(
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n'
            )
            script_file_id.write('    calculate_duration\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    echo "ERROR: $1 returned error $2"\n')
            script_file_id.write(
                '    echo "Script ended WRONG at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n'
            )
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    send_mail wrong\n')
            script_file_id.write('    exit 3\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            process_name = 'Infrastructure software installation'
            mail_message_ok = xlib.get_mail_message_ok(process_name,
                                                       cluster_name)
            mail_message_wrong = xlib.get_mail_message_wrong(
                process_name, cluster_name)
            script_file_id.write('function send_mail\n')
            script_file_id.write('{\n')
            script_file_id.write(
                f'    SUBJECT="{xlib.get_project_name()}: {process_name}"\n')
            script_file_id.write('    if [ "$1" == "ok" ]; then\n')
            script_file_id.write(f'        MESSAGE="{mail_message_ok}"\n')
            script_file_id.write('    elif [ "$1" == "wrong" ]; then\n')
            script_file_id.write(f'        MESSAGE="{mail_message_wrong}"\n')
            script_file_id.write('    else\n')
            script_file_id.write('         MESSAGE=""\n')
            script_file_id.write('    fi\n')
            script_file_id.write(
                '    DESTINATION_FILE=mail-destination.json\n')
            script_file_id.write('    echo "{" > $DESTINATION_FILE\n')
            script_file_id.write(
                f'    echo "    \\\"ToAddresses\\\":  [\\\"{xconfiguration.get_contact_data()}\\\"]," >> $DESTINATION_FILE\n'
            )
            script_file_id.write(
                '    echo "    \\\"CcAddresses\\\":  []," >> $DESTINATION_FILE\n'
            )
            script_file_id.write(
                '    echo "    \\\"BccAddresses\\\":  []" >> $DESTINATION_FILE\n'
            )
            script_file_id.write('    echo "}" >> $DESTINATION_FILE\n')
            script_file_id.write('    MESSAGE_FILE=mail-message.json\n')
            script_file_id.write('    echo "{" > $MESSAGE_FILE\n')
            script_file_id.write(
                '    echo "    \\\"Subject\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write(
                '    echo "        \\\"Data\\\":  \\\"$SUBJECT\\\"," >> $MESSAGE_FILE\n'
            )
            script_file_id.write(
                '    echo "        \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n'
            )
            script_file_id.write('    echo "    }," >> $MESSAGE_FILE\n')
            script_file_id.write(
                '    echo "    \\\"Body\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write(
                '    echo "        \\\"Html\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write(
                '    echo "            \\\"Data\\\":  \\\"$MESSAGE\\\"," >> $MESSAGE_FILE\n'
            )
            script_file_id.write(
                '    echo "            \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n'
            )
            script_file_id.write('    echo "        }" >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "    }" >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "}" >> $MESSAGE_FILE\n')
            script_file_id.write(
                f'    aws ses send-email --from {xconfiguration.get_contact_data()} --destination file://$DESTINATION_FILE --message file://$MESSAGE_FILE\n'
            )
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('function calculate_duration\n')
            script_file_id.write('{\n')
            script_file_id.write(
                '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`\n')
            script_file_id.write('    HH=`expr $DURATION / 3600`\n')
            script_file_id.write('    MM=`expr $DURATION % 3600 / 60`\n')
            script_file_id.write('    SS=`expr $DURATION % 60`\n')
            script_file_id.write(
                '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`\n'
            )
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write('init\n')
            if dataset_structure in [
                    xconfiguration.get_dataset_structure_singlevolume(),
                    xconfiguration.get_dataset_structure_none()
            ]:
                script_file_id.write('create_dataset_structure\n')
            script_file_id.write('install_awscli\n')
            script_file_id.write('setup_aws\n')
            script_file_id.write('fix_source_list\n')
            script_file_id.write('install_xorg\n')
            script_file_id.write('install_libtbb2\n')
            script_file_id.write('install_libxt6\n')
            script_file_id.write('install_parallel\n')
            script_file_id.write('install_texlive\n')
            script_file_id.write('uninstall_mysql\n')
            # -- script_file_id.write( 'create_swapfile\n')
            script_file_id.write('end\n')
    except Exception as e:
        error_list.append(f'*** EXCEPTION: "{e}".')
        error_list.append(
            f'*** ERROR: The file {get_infrastructure_software_installation_script()} can not be created'
        )
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 5
0
def build_cd_hit_est_process_script(cluster_name, current_run_dir):
    '''
    Build the current CD-HIT-EST process script.

    The options are read from the CD-HIT-EST config file and a bash script is
    written to the path returned by get_cd_hit_est_process_script().

    Parameters:
        cluster_name: cluster name echoed by the generated script and passed
            to the mail message builders.
        current_run_dir: run directory; the generated script changes to it,
            the status paths are derived from it and the clustered
            transcriptome is written into it.

    Returns:
        A tuple (OK, error_list). OK is False when an exception is raised
        while the script is being written; error_list then holds the
        corresponding messages. Exceptions raised while the options are read
        (before the script is opened) are not caught here.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the option dictionary
    cd_hit_est_option_dict = xlib.get_option_dict(get_cd_hit_est_config_file())

    # get the options
    identification = cd_hit_est_option_dict['identification']
    parameters = cd_hit_est_option_dict['CD-HIT-EST parameters']
    experiment_id = identification['experiment_id']
    assembly_software = identification['assembly_software']
    assembly_dataset_id = identification['assembly_dataset_id']
    assembly_type = identification['assembly_type']
    threads = parameters['threads']
    memory_limit = parameters['memory_limit']
    seq_identity_threshold = parameters['seq_identity_threshold']
    word_length = parameters['word_length']
    mask = parameters['mask']
    match = parameters['match']
    mismatch = parameters['mismatch']
    other_parameters = parameters['other_parameters']

    # set the transcriptome file path
    # NOTE: when the assembly software (or the SOAPdenovo-Trans assembly type)
    # is not one of the values below, transcriptome_file stays unbound; the
    # NameError raised on its first use is reported by the except clause.
    if assembly_software == xlib.get_soapdenovotrans_code():
        if assembly_type == 'CONTIGS':
            transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/{experiment_id}-{assembly_dataset_id}.contig'
        elif assembly_type == 'SCAFFOLDS':
            transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/{experiment_id}-{assembly_dataset_id}.scafSeq'
    elif assembly_software == xlib.get_transabyss_code():
        transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/transabyss-final.fa'
    elif assembly_software == xlib.get_trinity_code():
        transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/Trinity.fasta'
    elif assembly_software == xlib.get_ggtrinity_code():
        transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/Trinity-GG.fasta'
    elif assembly_software == xlib.get_cd_hit_est_code():
        transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/clustered-transcriptome.fasta'
    elif assembly_software == xlib.get_transcript_filter_code():
        transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/filtered-transcriptome.fasta'

    # set the output file path
    output_file = f'{current_run_dir}/clustered-transcriptome.fasta'

    # write the CD-HIT-EST process script
    try:
        if not os.path.exists(os.path.dirname(
                get_cd_hit_est_process_script())):
            os.makedirs(os.path.dirname(get_cd_hit_est_process_script()))
        with open(get_cd_hit_est_process_script(),
                  mode='w',
                  encoding='iso-8859-1',
                  newline='\n') as script_file_id:
            # environment of the script
            script_file_id.write('#!/bin/bash\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write(
                'SEP="#########################################"\n')
            script_file_id.write(
                'export HOST_IP=`curl --silent checkip.amazonaws.com`\n')
            script_file_id.write(
                'export HOST_ADDRESS="ec2-${HOST_IP//./-}-compute-1.amazonaws.com"\n'
            )
            script_file_id.write(
                'export AWS_CONFIG_FILE=/home/ubuntu/.aws/config\n')
            script_file_id.write(
                'export AWS_SHARED_CREDENTIALS_FILE=/home/ubuntu/.aws/credentials\n'
            )
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write(
                f'MINICONDA3_BIN_PATH={xlib.get_cluster_app_dir()}/{xlib.get_miniconda3_name()}/bin\n'
            )
            script_file_id.write('export PATH=$MINICONDA3_BIN_PATH:$PATH\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write(
                f'STATUS_DIR={xlib.get_status_dir(current_run_dir)}\n')
            script_file_id.write(
                f'SCRIPT_STATUS_OK={xlib.get_status_ok(current_run_dir)}\n')
            script_file_id.write(
                f'SCRIPT_STATUS_WRONG={xlib.get_status_wrong(current_run_dir)}\n'
            )
            script_file_id.write('mkdir --parents $STATUS_DIR\n')
            script_file_id.write(
                'if [ -f $SCRIPT_STATUS_OK ]; then rm $SCRIPT_STATUS_OK; fi\n')
            script_file_id.write(
                'if [ -f $SCRIPT_STATUS_WRONG ]; then rm $SCRIPT_STATUS_WRONG; fi\n'
            )
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )
            script_file_id.write(f'CURRENT_DIR={current_run_dir}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )

            # bash function: init
            script_file_id.write('function init\n')
            script_file_id.write('{\n')
            script_file_id.write('    INIT_DATETIME=`date --utc +%s`\n')
            script_file_id.write(
                '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n'
            )
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(
                '    echo "Script started at $FORMATTED_INIT_DATETIME+00:00."\n'
            )
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(f'    echo "CLUSTER: {cluster_name}"\n')
            script_file_id.write('    echo "HOST NAME: $HOSTNAME"\n')
            script_file_id.write('    echo "HOST IP: $HOST_IP"\n')
            script_file_id.write('    echo "HOST ADDRESS: $HOST_ADDRESS"\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )

            # bash function: run_cd_hit_est_process
            script_file_id.write('function run_cd_hit_est_process\n')
            script_file_id.write('{\n')
            script_file_id.write(
                f'    source activate {xlib.get_cd_hit_anaconda_code()}\n')
            script_file_id.write('    cd $CURRENT_DIR\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    echo "Filtering transcriptome ..."\n')
            script_file_id.write('    /usr/bin/time \\\n')
            script_file_id.write(
                f'        --format="{xlib.get_time_output_format()}" \\\n')
            script_file_id.write('        cd-hit-est \\\n')
            script_file_id.write(f'            -T {threads} \\\n')
            script_file_id.write(f'            -M {memory_limit} \\\n')
            script_file_id.write(f'            -i {transcriptome_file} \\\n')
            script_file_id.write(
                f'            -c {seq_identity_threshold} \\\n')
            script_file_id.write(f'            -n {word_length} \\\n')
            script_file_id.write(f'            -mask {mask} \\\n')
            script_file_id.write(f'            -match {match} \\\n')
            script_file_id.write(f'            -mismatch {mismatch} \\\n')
            if other_parameters.upper() == 'NONE':
                script_file_id.write(f'            -o {output_file}\n')
            else:
                script_file_id.write(f'            -o {output_file} \\\n')
                # other parameters come as "--name=value" or "--name" items
                # separated by ";"; they are written as single-dash options
                parameter_list = [
                    x.strip() for x in other_parameters.split(';')
                ]
                last_index = len(parameter_list) - 1
                for i, parameter in enumerate(parameter_list):
                    # every option but the last one needs a line continuation;
                    # a continuation after the last one would make bash join
                    # the following "RC=$?" line to the cd-hit-est command
                    line_end = ' \\\n' if i < last_index else '\n'
                    if parameter.find('=') > 0:
                        mo = re.search(r'^--(.+)=(.+)$', parameter)
                        parameter_name = mo.group(1).strip()
                        parameter_value = mo.group(2).strip()
                        script_file_id.write(
                            f'            -{parameter_name} {parameter_value}{line_end}'
                        )
                    else:
                        mo = re.search(r'^--(.+)$', parameter)
                        parameter_name = mo.group(1).strip()
                        script_file_id.write(
                            f'            -{parameter_name}{line_end}')
            script_file_id.write('    RC=$?\n')
            script_file_id.write(
                '    if [ $RC -ne 0 ]; then manage_error cd-hit-est $RC; fi\n')
            script_file_id.write('    echo "The transcriptome is filtered."\n')
            script_file_id.write('    conda deactivate\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )

            # bash function: end
            script_file_id.write('function end\n')
            script_file_id.write('{\n')
            script_file_id.write('    END_DATETIME=`date --utc +%s`\n')
            script_file_id.write(
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n'
            )
            script_file_id.write('    calculate_duration\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write(
                '    echo "Script ended OK at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n'
            )
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    send_mail ok\n')
            script_file_id.write('    touch $SCRIPT_STATUS_OK\n')
            script_file_id.write('    exit 0\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )

            # bash function: manage_error
            script_file_id.write('function manage_error\n')
            script_file_id.write('{\n')
            script_file_id.write('    END_DATETIME=`date --utc +%s`\n')
            script_file_id.write(
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n'
            )
            script_file_id.write('    calculate_duration\n')
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    echo "ERROR: $1 returned error $2"\n')
            script_file_id.write(
                '    echo "Script ended WRONG at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n'
            )
            script_file_id.write('    echo "$SEP"\n')
            script_file_id.write('    send_mail wrong\n')
            script_file_id.write('    touch $SCRIPT_STATUS_WRONG\n')
            script_file_id.write('    exit 3\n')
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )

            # bash function: send_mail
            process_name = f'{xlib.get_cd_hit_est_name()} process'
            mail_message_ok = xlib.get_mail_message_ok(process_name,
                                                       cluster_name)
            mail_message_wrong = xlib.get_mail_message_wrong(
                process_name, cluster_name)
            script_file_id.write('function send_mail\n')
            script_file_id.write('{\n')
            script_file_id.write(
                f'    SUBJECT="{xlib.get_project_name()}: {process_name}"\n')
            script_file_id.write('    if [ "$1" == "ok" ]; then\n')
            script_file_id.write(f'        MESSAGE="{mail_message_ok}"\n')
            script_file_id.write('    elif [ "$1" == "wrong" ]; then\n')
            script_file_id.write(f'        MESSAGE="{mail_message_wrong}"\n')
            script_file_id.write('    else\n')
            script_file_id.write('         MESSAGE=""\n')
            script_file_id.write('    fi\n')
            script_file_id.write(
                '    DESTINATION_FILE=mail-destination.json\n')
            script_file_id.write('    echo "{" > $DESTINATION_FILE\n')
            script_file_id.write(
                f'    echo "    \\\"ToAddresses\\\":  [\\\"{xconfiguration.get_contact_data()}\\\"]," >> $DESTINATION_FILE\n'
            )
            script_file_id.write(
                '    echo "    \\\"CcAddresses\\\":  []," >> $DESTINATION_FILE\n'
            )
            script_file_id.write(
                '    echo "    \\\"BccAddresses\\\":  []" >> $DESTINATION_FILE\n'
            )
            script_file_id.write('    echo "}" >> $DESTINATION_FILE\n')
            script_file_id.write('    MESSAGE_FILE=mail-message.json\n')
            script_file_id.write('    echo "{" > $MESSAGE_FILE\n')
            script_file_id.write(
                '    echo "    \\\"Subject\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write(
                '    echo "        \\\"Data\\\":  \\\"$SUBJECT\\\"," >> $MESSAGE_FILE\n'
            )
            script_file_id.write(
                '    echo "        \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n'
            )
            script_file_id.write('    echo "    }," >> $MESSAGE_FILE\n')
            script_file_id.write(
                '    echo "    \\\"Body\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write(
                '    echo "        \\\"Html\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write(
                '    echo "            \\\"Data\\\":  \\\"$MESSAGE\\\"," >> $MESSAGE_FILE\n'
            )
            script_file_id.write(
                '    echo "            \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n'
            )
            script_file_id.write('    echo "        }" >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "    }" >> $MESSAGE_FILE\n')
            script_file_id.write('    echo "}" >> $MESSAGE_FILE\n')
            script_file_id.write(
                f'    aws ses send-email --from {xconfiguration.get_contact_data()} --destination file://$DESTINATION_FILE --message file://$MESSAGE_FILE\n'
            )
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )

            # bash function: calculate_duration
            script_file_id.write('function calculate_duration\n')
            script_file_id.write('{\n')
            script_file_id.write(
                '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`\n')
            script_file_id.write('    HH=`expr $DURATION / 3600`\n')
            script_file_id.write('    MM=`expr $DURATION % 3600 / 60`\n')
            script_file_id.write('    SS=`expr $DURATION % 60`\n')
            script_file_id.write(
                '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`\n'
            )
            script_file_id.write('}\n')
            script_file_id.write(
                '#-------------------------------------------------------------------------------\n'
            )

            # main sequence of the script
            script_file_id.write('init\n')
            script_file_id.write('run_cd_hit_est_process\n')
            script_file_id.write('end\n')
    except Exception as e:
        error_list.append(f'*** EXCEPTION: "{e}".')
        error_list.append(
            f'*** ERROR: The file {get_cd_hit_est_process_script()} can not be created'
        )
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 6
0
def build_express_process_script(cluster_name, current_run_dir):
    '''
    Build the current eXpress process script.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the eXpress option dictionary
    express_option_dict = xlib.get_option_dict(get_express_config_file())

    # get the options
    experiment_id = express_option_dict['identification']['experiment_id']
    assembly_software = express_option_dict['identification']['assembly_software']
    assembly_dataset_id = express_option_dict['identification']['assembly_dataset_id']
    assembly_type = express_option_dict['identification']['assembly_type']
    frag_len_mean = express_option_dict['eXpress parameters']['frag-len-mean']
    frag_len_stddev = express_option_dict['eXpress parameters']['frag-len-stddev']
    library_type = express_option_dict['eXpress parameters']['library_type']
    max_indel_size = express_option_dict['eXpress parameters']['max-indel-size']
    no_bias_correct = express_option_dict['eXpress parameters']['no-bias-correct']
    no_error_model = express_option_dict['eXpress parameters']['no-error-model']
    other_parameters = express_option_dict['eXpress parameters']['other_parameters']

    # get the sections list
    sections_list = []
    for section in express_option_dict.keys():
        sections_list.append(section)
    sections_list.sort()

    # build alignment dataset identification list
    alignment_software_list = []
    alignment_dataset_id_list = []
    for section in sections_list:
        # if the section identification is like library-n
        if re.match('^alignment-dataset-[0-9]+$', section):
            alignment_software_list.append(express_option_dict[section]['alignment_software'])
            alignment_dataset_id_list.append(express_option_dict[section]['alignment_dataset_id'])

    # set the transcriptome file path
    if assembly_software == xlib.get_soapdenovotrans_code():
        if assembly_type == 'CONTIGS':
            transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/{experiment_id}-{assembly_dataset_id}.contig'
        elif  assembly_type == 'SCAFFOLDS':
            transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/{experiment_id}-{assembly_dataset_id}.scafSeq'
    elif assembly_software == xlib.get_transabyss_code():
        transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/transabyss-final.fa'
    elif assembly_software == xlib.get_trinity_code():
        transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/Trinity.fasta'
    elif assembly_software == xlib.get_ggtrinity_code():
        transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/Trinity-GG.fasta'
    elif assembly_software == xlib.get_cd_hit_est_code():
        transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/clustered-transcriptome.fasta'
    elif assembly_software == xlib.get_transcript_filter_code():
        transcriptome_file = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, assembly_dataset_id)}/filtered-transcriptome.fasta'

    # write the eXpress process script
    try:
        if not os.path.exists(os.path.dirname(get_express_process_script())):
            os.makedirs(os.path.dirname(get_express_process_script()))
        with open(get_express_process_script(), mode='w', encoding='iso-8859-1', newline='\n') as script_file_id:
            script_file_id.write( '#!/bin/bash\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'SEP="#########################################"\n')
            script_file_id.write( 'export HOST_IP=`curl --silent checkip.amazonaws.com`\n')
            script_file_id.write( 'export HOST_ADDRESS="ec2-${HOST_IP//./-}-compute-1.amazonaws.com"\n')
            script_file_id.write( 'export AWS_CONFIG_FILE=/home/ubuntu/.aws/config\n')
            script_file_id.write( 'export AWS_SHARED_CREDENTIALS_FILE=/home/ubuntu/.aws/credentials\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write(f'MINICONDA3_BIN_PATH={xlib.get_cluster_app_dir()}/{xlib.get_miniconda3_name()}/bin\n')
            script_file_id.write(f'export PATH=$MINICONDA3_BIN_PATH:$PATH\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write(f'CURRENT_DIR={current_run_dir}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write(f'STATUS_DIR={xlib.get_status_dir(current_run_dir)}\n')
            script_file_id.write(f'SCRIPT_STATUS_OK={xlib.get_status_ok(current_run_dir)}\n')
            script_file_id.write(f'SCRIPT_STATUS_WRONG={xlib.get_status_wrong(current_run_dir)}\n')
            script_file_id.write( 'mkdir --parents $STATUS_DIR\n')
            script_file_id.write( 'if [ -f $SCRIPT_STATUS_OK ]; then rm $SCRIPT_STATUS_OK; fi\n')
            script_file_id.write( 'if [ -f $SCRIPT_STATUS_WRONG ]; then rm $SCRIPT_STATUS_WRONG; fi\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'function init\n')
            script_file_id.write( '{\n')
            script_file_id.write( '    INIT_DATETIME=`date --utc +%s`\n')
            script_file_id.write( '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write( '    echo "Script started at $FORMATTED_INIT_DATETIME+00:00."\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write(f'    echo "CLUSTER: {cluster_name}"\n')
            script_file_id.write( '    echo "HOST NAME: $HOSTNAME"\n')
            script_file_id.write( '    echo "HOST IP: $HOST_IP"\n')
            script_file_id.write( '    echo "HOST ADDRESS: $HOST_ADDRESS"\n')
            script_file_id.write( '}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'function run_express_process\n')
            script_file_id.write( '{\n')
            script_file_id.write(f'    source activate {xlib.get_express_anaconda_code()}\n')
            script_file_id.write(f'    cd $CURRENT_DIR\n')
            for i in range(len(alignment_dataset_id_list)):
                alignment_files = f'{xlib.get_cluster_experiment_result_dataset_dir(experiment_id, alignment_dataset_id_list[i])}/*.sorted.bam'
                script_file_id.write(f'    SORTED_BAM_LIST={alignment_dataset_id_list[i]}-sorted-bam-files.txt\n')
                script_file_id.write(f'    ls {alignment_files} > $SORTED_BAM_LIST\n')
                script_file_id.write( '    while read FILE_BAM; do\n')
                script_file_id.write( '        NAME=`basename $FILE_BAM`\n')
                script_file_id.write( '        NAME=${NAME:0:-11}\n')
                script_file_id.write(f'        SUBDIR={alignment_dataset_id_list[i]}-$NAME\n')
                script_file_id.write(f'        mkdir --parents $CURRENT_DIR/$SUBDIR\n')
                script_file_id.write( '        echo "$SEP"\n')
                script_file_id.write(f'        echo "Quantitating alignment dataset {alignment_dataset_id_list[i]} - library $SUBDIR ..."\n')
                script_file_id.write( '        /usr/bin/time \\\n')
                script_file_id.write(f'            --format="{xlib.get_time_output_format(separator=False)}" \\\n')
                script_file_id.write( '            express \\\n')
                script_file_id.write( '                --no-update-check \\\n')
                script_file_id.write(f'                --frag-len-mean {frag_len_mean} \\\n')
                script_file_id.write(f'                --frag-len-stddev {frag_len_stddev} \\\n')
                if library_type.lower() == 'fr-stranded':
                    script_file_id.write( '                --fr-stranded \\\n')
                elif library_type.lower() == 'rf-stranded':
                    script_file_id.write( '                --rf-stranded \\\n')
                elif library_type.lower() == 'f-stranded':
                    script_file_id.write( '                --f-stranded \\\n')
                elif library_type.lower() == 'r-stranded':
                    script_file_id.write( '                --r-stranded \\\n')
                script_file_id.write(f'                --max-indel-size {max_indel_size} \\\n')
                if no_bias_correct.upper() == 'YES':
                    script_file_id.write( '                --no-bias-correct \\\n')
                if no_error_model.upper() == 'YES':
                    script_file_id.write( '                --no-error-model \\\n')
                if other_parameters.upper() != 'NONE':
                    parameter_list = [x.strip() for x in other_parameters.split(';')]
                    for i in range(len(parameter_list)):
                        if parameter_list[i].find('=') > 0:
                            pattern = r'^--(.+)=(.+)$'
                            mo = re.search(pattern, parameter_list[i])
                            parameter_name = mo.group(1).strip()
                            parameter_value = mo.group(2).strip()
                            script_file_id.write(f'                --{parameter_name}={parameter_value} \\\n')
                        else:
                            pattern = r'^--(.+)$'
                            mo = re.search(pattern, parameter_list[i])
                            parameter_name = mo.group(1).strip()
                            script_file_id.write(f'                --{parameter_name} \\\n')
                script_file_id.write( '                --output-dir $CURRENT_DIR/$SUBDIR \\\n')
                script_file_id.write(f'                {transcriptome_file} \\\n')
                script_file_id.write( '                $FILE_BAM\n')
                script_file_id.write( '        RC=$?\n')
                script_file_id.write( '        if [ $RC -ne 0 ]; then manage_error express $RC; fi\n')
                script_file_id.write( '        echo "Quantitation is done."\n')
                script_file_id.write( '    done < $SORTED_BAM_LIST\n')
            script_file_id.write( '    conda deactivate\n')
            script_file_id.write( '}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'function end\n')
            script_file_id.write( '{\n')
            script_file_id.write( '    END_DATETIME=`date --utc +%s`\n')
            script_file_id.write( '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n')
            script_file_id.write( '    calculate_duration\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write( '    echo "Script ended OK at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write( '    send_mail ok\n')
            script_file_id.write( '    touch $SCRIPT_STATUS_OK\n')
            script_file_id.write( '    exit 0\n')
            script_file_id.write( '}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'function manage_error\n')
            script_file_id.write( '{\n')
            script_file_id.write( '    END_DATETIME=`date --utc +%s`\n')
            script_file_id.write( '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n')
            script_file_id.write( '    calculate_duration\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write( '    echo "ERROR: $1 returned error $2"\n')
            script_file_id.write( '    echo "Script ended WRONG at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write( '    send_mail wrong\n')
            script_file_id.write( '    touch $SCRIPT_STATUS_WRONG\n')
            script_file_id.write( '    exit 3\n')
            script_file_id.write( '}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            process_name = f'{xlib.get_express_name()} process'
            mail_message_ok = xlib.get_mail_message_ok(process_name, cluster_name)
            mail_message_wrong = xlib.get_mail_message_wrong(process_name, cluster_name)
            script_file_id.write( 'function send_mail\n')
            script_file_id.write( '{\n')
            script_file_id.write(f'    SUBJECT="{xlib.get_project_name()}: {process_name}"\n')
            script_file_id.write( '    if [ "$1" == "ok" ]; then\n')
            script_file_id.write(f'        MESSAGE="{mail_message_ok}"\n')
            script_file_id.write( '    elif [ "$1" == "wrong" ]; then\n')
            script_file_id.write(f'        MESSAGE="{mail_message_wrong}"\n')
            script_file_id.write( '    else\n')
            script_file_id.write( '         MESSAGE=""\n')
            script_file_id.write( '    fi\n')
            script_file_id.write( '    DESTINATION_FILE=mail-destination.json\n')
            script_file_id.write( '    echo "{" > $DESTINATION_FILE\n')
            script_file_id.write(f'    echo "    \\\"ToAddresses\\\":  [\\\"{xconfiguration.get_contact_data()}\\\"]," >> $DESTINATION_FILE\n')
            script_file_id.write( '    echo "    \\\"CcAddresses\\\":  []," >> $DESTINATION_FILE\n')
            script_file_id.write( '    echo "    \\\"BccAddresses\\\":  []" >> $DESTINATION_FILE\n')
            script_file_id.write( '    echo "}" >> $DESTINATION_FILE\n')
            script_file_id.write( '    MESSAGE_FILE=mail-message.json\n')
            script_file_id.write( '    echo "{" > $MESSAGE_FILE\n')
            script_file_id.write( '    echo "    \\\"Subject\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "        \\\"Data\\\":  \\\"$SUBJECT\\\"," >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "        \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "    }," >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "    \\\"Body\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "        \\\"Html\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "            \\\"Data\\\":  \\\"$MESSAGE\\\"," >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "            \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "        }" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "    }" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "}" >> $MESSAGE_FILE\n')
            script_file_id.write(f'    aws ses send-email --from {xconfiguration.get_contact_data()} --destination file://$DESTINATION_FILE --message file://$MESSAGE_FILE\n')
            script_file_id.write( '}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'function calculate_duration\n')
            script_file_id.write( '{\n')
            script_file_id.write( '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`\n')
            script_file_id.write( '    HH=`expr $DURATION / 3600`\n')
            script_file_id.write( '    MM=`expr $DURATION % 3600 / 60`\n')
            script_file_id.write( '    SS=`expr $DURATION % 60`\n')
            script_file_id.write( '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`\n')
            script_file_id.write( '}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'init\n')
            script_file_id.write( 'run_express_process\n')
            script_file_id.write( 'end\n')
    except Exception as e:
        error_list.append(f'*** EXCEPTION: "{e}".')
        error_list.append(f'*** ERROR: The file {get_express_process_script()} can not be created')
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 7
0
def build_fastqc_process_script(cluster_name, current_run_dir):
    '''
    Build the current FastQC process script.

    The FastQC options are read from the FastQC config file and a Bash script
    is written to the path returned by get_fastqc_process_script(). The
    generated script runs fastqc once per configured read file, records the
    outcome in the status files and sends a mail with the result.

    Parameters:
        cluster_name: cluster name; it is echoed by the generated script and
            passed to the mail message builders.
        current_run_dir: run directory; the generated script changes to it,
            uses it as the FastQC output directory and the status files are
            derived from it.

    Returns:
        A tuple (OK, error_list). OK is False when the script file could not
        be written; error_list then holds the corresponding messages.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the FastQC option dictionary
    fastqc_option_dict = xlib.get_option_dict(get_fastqc_config_file())

    # get the options
    experiment_id = fastqc_option_dict['identification']['experiment_id']
    read_dataset_id = fastqc_option_dict['identification']['read_dataset_id']
    threads = fastqc_option_dict['FastQC parameters']['threads']

    # build the file name list from the sections whose identification is like file-n
    # NOTE: the section names are sorted as strings, so "file-10" comes before "file-2";
    # this only affects the order in which the files are processed
    file_name_list = [
        fastqc_option_dict[section]['file_name']
        for section in sorted(fastqc_option_dict.keys())
        if re.match(r'^file-[0-9]+$', section)
    ]

    # get the script path once; it is used to create the directory, to open the file
    # and to build the error message
    script_path = get_fastqc_process_script()

    # line used to separate the parts of the generated script
    separator = '#-------------------------------------------------------------------------------\n'

    # write the FastQC process script
    try:
        # exist_ok avoids the race between checking and creating the directory
        os.makedirs(os.path.dirname(script_path), exist_ok=True)
        with open(script_path, mode='w', encoding='iso-8859-1', newline='\n') as script_file_id:

            # environment variables, PATH and status files
            script_file_id.writelines((
                '#!/bin/bash\n',
                separator,
                'SEP="#########################################"\n',
                'export HOST_IP=`curl --silent checkip.amazonaws.com`\n',
                'export HOST_ADDRESS="ec2-${HOST_IP//./-}-compute-1.amazonaws.com"\n',
                'export AWS_CONFIG_FILE=/home/ubuntu/.aws/config\n',
                'export AWS_SHARED_CREDENTIALS_FILE=/home/ubuntu/.aws/credentials\n',
                separator,
                f'MINICONDA3_BIN_PATH={xlib.get_cluster_app_dir()}/{xlib.get_miniconda3_name()}/bin\n',
                'export PATH=$MINICONDA3_BIN_PATH:$PATH\n',
                separator,
                f'STATUS_DIR={xlib.get_status_dir(current_run_dir)}\n',
                f'SCRIPT_STATUS_OK={xlib.get_status_ok(current_run_dir)}\n',
                f'SCRIPT_STATUS_WRONG={xlib.get_status_wrong(current_run_dir)}\n',
                'mkdir --parents $STATUS_DIR\n',
                'if [ -f $SCRIPT_STATUS_OK ]; then rm $SCRIPT_STATUS_OK; fi\n',
                'if [ -f $SCRIPT_STATUS_WRONG ]; then rm $SCRIPT_STATUS_WRONG; fi\n',
                separator,
            ))

            # function init: print the start time and the host data
            script_file_id.writelines((
                'function init\n',
                '{\n',
                '    INIT_DATETIME=`date --utc +%s`\n',
                '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n',
                '    echo "$SEP"\n',
                '    echo "Script started at $FORMATTED_INIT_DATETIME+00:00."\n',
                '    echo "$SEP"\n',
                f'    echo "CLUSTER: {cluster_name}"\n',
                '    echo "HOST NAME: $HOSTNAME"\n',
                '    echo "HOST IP: $HOST_IP"\n',
                '    echo "HOST ADDRESS: $HOST_ADDRESS"\n',
                '}\n',
                separator,
            ))

            # function run_fastqc_process: one fastqc run per read file
            script_file_id.writelines((
                'function run_fastqc_process\n',
                '{\n',
                f'    source activate {xlib.get_fastqc_anaconda_code()}\n',
                f'    cd {current_run_dir}\n',
                '    echo "$SEP"\n',
                '    fastqc --version\n',
            ))
            for file_name in file_name_list:
                script_file_id.writelines((
                    '    echo "$SEP"\n',
                    '    /usr/bin/time \\\n',
                    f'        --format="{xlib.get_time_output_format()}" \\\n',
                    '        fastqc \\\n',
                    f'            {xlib.get_cluster_read_file(experiment_id, read_dataset_id, file_name)} \\\n',
                    f'            --threads={threads} \\\n',
                    f'            --outdir={current_run_dir}\n',
                    '    RC=$?\n',
                    '    if [ $RC -ne 0 ]; then manage_error fastqc $RC; fi\n',
                ))
            script_file_id.writelines((
                '    conda deactivate\n',
                '}\n',
                separator,
            ))

            # function end: print the end time, send the OK mail and set the OK status
            script_file_id.writelines((
                'function end\n',
                '{\n',
                '    END_DATETIME=`date --utc +%s`\n',
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n',
                '    calculate_duration\n',
                '    echo "$SEP"\n',
                '    echo "Script ended OK at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n',
                '    echo "$SEP"\n',
                '    send_mail ok\n',
                '    touch $SCRIPT_STATUS_OK\n',
                '    exit 0\n',
                '}\n',
                separator,
            ))

            # function manage_error: print the error, send the WRONG mail and set the WRONG status
            script_file_id.writelines((
                'function manage_error\n',
                '{\n',
                '    END_DATETIME=`date --utc +%s`\n',
                '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n',
                '    calculate_duration\n',
                '    echo "$SEP"\n',
                '    echo "ERROR: $1 returned error $2"\n',
                '    echo "Script ended WRONG at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n',
                '    echo "$SEP"\n',
                '    send_mail wrong\n',
                '    touch $SCRIPT_STATUS_WRONG\n',
                '    exit 3\n',
                '}\n',
                separator,
            ))

            # function send_mail: build the JSON files and send the mail using AWS SES
            process_name = f'{xlib.get_fastqc_name()} process'
            mail_message_ok = xlib.get_mail_message_ok(process_name, cluster_name)
            mail_message_wrong = xlib.get_mail_message_wrong(process_name, cluster_name)
            # the same contact address is used as recipient and as sender
            contact_data = xconfiguration.get_contact_data()
            script_file_id.writelines((
                'function send_mail\n',
                '{\n',
                f'    SUBJECT="{xlib.get_project_name()}: {process_name}"\n',
                '    if [ "$1" == "ok" ]; then\n',
                f'        MESSAGE="{mail_message_ok}"\n',
                '    elif [ "$1" == "wrong" ]; then\n',
                f'        MESSAGE="{mail_message_wrong}"\n',
                '    else\n',
                '         MESSAGE=""\n',
                '    fi\n',
                '    DESTINATION_FILE=mail-destination.json\n',
                '    echo "{" > $DESTINATION_FILE\n',
                f'    echo "    \\\"ToAddresses\\\":  [\\\"{contact_data}\\\"]," >> $DESTINATION_FILE\n',
                '    echo "    \\\"CcAddresses\\\":  []," >> $DESTINATION_FILE\n',
                '    echo "    \\\"BccAddresses\\\":  []" >> $DESTINATION_FILE\n',
                '    echo "}" >> $DESTINATION_FILE\n',
                '    MESSAGE_FILE=mail-message.json\n',
                '    echo "{" > $MESSAGE_FILE\n',
                '    echo "    \\\"Subject\\\": {" >> $MESSAGE_FILE\n',
                '    echo "        \\\"Data\\\":  \\\"$SUBJECT\\\"," >> $MESSAGE_FILE\n',
                '    echo "        \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n',
                '    echo "    }," >> $MESSAGE_FILE\n',
                '    echo "    \\\"Body\\\": {" >> $MESSAGE_FILE\n',
                '    echo "        \\\"Html\\\": {" >> $MESSAGE_FILE\n',
                '    echo "            \\\"Data\\\":  \\\"$MESSAGE\\\"," >> $MESSAGE_FILE\n',
                '    echo "            \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n',
                '    echo "        }" >> $MESSAGE_FILE\n',
                '    echo "    }" >> $MESSAGE_FILE\n',
                '    echo "}" >> $MESSAGE_FILE\n',
                f'    aws ses send-email --from {contact_data} --destination file://$DESTINATION_FILE --message file://$MESSAGE_FILE\n',
                '}\n',
                separator,
            ))

            # function calculate_duration and the main sequence of the script
            script_file_id.writelines((
                'function calculate_duration\n',
                '{\n',
                '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`\n',
                '    HH=`expr $DURATION / 3600`\n',
                '    MM=`expr $DURATION % 3600 / 60`\n',
                '    SS=`expr $DURATION % 60`\n',
                '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`\n',
                '}\n',
                separator,
                'init\n',
                'run_fastqc_process\n',
                'end\n',
            ))
    except Exception as e:
        error_list.append(f'*** EXCEPTION: "{e}".')
        error_list.append(f'*** ERROR: The file {script_path} can not be created')
        OK = False

    # return the control variable and the error list
    return (OK, error_list)
Esempio n. 8
0
def build_cutadapt_process_script(cluster_name, current_run_dir):
    '''
    Build the current cutadapt process script.
    '''

    # initialize the control variable and the error list
    OK = True
    error_list = []

    # get the cutadapt option dictionary
    cutadapt_option_dict = xlib.get_option_dict(get_cutadapt_config_file())

    # get the options
    experiment_id = cutadapt_option_dict['identification']['experiment_id']
    read_dataset_id = cutadapt_option_dict['identification']['read_dataset_id']
    cores = cutadapt_option_dict['cutadapt parameters']['cores']
    adapter = cutadapt_option_dict['cutadapt parameters']['adapter']
    adapter_pe = cutadapt_option_dict['cutadapt parameters']['adapter_pe']
    front = cutadapt_option_dict['cutadapt parameters']['front']
    front_pe = cutadapt_option_dict['cutadapt parameters']['front_pe']
    anywhere = cutadapt_option_dict['cutadapt parameters']['anywhere']
    anywhere_pe = cutadapt_option_dict['cutadapt parameters']['anywhere_pe']
    other_parameters = cutadapt_option_dict['cutadapt parameters']['other_parameters']
    format = cutadapt_option_dict['library']['format']
    read_type = cutadapt_option_dict['library']['read_type']

    # get the sections list
    sections_list = []
    for section in cutadapt_option_dict.keys():
        sections_list.append(section)
    sections_list.sort()

    # build read file lists
    read_file_1_list = []
    read_file_2_list = []
    for section in sections_list:
        # if the section identification is like library-n
        if re.match('^library-[0-9]+$', section):
            read_file_1 = cutadapt_option_dict[section]['read_file_1']
            read_file_1 = xlib.get_cluster_read_file(experiment_id, read_dataset_id, read_file_1)
            read_file_1_list.append(read_file_1)
            if read_type.upper() == 'PE':
                read_file_2 = cutadapt_option_dict[section]['read_file_2']
                read_file_2 = xlib.get_cluster_read_file(experiment_id, read_dataset_id, read_file_2)
                read_file_2_list.append(read_file_2)

    # get the output read directory
    output_read_dir = xlib.get_cluster_experiment_read_dataset_dir(experiment_id, os.path.basename(current_run_dir))

    # write the cutadapt process script
    try:
        if not os.path.exists(os.path.dirname(get_cutadapt_process_script())):
            os.makedirs(os.path.dirname(get_cutadapt_process_script()))
        with open(get_cutadapt_process_script(), mode='w', encoding='iso-8859-1', newline='\n') as script_file_id:
            script_file_id.write( '#!/bin/bash\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'SEP="#########################################"\n')
            script_file_id.write( 'export HOST_IP=`curl --silent checkip.amazonaws.com`\n')
            script_file_id.write( 'export HOST_ADDRESS="ec2-${HOST_IP//./-}-compute-1.amazonaws.com"\n')
            script_file_id.write( 'export AWS_CONFIG_FILE=/home/ubuntu/.aws/config\n')
            script_file_id.write( 'export AWS_SHARED_CREDENTIALS_FILE=/home/ubuntu/.aws/credentials\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write(f'CUTADAPT_PATH={xlib.get_cluster_app_dir()}/{xlib.get_miniconda3_name()}/envs/{xlib.get_cutadapt_anaconda_code()}/bin\n')
            script_file_id.write( 'export PATH=$CUTADAPT_PATH:$PATH\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write(f'STATUS_DIR={xlib.get_status_dir(current_run_dir)}\n')
            script_file_id.write(f'SCRIPT_STATUS_OK={xlib.get_status_ok(current_run_dir)}\n')
            script_file_id.write(f'SCRIPT_STATUS_WRONG={xlib.get_status_wrong(current_run_dir)}\n')
            script_file_id.write( 'mkdir --parents $STATUS_DIR\n')
            script_file_id.write( 'if [ -f $SCRIPT_STATUS_OK ]; then rm $SCRIPT_STATUS_OK; fi\n')
            script_file_id.write( 'if [ -f $SCRIPT_STATUS_WRONG ]; then rm $SCRIPT_STATUS_WRONG; fi\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'function init\n')
            script_file_id.write( '{\n')
            script_file_id.write( '    INIT_DATETIME=`date --utc +%s`\n')
            script_file_id.write( '    FORMATTED_INIT_DATETIME=`date --date="@$INIT_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write( '    echo "Script started at $FORMATTED_INIT_DATETIME+00:00."\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write(f'    echo "CLUSTER: {cluster_name}"\n')
            script_file_id.write( '    echo "HOST NAME: $HOSTNAME"\n')
            script_file_id.write( '    echo "HOST IP: $HOST_IP"\n')
            script_file_id.write( '    echo "HOST ADDRESS: $HOST_ADDRESS"\n')
            script_file_id.write( '}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'function run_cutadapt_process\n')
            script_file_id.write( '{\n')
            script_file_id.write(f'    mkdir --parents {output_read_dir}\n')
            script_file_id.write(f'    cd {current_run_dir}\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write( '    echo "cutadapt v`cutadapt --version`"\n')
            for i in range(len(read_file_1_list)):
                script_file_id.write( '    echo "$SEP"\n')
                script_file_id.write( '    /usr/bin/time \\\n')
                script_file_id.write(f'        --format="{xlib.get_time_output_format()}" \\\n')
                script_file_id.write( '        cutadapt \\\n')
                script_file_id.write(f'            --cores={cores} \\\n')
                script_file_id.write(f'            --adapter={adapter} \\\n')
                if adapter_pe.upper() != 'NONE':
                    script_file_id.write(f'            -A {adapter_pe} \\\n')
                if front.upper() != 'NONE':
                    script_file_id.write(f'            --front {front} \\\n')
                if front_pe.upper() != 'NONE':
                    script_file_id.write(f'            -G {front_pe} \\\n')
                if anywhere.upper() != 'NONE':
                    script_file_id.write(f'            --anywhere {anywhere} \\\n')
                if anywhere_pe.upper() != 'NONE':
                    script_file_id.write(f'            -B {anywhere_pe} \\\n')
                if other_parameters.upper() != 'NONE':
                    parameter_list = [x.strip() for x in other_parameters.split(';')]
                    for j in range(len(parameter_list)):
                        if parameter_list[j].find('=') > 0:
                            pattern = r'^--(.+)=(.+)$'
                            mo = re.search(pattern, parameter_list[j])
                            parameter_name = mo.group(1).strip()
                            parameter_value = mo.group(2).strip()
                            script_file_id.write(f'            --{parameter_name}={parameter_value} \\\n')
                        else:
                            pattern = r'^--(.+)$'
                            mo = re.search(pattern, parameter_list[j])
                            parameter_name = mo.group(1).strip()
                            script_file_id.write(f'            --{parameter_name} \\\n')
                if read_type.upper() == 'SE':
                    script_file_id.write(f'            --output={output_read_dir}/{os.path.basename(read_file_1_list[i])} \\\n')
                    script_file_id.write(f'            {read_file_1_list[i]}\n')
                elif read_type.upper() == 'PE':
                    script_file_id.write(f'            --output={output_read_dir}/{os.path.basename(read_file_1_list[i])} \\\n')
                    script_file_id.write(f'            --paired-output={output_read_dir}/{os.path.basename(read_file_2_list[i])} \\\n')
                    script_file_id.write(f'            {read_file_1_list[i]} \\\n')
                    script_file_id.write(f'            {read_file_2_list[i]}\n')
                script_file_id.write( '    RC=$?\n')
                script_file_id.write( '    if [ $RC -ne 0 ]; then manage_error cutadapt $RC; fi\n')
            script_file_id.write( '}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'function end\n')
            script_file_id.write( '{\n')
            script_file_id.write( '    END_DATETIME=`date --utc +%s`\n')
            script_file_id.write( '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n')
            script_file_id.write( '    calculate_duration\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write( '    echo "Script ended OK at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write( '    send_mail ok\n')
            script_file_id.write( '    touch $SCRIPT_STATUS_OK\n')
            script_file_id.write( '    exit 0\n')
            script_file_id.write( '}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'function manage_error\n')
            script_file_id.write( '{\n')
            script_file_id.write( '    END_DATETIME=`date --utc +%s`\n')
            script_file_id.write( '    FORMATTED_END_DATETIME=`date --date="@$END_DATETIME" "+%Y-%m-%d %H:%M:%S"`\n')
            script_file_id.write( '    calculate_duration\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write( '    echo "ERROR: $1 returned error $2"\n')
            script_file_id.write( '    echo "Script ended WRONG at $FORMATTED_END_DATETIME+00:00 with a run duration of $DURATION s ($FORMATTED_DURATION)."\n')
            script_file_id.write( '    echo "$SEP"\n')
            script_file_id.write( '    send_mail wrong\n')
            script_file_id.write( '    touch $SCRIPT_STATUS_WRONG\n')
            script_file_id.write( '    exit 3\n')
            script_file_id.write( '}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            process_name = f'{xlib.get_cutadapt_name()} process'
            mail_message_ok = xlib.get_mail_message_ok(process_name, cluster_name)
            mail_message_wrong = xlib.get_mail_message_wrong(process_name, cluster_name)
            script_file_id.write( 'function send_mail\n')
            script_file_id.write( '{\n')
            script_file_id.write(f'    SUBJECT="{xlib.get_project_name()}: {process_name}"\n')
            script_file_id.write( '    if [ "$1" == "ok" ]; then\n')
            script_file_id.write(f'        MESSAGE="{mail_message_ok}"\n')
            script_file_id.write( '    elif [ "$1" == "wrong" ]; then\n')
            script_file_id.write(f'        MESSAGE="{mail_message_wrong}"\n')
            script_file_id.write( '    else\n')
            script_file_id.write( '         MESSAGE=""\n')
            script_file_id.write( '    fi\n')
            script_file_id.write( '    DESTINATION_FILE=mail-destination.json\n')
            script_file_id.write( '    echo "{" > $DESTINATION_FILE\n')
            script_file_id.write(f'    echo "    \\\"ToAddresses\\\":  [\\\"{xconfiguration.get_contact_data()}\\\"]," >> $DESTINATION_FILE\n')
            script_file_id.write( '    echo "    \\\"CcAddresses\\\":  []," >> $DESTINATION_FILE\n')
            script_file_id.write( '    echo "    \\\"BccAddresses\\\":  []" >> $DESTINATION_FILE\n')
            script_file_id.write( '    echo "}" >> $DESTINATION_FILE\n')
            script_file_id.write( '    MESSAGE_FILE=mail-message.json\n')
            script_file_id.write( '    echo "{" > $MESSAGE_FILE\n')
            script_file_id.write( '    echo "    \\\"Subject\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "        \\\"Data\\\":  \\\"$SUBJECT\\\"," >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "        \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "    }," >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "    \\\"Body\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "        \\\"Html\\\": {" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "            \\\"Data\\\":  \\\"$MESSAGE\\\"," >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "            \\\"Charset\\\":  \\\"UTF-8\\\"" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "        }" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "    }" >> $MESSAGE_FILE\n')
            script_file_id.write( '    echo "}" >> $MESSAGE_FILE\n')
            script_file_id.write(f'    aws ses send-email --from {xconfiguration.get_contact_data()} --destination file://$DESTINATION_FILE --message file://$MESSAGE_FILE\n')
            script_file_id.write( '}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'function calculate_duration\n')
            script_file_id.write( '{\n')
            script_file_id.write( '    DURATION=`expr $END_DATETIME - $INIT_DATETIME`\n')
            script_file_id.write( '    HH=`expr $DURATION / 3600`\n')
            script_file_id.write( '    MM=`expr $DURATION % 3600 / 60`\n')
            script_file_id.write( '    SS=`expr $DURATION % 60`\n')
            script_file_id.write( '    FORMATTED_DURATION=`printf "%03d:%02d:%02d\\n" $HH $MM $SS`\n')
            script_file_id.write( '}\n')
            script_file_id.write( '#-------------------------------------------------------------------------------\n')
            script_file_id.write( 'init\n')
            script_file_id.write( 'run_cutadapt_process\n')
            script_file_id.write( 'end\n')
    except Exception as e:
        error_list.append(f'*** EXCEPTION: "{e}".')
        error_list.append(f'*** ERROR: The file {get_cutadapt_process_script()} can not be created.')
        OK = False

    # return the control variable and the error list
    return (OK, error_list)