Example No. 1
File: glog.py Project: GGFHF/TOA
    def populate_combobox_process_type(self):
        '''
        Populate data in "combobox_process_type".
        '''

        # clear the value selected in the combobox
        self.wrapper_process_type.set('')

        # initialize the process type list
        process_type_list = []

        # get the dictionary of TOA configuration.
        toa_config_dict = xtoa.get_toa_config_dict()

        # get the process type list
        subdir_list = [
            subdir for subdir in os.listdir(toa_config_dict['RESULT_DIR'])
            if os.path.isdir(
                os.path.join(toa_config_dict['RESULT_DIR'], subdir))
        ]
        for subdir in subdir_list:
            process_type_list.append(subdir)

        # check if there are any process types
        if process_type_list == []:
            message = 'There is not any run.'
            tkinter.messagebox.showwarning(
                f'{xlib.get_short_project_name()} - {self.head}', message)
            return

        # load the names of process types
        self.combobox_process_type['values'] = sorted(process_type_list)
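
A note on the directory listing: the pattern above (an os.listdir comprehension filtered with os.path.isdir) recurs in most of the examples below. A minimal sketch of the same listing with os.scandir, which avoids the repeated os.path.join calls; the helper name list_subdirs is illustrative and not part of TOA:

import os

def list_subdirs(parent_dir):
    '''Return the sorted names of the immediate subdirectories of parent_dir.'''
    with os.scandir(parent_dir) as entries:
        return sorted(entry.name for entry in entries if entry.is_dir())

# e.g. process_type_list = list_subdirs(toa_config_dict['RESULT_DIR'])
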
Example No. 2
File: cinputs.py Project: GGFHF/TOA
def input_result_dataset_id(experiment_id, app_list):
    '''
    Input a result dataset identification.
    '''

    # initialize the control variable
    OK = True

    # get the dictionary of TOA configuration.
    toa_config_dict = xtoa.get_toa_config_dict()

    # initialize the result dataset identification
    result_dataset_id = ''

    # initialize the result dataset list
    result_dataset_id_list = []

    # get the result dataset identifications of the experiment
    experiment_dir = f'''{toa_config_dict['RESULT_DIR']}/{experiment_id}'''
    subdir_list = sorted([
        subdir for subdir in os.listdir(experiment_dir)
        if os.path.isdir(os.path.join(experiment_dir, subdir))
    ])
    for subdir in subdir_list:
        for app in app_list:
            if app == xlib.get_all_applications_selected_code(
            ) or subdir.startswith(app):
                result_dataset_id_list.append(subdir)
                break

    # print the result dataset identifications of the experiment
    if result_dataset_id_list != []:
        result_dataset_id_list_text = str(result_dataset_id_list).strip(
            '[]').replace('\'', '')
        print(
            f'dataset ids existing in {experiment_id}: {result_dataset_id_list_text} ...'
        )
    else:
        OK = False

    # input and check the result dataset identification
    if OK:
        while result_dataset_id == '':
            result_dataset_id = input('... Enter the dataset id: ')
            if result_dataset_id not in result_dataset_id_list:
                print(f'*** ERROR: {result_dataset_id} does not exist.')
                result_dataset_id = ''

    # return the result dataset identification
    return result_dataset_id
Example No. 3
def form_view_result_log():
    '''
    View the log of an experiment/process result.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(
        'Logs - View an experiment/process result log')

    # get the experiment identification
    if OK:
        experiment_id = cinputs.input_experiment_id()
        if experiment_id == '':
            print('WARNING: There is not any experiment/process data.')
            OK = False

    # get the result dataset identification
    if OK:
        result_dataset_id = cinputs.input_result_dataset_id(
            experiment_id, xlib.get_all_applications_selected_code())
        if result_dataset_id == '':
            print(
                f'WARNING: The experiment/process {experiment_id} does not have result datasets.'
            )
            OK = False

    # get the dictionary of TOA configuration.
    if OK:
        toa_config_dict = xtoa.get_toa_config_dict()

    # build the log file path
    if OK:
        log_file = f'{toa_config_dict["RESULT_DIR"]}/{experiment_id}/{result_dataset_id}/{xlib.get_run_log_file()}'

    # view the log file
    if OK:
        text = 'Logs - View an experiment/process log'
        OK = clib.view_file(log_file, text)

    # show continuation message
    input('Press [Intro] to continue ...')
Example No. 4
File: cinputs.py Project: GGFHF/TOA
def input_experiment_id():
    '''
    Input an experiment/process identification.
    '''

    # initialize the control variable
    OK = True

    # initialize the experiment/process identification
    experiment_id = ''

    # initialize the experiment/process identification list
    experiment_id_list = []

    # get the dictionary of TOA configuration.
    toa_config_dict = xtoa.get_toa_config_dict()

    # get the experiment/process identifications
    subdir_list = [
        subdir for subdir in os.listdir(toa_config_dict['RESULT_DIR'])
        if os.path.isdir(os.path.join(toa_config_dict['RESULT_DIR'], subdir))
    ]
    for subdir in subdir_list:
        experiment_id_list.append(subdir)

    # print the existing experiment/process identifications
    if experiment_id_list != []:
        experiment_id_list_text = str(experiment_id_list).strip('[]').replace(
            '\'', '')
        print(
            f'Experiment/process ids existing: {experiment_id_list_text} ...')
    else:
        OK = False

    # input and check the experiment/process identification
    if OK:
        while experiment_id == '':
            experiment_id = input('... Enter the experiment/process id: ')
            if experiment_id not in experiment_id_list:
                print(f'*** ERROR: {experiment_id} does not exist.')
                experiment_id = ''

    # return the experiment/process identification
    return experiment_id
Example No. 5
File: glog.py Project: GGFHF/TOA
    def execute(self, event=None):
        '''
        Execute the listing of the result logs.
        '''

        # if "button_execute" is disabled, exit function
        if str(self.button_execute['state']) == 'disabled':
            return

        # check inputs
        OK = self.check_inputs()
        if not OK:
            message = 'Some input values are not OK.'
            tkinter.messagebox.showerror(
                f'{xlib.get_short_project_name()} - {self.head}', message)

        # get the dictionary of TOA configuration.
        if OK:
            toa_config_dict = xtoa.get_toa_config_dict()

        # get the run dictionary
        if OK:
            process_type_dir = f'{toa_config_dict["RESULT_DIR"]}/{self.wrapper_process_type.get()}'
            subdir_list = [
                subdir for subdir in os.listdir(process_type_dir)
                if os.path.isdir(os.path.join(process_type_dir, subdir))
            ]
            result_dataset_dict = {}
            for subdir in subdir_list:
                result_dataset_id = subdir
                try:
                    pattern = r'^(.+)\-(.+)\-(.+)$'
                    mo = re.search(pattern, result_dataset_id)
                    bioinfo_app_code = mo.group(1).strip()
                    yymmdd = mo.group(2)
                    hhmmss = mo.group(3)
                    date = f'20{yymmdd[:2]}-{yymmdd[2:4]}-{yymmdd[4:]}'
                    time = f'{hhmmss[:2]}:{hhmmss[2:4]}:{hhmmss[4:]}'
                except Exception:
                    bioinfo_app_code = 'xxx'
                    date = '0000-00-00'
                    time = '00:00:00'

                if result_dataset_id.startswith(xlib.get_blastplus_code() +
                                                '-'):
                    bioinfo_app_name = xlib.get_blastplus_name()

                elif result_dataset_id.startswith(xlib.get_diamond_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_diamond_name()

                elif result_dataset_id.startswith(
                        xlib.get_entrez_direct_code() + '-'):
                    bioinfo_app_name = xlib.get_entrez_direct_name()

                elif result_dataset_id.startswith(xlib.get_miniconda3_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_miniconda3_name()

                elif result_dataset_id.startswith(xlib.get_r_code() + '-'):
                    bioinfo_app_name = xlib.get_r_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_basic_data_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gene_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_go_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_interpro_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_monocots_04_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_taxonomy_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_taxonomy_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.
                        get_toa_process_gilist_viridiplantae_nucleotide_gi_code(
                        ) + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.
                        get_toa_process_gilist_viridiplantae_protein_gi_code()
                        + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_protein_gi_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_basic_data_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gene_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_go_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_interpro_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_monocots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_merge_annotations_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_merge_annotations_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nr_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_blastplus_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nr_diamond_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_diamond_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nt_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nt_blastplus_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_pipeline_aminoacid_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_aminoacid_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_pipeline_nucleotide_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_nucleotide_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_monocots_04_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_refseq_plant_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_refseq_plant_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_rebuild_toa_database_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_rebuild_toa_database_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_recreate_toa_database_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_recreate_toa_database_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_transdecoder_code() + '-'):
                    bioinfo_app_name = xlib.get_transdecoder_name()

                else:
                    bioinfo_app_name = 'xxx'

                status_ok = os.path.isfile(
                    xlib.get_status_ok(os.path.join(process_type_dir, subdir)))
                status_wrong = os.path.isfile(
                    xlib.get_status_wrong(
                        os.path.join(process_type_dir, subdir)))
                if status_ok and not status_wrong:
                    status = 'OK'
                elif not status_ok and status_wrong:
                    status = 'wrong'
                elif not status_ok and not status_wrong:
                    status = 'not finished'
                elif status_ok and status_wrong:
                    status = 'undetermined'
                key = f'{bioinfo_app_name}-{result_dataset_id}'
                result_dataset_dict[key] = {
                    'process_type': self.wrapper_process_type.get(),
                    'bioinfo_app': bioinfo_app_name,
                    'result_dataset_id': result_dataset_id,
                    'date': date,
                    'time': time,
                    'status': status
                }

        # check if there is any run
        if OK:
            if result_dataset_dict == {}:
                message = 'There is not any run.'
                tkinter.messagebox.showwarning(
                    f'{xlib.get_short_project_name()} - {self.head}', message)

        # build the data list
        if OK:
            data_list = [
                'process_type', 'bioinfo_app', 'result_dataset_id', 'date',
                'time', 'status'
            ]

        # build the data dictionary
        if OK:
            data_dict = {}
            data_dict['process_type'] = {
                'text': 'Process type',
                'width': 180,
                'alignment': 'left'
            }
            data_dict['bioinfo_app'] = {
                'text': 'Bioinfo app / Utility',
                'width': 340,
                'alignment': 'left'
            }
            data_dict['result_dataset_id'] = {
                'text': 'Result dataset',
                'width': 225,
                'alignment': 'left'
            }
            data_dict['date'] = {
                'text': 'Date',
                'width': 95,
                'alignment': 'right'
            }
            data_dict['time'] = {
                'text': 'Time',
                'width': 75,
                'alignment': 'right'
            }
            data_dict['status'] = {
                'text': 'Status',
                'width': 90,
                'alignment': 'left'
            }

        # create the dialog Table to show the result datasets
        if OK:
            dialog_table = gdialogs.DialogTable(
                self,
                f'Runs in {xlib.get_result_dir()}/{self.wrapper_process_type.get()}',
                400, 1030, data_list, data_dict, result_dataset_dict,
                sorted(result_dataset_dict.keys()), 'view_result_logs',
                ['revisar'])
            self.wait_window(dialog_table)

        # close the form
        if OK:
            self.close()
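
A note on the application name lookup: the long if/elif chain above, which maps a result dataset prefix to an application name, appears again in form_list_results_logs (Example No. 9). A minimal sketch of a table-driven alternative, assuming the xlib code/name getter pairs used above; only a few pairs are shown and the helper names are illustrative:

def build_app_name_map(xlib):
    '''Map each application code to its human-readable name.'''
    return {
        xlib.get_blastplus_code(): xlib.get_blastplus_name(),
        xlib.get_diamond_code(): xlib.get_diamond_name(),
        xlib.get_entrez_direct_code(): xlib.get_entrez_direct_name(),
        xlib.get_transdecoder_code(): xlib.get_transdecoder_name(),
        # ... remaining code/name pairs ...
    }

def get_bioinfo_app_name(result_dataset_id, app_name_map):
    '''Return the name whose code prefixes the result dataset id, or 'xxx'.'''
    for code, name in app_name_map.items():
        if result_dataset_id.startswith(f'{code}-'):
            return name
    return 'xxx'
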
Example No. 6
def form_view_phylogenic_data_frecuency(stats_code):
    '''
    View the frequency distribution of phylogenetic data.
    '''

    # initialize the control variable
    OK = True

    # assign the text of the "name"
    if stats_code == 'species':
        name = 'Species - Frequency distribution'
    elif stats_code == 'family':
        name = 'Family - Frequency distribution'
    elif stats_code == 'phylum':
        name = 'Phylum - Frequency distribution'
    elif stats_code == 'namespace':
        name = 'GO - Frequency distribution per namespace'

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(f'Statistics - {name} data')

    # get the pipeline dataset identification
    app_list = [
        xlib.get_toa_process_pipeline_nucleotide_code(),
        xlib.get_toa_process_pipeline_aminoacid_code(),
        xlib.get_toa_process_merge_annotations_code()
    ]
    pipeline_dataset_id = cinputs.input_result_dataset_id(
        xlib.get_toa_result_pipeline_dir(), app_list)
    if pipeline_dataset_id == '':
        print(
            'WARNING: There are not any annotation pipeline result datasets.')
        OK = False

    # build distribution dictionary
    if OK:

        # initialize the distribution dictionary
        distribution_dict = {}

        # get the dictionary of TOA configuration
        toa_config_dict = xtoa.get_toa_config_dict()

        # get the statistics file path
        stats_file = f'{toa_config_dict["RESULT_DIR"]}/{xlib.get_toa_result_pipeline_dir()}/{pipeline_dataset_id}/{toa_config_dict["STATS_SUBDIR_NAME"]}/{stats_code}-{toa_config_dict["STATS_BASE_NAME"]}.csv'

        # open the statistics file
        if stats_file.endswith('.gz'):
            try:
                stats_file_id = gzip.open(stats_file,
                                          mode='rt',
                                          encoding='iso-8859-1',
                                          newline='\n')
            except Exception as e:
                raise xlib.ProgramException('F002', stats_file)
        else:
            try:
                stats_file_id = open(stats_file,
                                     mode='r',
                                     encoding='iso-8859-1',
                                     newline='\n')
            except Exception as e:
                raise xlib.ProgramException('F001', stats_file)

        # initialize the record counter
        record_counter = 0

        # initialize the header record control
        header_record = True

        # read the first record
        record = stats_file_id.readline()

        # while there are records
        while record != '':

            # add 1 to the record counter
            record_counter += 1

            # process the header record
            if header_record:
                header_record = False

            # process data records
            else:

                # extract data
                # record format: "stats_code_id";"all_count";"first_hsp_count";"min_evalue_count"
                data_list = []
                begin = 0
                for end in [i for i, chr in enumerate(record) if chr == ';']:
                    data_list.append(record[begin:end].strip('"'))
                    begin = end + 1
                data_list.append(record[begin:].strip('\n').strip('"'))
                try:
                    id = data_list[0]
                    all_count = data_list[1]
                    first_hsp_count = data_list[2]
                    min_evalue_count = data_list[3]
                except Exception as e:
                    raise xlib.ProgramException('F006',
                                                os.path.basename(stats_file),
                                                record_counter)

                # add data to the dictionary
                distribution_dict[id] = {
                    'id': id,
                    'all_count': all_count,
                    'first_hsp_count': first_hsp_count,
                    'min_evalue_count': min_evalue_count
                }

            # read the next record
            record = stats_file_id.readline()

    # print the distribution
    if OK:
        print(xlib.get_separator())
        if distribution_dict == {}:
            print('*** WARNING: There is not any distribution.')
        else:
            # set data width
            id_width = 50
            all_count_width = 11
            first_hsp_count_width = 11
            min_evalue_count_width = 11
            # set line template
            line_template = '{0:' + str(id_width) + '}   {1:' + str(
                all_count_width) + '}   {2:' + str(
                    first_hsp_count_width) + '}   {3:' + str(
                        min_evalue_count_width) + '}'
            # print header
            print(
                line_template.format(stats_code.capitalize(), 'All',
                                     'First HSP', 'Min e-value'))
            print(
                line_template.format('=' * id_width, '=' * all_count_width,
                                     '=' * first_hsp_count_width,
                                     '=' * min_evalue_count_width))
            # print detail lines
            for key in sorted(distribution_dict.keys()):
                print(
                    line_template.format(
                        distribution_dict[key]['id'],
                        distribution_dict[key]['all_count'],
                        distribution_dict[key]['first_hsp_count'],
                        distribution_dict[key]['min_evalue_count']))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
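
A note on the record parsing: Examples No. 6, 7 and 8 split the ';'-delimited, double-quoted records by hand. A minimal sketch of the same parsing with the standard csv module, assuming the record format described in the comments above; the helper name read_distribution is illustrative:

import csv

def read_distribution(stats_file_id):
    '''Build the distribution dictionary from an open statistics file.'''
    distribution_dict = {}
    reader = csv.reader(stats_file_id, delimiter=';', quotechar='"')
    next(reader, None)    # skip the header record
    for data_list in reader:
        # a short record raises ValueError here; the original code raises
        # xlib.ProgramException('F006', ...) in that case
        (key, all_count, first_hsp_count, min_evalue_count) = data_list[:4]
        distribution_dict[key] = {
            'id': key,
            'all_count': all_count,
            'first_hsp_count': first_hsp_count,
            'min_evalue_count': min_evalue_count
        }
    return distribution_dict
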
Example No. 7
def form_view_dataset_data_frecuency():
    '''
    View the frequency distribution of annotation dataset data.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(
        'Statistics - Annotation datasets - Frequency distribution data')

    # get the pipeline dataset identification
    app_list = [
        xlib.get_toa_process_pipeline_nucleotide_code(),
        xlib.get_toa_process_pipeline_aminoacid_code()
    ]
    pipeline_dataset_id = cinputs.input_result_dataset_id(
        xlib.get_toa_result_pipeline_dir(), app_list)
    if pipeline_dataset_id == '':
        print(
            'WARNING: There are not any annotation pipeline result datasets.')
        OK = False

    # build distribution dictionary
    if OK:

        # initialize the distribution dictionary
        distribution_dict = {}

        # get the dictionary of TOA configuration
        toa_config_dict = xtoa.get_toa_config_dict()

        # get the statistics file path
        stats_file = f'{toa_config_dict["RESULT_DIR"]}/{xlib.get_toa_result_pipeline_dir()}/{pipeline_dataset_id}/{toa_config_dict["STATS_SUBDIR_NAME"]}/dataset-{toa_config_dict["STATS_BASE_NAME"]}.csv'

        # open the statistics file
        if stats_file.endswith('.gz'):
            try:
                stats_file_id = gzip.open(stats_file,
                                          mode='rt',
                                          encoding='iso-8859-1',
                                          newline='\n')
            except Exception as e:
                raise xlib.ProgramException('F002', stats_file)
        else:
            try:
                stats_file_id = open(stats_file,
                                     mode='r',
                                     encoding='iso-8859-1',
                                     newline='\n')
            except Exception as e:
                raise xlib.ProgramException('F001', stats_file)

        # initialize the record counter
        record_counter = 0

        # initialize the header record control
        header_record = True

        # read the first record
        record = stats_file_id.readline()

        # while there are records
        while record != '':

            # add 1 to the record counter
            record_counter += 1

            # process the header record
            if header_record:
                header_record = False

            # process data records
            else:

                # extract data
                # record format: "dataset_name";"annotated_seq_count";"remained_seq_count"
                data_list = []
                begin = 0
                for end in [i for i, chr in enumerate(record) if chr == ';']:
                    data_list.append(record[begin:end].strip('"'))
                    begin = end + 1
                data_list.append(record[begin:].strip('\n').strip('"'))
                try:
                    dataset_name = data_list[0]
                    annotated_seq_count = data_list[1]
                    remained_seq_count = data_list[2]
                except Exception as e:
                    raise xlib.ProgramException('F006',
                                                os.path.basename(stats_file),
                                                record_counter)

                # add data to the dictionary
                distribution_dict[record_counter] = {
                    'dataset_name': dataset_name,
                    'annotated_seq_count': annotated_seq_count,
                    'remained_seq_count': remained_seq_count
                }

            # read the next record
            record = stats_file_id.readline()

    # print the distribution
    if OK:
        print(xlib.get_separator())
        if distribution_dict == {}:
            print('*** WARNING: There is not any distribution.')
        else:
            # set data width
            dataset_name_width = 19
            annotated_seq_count_width = 14
            remained_seq_count_width = 14
            # set line template
            line_template = '{0:' + str(dataset_name_width) + '}   {1:' + str(
                annotated_seq_count_width) + '}   {2:' + str(
                    remained_seq_count_width) + '}'
            # print header
            print(
                line_template.format('Dataset', 'Annotated seqs',
                                     'Remained seqs'))
            print(
                line_template.format('=' * dataset_name_width,
                                     '=' * annotated_seq_count_width,
                                     '=' * remained_seq_count_width))
            # print detail lines
            for key in sorted(distribution_dict.keys()):
                print(
                    line_template.format(
                        distribution_dict[key]['dataset_name'],
                        distribution_dict[key]['annotated_seq_count'],
                        distribution_dict[key]['remained_seq_count']))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
Example No. 8
def form_view_x_per_y_data(stats_code):
    '''
    View the x per y data.
    '''

    # initialize the control variable
    OK = True

    # assign the text of the "name"
    if stats_code == 'hit_per_hsp':
        name = '# HITs per # HSPs'
    elif stats_code == 'seq_per_go':
        name = '# sequences per # GO terms'
    elif stats_code == 'seq_per_ec':
        name = '# sequences per # EC ids'
    elif stats_code == 'seq_per_interpro':
        name = '# sequences per # InterPro ids'
    elif stats_code == 'seq_per_kegg':
        name = '# sequences per # KEGG ids'
    elif stats_code == 'seq_per_mapman':
        name = '# sequences per # MapMan ids'
    elif stats_code == 'seq_per_metacyc':
        name = '# sequences per # MetaCyc ids'

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(f'Statistics - {name} data')

    # get the pipeline dataset identification
    if stats_code == 'hit_per_hsp':
        app_list = [
            xlib.get_toa_process_pipeline_nucleotide_code(),
            xlib.get_toa_process_pipeline_aminoacid_code()
        ]
    else:
        app_list = [
            xlib.get_toa_process_pipeline_nucleotide_code(),
            xlib.get_toa_process_pipeline_aminoacid_code(),
            xlib.get_toa_process_merge_annotations_code()
        ]
    pipeline_dataset_id = cinputs.input_result_dataset_id(
        xlib.get_toa_result_pipeline_dir(), app_list)
    if pipeline_dataset_id == '':
        print(
            'WARNING: There are not any annotation pipeline result datasets.')
        OK = False

    # build distribution dictionary
    if OK:

        # initialize the distribution dictionary
        distribution_dict = {}

        # get the dictionary of TOA configuration
        toa_config_dict = xtoa.get_toa_config_dict()

        # get the statistics file path
        stats_file = f'{toa_config_dict["RESULT_DIR"]}/{xlib.get_toa_result_pipeline_dir()}/{pipeline_dataset_id}/{toa_config_dict["STATS_SUBDIR_NAME"]}/{stats_code}-{toa_config_dict["STATS_BASE_NAME"]}.csv'

        # open the statistics file
        if stats_file.endswith('.gz'):
            try:
                stats_file_id = gzip.open(stats_file,
                                          mode='rt',
                                          encoding='iso-8859-1',
                                          newline='\n')
            except Exception as e:
                raise xlib.ProgramException('F002', stats_file)
        else:
            try:
                stats_file_id = open(stats_file,
                                     mode='r',
                                     encoding='iso-8859-1',
                                     newline='\n')
            except Exception as e:
                raise xlib.ProgramException('F001', stats_file)

        # initialize the record counter
        record_counter = 0

        # initialize the header record control
        header_record = True

        # read the first record
        record = stats_file_id.readline()

        # while there are records
        while record != '':

            # add 1 to the record counter
            record_counter += 1

            # process the header record
            if header_record:
                header_record = False

            # process data records
            else:

                # extract data
                # record format: "x_count";"y_count"
                data_list = []
                begin = 0
                for end in [i for i, chr in enumerate(record) if chr == ';']:
                    data_list.append(record[begin:end].strip('"'))
                    begin = end + 1
                data_list.append(record[begin:].strip('\n').strip('"'))
                try:
                    x_count = data_list[0]
                    y_count = data_list[1]
                except Exception as e:
                    raise xlib.ProgramException('F006',
                                                os.path.basename(stats_file),
                                                record_counter)

                # add data to the dictionary
                distribution_dict[record_counter] = {
                    'x_count': x_count,
                    'y_count': y_count
                }

            # read the next record
            record = stats_file_id.readline()

    # print the distribution
    if OK:
        print(xlib.get_separator())
        if distribution_dict == {}:
            print('*** WARNING: There is not any stats data.')
        else:
            # set data width
            x_count_width = 15
            y_count_width = 15
            # set line template
            line_template = '{0:' + str(x_count_width) + '}   {1:' + str(
                y_count_width) + '}'
            # print header
            if stats_code == 'hit_per_hsp':
                print(line_template.format('# HSPs', '# HITs'))
            elif stats_code == 'seq_per_go':
                print(line_template.format('# GO terms', '# sequences'))
            elif stats_code == 'seq_per_ec':
                print(line_template.format('# EC ids', '# sequences'))
            elif stats_code == 'seq_per_interpro':
                print(line_template.format('# InterPro ids', '# sequences'))
            elif stats_code == 'seq_per_kegg':
                print(line_template.format('# KEGG ids', '# sequences'))
            elif stats_code == 'seq_per_mapman':
                print(line_template.format('# MapMan ids', '# sequences'))
            elif stats_code == 'seq_per_metacyc':
                print(line_template.format('# MetaCyc ids', '# sequences'))
            print(
                line_template.format('=' * x_count_width, '=' * y_count_width))
            # print detail lines
            for key in sorted(distribution_dict.keys()):
                print(
                    line_template.format(distribution_dict[key]['x_count'],
                                         distribution_dict[key]['y_count']))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
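
A note on the line templates: the printing sections build their column templates by concatenating strings around the width values. A minimal sketch of the same template written as an f-string (doubled braces yield literal format fields); the widths are the ones used above:

x_count_width = 15
y_count_width = 15
line_template = f'{{0:{x_count_width}}}   {{1:{y_count_width}}}'    # '{0:15}   {1:15}'
print(line_template.format('# HSPs', '# HITs'))
print(line_template.format('=' * x_count_width, '=' * y_count_width))
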
Example No. 9
def form_list_results_logs():
    '''
    List the result logs of an experiment/process.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment('Logs - List result logs')

    # get experiment identification
    experiment_id = cinputs.input_experiment_id()
    if experiment_id == '':
        print('WARNING: There is not any experiment/process run.')
        OK = False

    # get the dictionary of TOA configuration.
    if OK:
        toa_config_dict = xtoa.get_toa_config_dict()

    # get the result dataset list of the experiment
    if OK:
        experiment_dir = f'{toa_config_dict["RESULT_DIR"]}/{experiment_id}'
        subdir_list = [
            subdir for subdir in os.listdir(experiment_dir)
            if os.path.isdir(os.path.join(experiment_dir, subdir))
        ]
        result_dataset_id_list = []
        for subdir in subdir_list:
            result_dataset_id_list.append(subdir)

    # print the result dataset identification list of the experiment
    if OK:
        print(xlib.get_separator())
        if result_dataset_id_list == []:
            print(
                f'*** WARNING: There is not any result dataset of the experiment/process {experiment_id}.'
            )
        else:
            result_dataset_id_list.sort()
            # set data width
            result_dataset_width = 25
            bioinfo_app_width = 25
            # set line template
            line_template = '{0:' + str(
                result_dataset_width) + '}   {1:' + str(
                    bioinfo_app_width) + '}'
            # print header
            print(
                line_template.format('Result dataset',
                                     'Bioinfo app / Utility'))
            print(
                line_template.format('=' * result_dataset_width,
                                     '=' * bioinfo_app_width))
            # print detail lines
            for result_dataset_id in result_dataset_id_list:

                if result_dataset_id.startswith(xlib.get_blastplus_code() +
                                                '-'):
                    bioinfo_app_name = xlib.get_blastplus_name()

                elif result_dataset_id.startswith(xlib.get_diamond_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_diamond_name()

                elif result_dataset_id.startswith(
                        xlib.get_entrez_direct_code() + '-'):
                    bioinfo_app_name = xlib.get_entrez_direct_name()

                elif result_dataset_id.startswith(xlib.get_miniconda3_code() +
                                                  '-'):
                    bioinfo_app_name = xlib.get_miniconda3_name()

                elif result_dataset_id.startswith(xlib.get_r_code() + '-'):
                    bioinfo_app_name = xlib.get_r_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_basic_data_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gene_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_go_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_interpro_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_monocots_04_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_download_taxonomy_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_taxonomy_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.
                        get_toa_process_gilist_viridiplantae_nucleotide_gi_code(
                        ) + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.
                        get_toa_process_gilist_viridiplantae_protein_gi_code()
                        + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_protein_gi_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_basic_data_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gene_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_go_name()

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_interpro_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_load_monocots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_merge_annotations_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_merge_annotations_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nr_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_blastplus_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nr_diamond_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_diamond_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_nt_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nt_blastplus_db_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_pipeline_aminoacid_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_aminoacid_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_pipeline_nucleotide_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_nucleotide_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_dicots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_gymno_01_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_monocots_04_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_monocots_04_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_proteome_refseq_plant_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_refseq_plant_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_rebuild_toa_database_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_rebuild_toa_database_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_toa_process_recreate_toa_database_code() +
                        '-'):
                    bioinfo_app_name = xlib.get_toa_process_recreate_toa_database_name(
                    )

                elif result_dataset_id.startswith(
                        xlib.get_transdecoder_code() + '-'):
                    bioinfo_app_name = xlib.get_transdecoder_name()

                else:
                    bioinfo_app_name = 'xxx'

                print(line_template.format(result_dataset_id,
                                           bioinfo_app_name))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')