def populate_combobox_process_type(self):
    '''
    Populate data in "combobox_process_type" with the process type
    subdirectories found in the TOA result directory.
    '''

    # clear the value selected in the combobox
    self.wrapper_process_type.set('')

    # get the dictionary of TOA configuration.
    toa_config_dict = xtoa.get_toa_config_dict()

    # get the process type list: every subdirectory of the result directory
    # (FIX: build the list directly instead of copying a temporary list item by item)
    result_dir = toa_config_dict['RESULT_DIR']
    process_type_list = [subdir for subdir in os.listdir(result_dir) if os.path.isdir(os.path.join(result_dir, subdir))]

    # check if there are any process type
    if not process_type_list:
        message = 'There is not any run.'
        tkinter.messagebox.showwarning(f'{xlib.get_short_project_name()} - {self.head}', message)
        return

    # load the names of process types
    self.combobox_process_type['values'] = sorted(process_type_list)
def input_result_dataset_id(experiment_id, app_list):
    '''
    Input a result dataset identification.

    Parameters:
        experiment_id: identification of the experiment (a subdirectory of the
            TOA result directory) whose datasets are offered.
        app_list: list of application codes used to filter the dataset ids;
            the special "all applications selected" code matches every dataset.

    Returns:
        The result dataset identification entered by the user, or '' when the
        experiment has no matching result dataset.
    '''

    # initialize the control variable
    OK = True

    # get the dictionary of TOA configuration.
    toa_config_dict = xtoa.get_toa_config_dict()

    # initialize the result dataset identification
    result_dataset_id = ''

    # initialize the result dataset list
    result_dataset_id_list = []

    # get the result dataset identifications of the experiment: every
    # subdirectory whose name starts with one of the requested app codes
    experiment_dir = f'''{toa_config_dict['RESULT_DIR']}/{experiment_id}'''
    subdir_list = sorted([subdir for subdir in os.listdir(experiment_dir) if os.path.isdir(os.path.join(experiment_dir, subdir))])
    for subdir in subdir_list:
        for app in app_list:
            if app == xlib.get_all_applications_selected_code() or subdir.startswith(app):
                result_dataset_id_list.append(subdir)
                break

    # print the result dataset identifications in the clusters
    if result_dataset_id_list != []:
        # FIX: join the ids directly; the previous
        # str(list).strip('[]').replace("'", '') repr-mangling corrupted ids
        # containing apostrophes or brackets
        result_dataset_id_list_text = ', '.join(result_dataset_id_list)
        print(f'dataset ids existing in {experiment_id}: {result_dataset_id_list_text} ...')
    else:
        OK = False

    # input and check the result dataset identification
    if OK:
        while result_dataset_id == '':
            result_dataset_id = input('... Enter the dataset id: ')
            if result_dataset_id not in result_dataset_id_list:
                print(f'*** ERROR: {result_dataset_id} does not exist.')
                result_dataset_id = ''

    # return the result dataset identification
    return result_dataset_id
def form_view_result_log():
    '''
    View the log of an experiment/process result.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment('Logs - View an experiment/process result log')

    # get the experiment identification
    if OK:
        experiment_id = cinputs.input_experiment_id()
        if experiment_id == '':
            print('WARNING: There is not any experiment/process data.')
            OK = False

    # get the result_dataset identification
    if OK:
        # FIX: input_result_dataset_id() expects a LIST of application codes
        # (it iterates "for app in app_list"); the bare code string was being
        # iterated character by character, so the "all applications" match
        # never triggered. Wrap the code in a one-element list.
        result_dataset_id = cinputs.input_result_dataset_id(experiment_id, [xlib.get_all_applications_selected_code()])
        if result_dataset_id == '':
            print(f'WARNING: The experiment/process {experiment_id} does not have result datasets.')
            OK = False

    # get the dictionary of TOA configuration.
    if OK:
        toa_config_dict = xtoa.get_toa_config_dict()

    # get the log file name and build local and cluster paths
    if OK:
        log_file = f'{toa_config_dict["RESULT_DIR"]}/{experiment_id}/{result_dataset_id}/{xlib.get_run_log_file()}'

    # view the log file
    if OK:
        text = 'Logs - View an experiment/process log'
        OK = clib.view_file(log_file, text)

    # show continuation message
    input('Press [Intro] to continue ...')
def input_experiment_id():
    '''
    Input an experiment/process identification.

    Returns:
        The experiment/process identification entered by the user, or '' when
        the TOA result directory does not contain any experiment/process.
    '''

    # initialize the control variable
    OK = True

    # initialize the experiment/process identification
    experiment_id = ''

    # get the dictionary of TOA configuration.
    toa_config_dict = xtoa.get_toa_config_dict()

    # get the experiment/process identifications: every subdirectory of the
    # result directory
    # (FIX: build the list directly instead of copying a temporary list item by item)
    result_dir = toa_config_dict['RESULT_DIR']
    experiment_id_list = [subdir for subdir in os.listdir(result_dir) if os.path.isdir(os.path.join(result_dir, subdir))]

    # print the experiment/process identifications in the clusters
    if experiment_id_list != []:
        # FIX: join the ids directly; the previous
        # str(list).strip('[]').replace("'", '') repr-mangling corrupted ids
        # containing apostrophes or brackets
        experiment_id_list_text = ', '.join(experiment_id_list)
        print(f'Experiment/process ids existing: {experiment_id_list_text} ...')
    else:
        OK = False

    # input and check the experiment/process identification
    if OK:
        while experiment_id == '':
            experiment_id = input('... Enter the experiment/process id: ')
            if experiment_id not in experiment_id_list:
                print(f'*** ERROR: {experiment_id} does not exist.')
                experiment_id = ''

    # return the experiment/process identification
    return experiment_id
def execute(self, event=None):
    '''
    Execute the list the result logs in the cluster.

    Builds a dictionary describing every result dataset of the selected
    process type (application name, date, time and run status) and shows it
    in a table dialog.

    Parameters:
        event: the Tkinter event that triggered the action (None when the
            method is called programmatically).
    '''

    # if "button_execute" is disabled, exit function
    if str(self.button_execute['state']) == 'disabled':
        return

    # check inputs
    OK = self.check_inputs()
    if not OK:
        message = 'Some input values are not OK.'
        tkinter.messagebox.showerror(f'{xlib.get_short_project_name()} - {self.head}', message)

    # get the dictionary of TOA configuration.
    if OK:
        toa_config_dict = xtoa.get_toa_config_dict()

    # get the run dictionary
    if OK:
        # every subdirectory of the selected process type directory is one result dataset
        process_type_dir = f'{toa_config_dict["RESULT_DIR"]}/{self.wrapper_process_type.get()}'
        subdir_list = [subdir for subdir in os.listdir(process_type_dir) if os.path.isdir(os.path.join(process_type_dir, subdir))]
        result_dataset_dict = {}
        for subdir in subdir_list:
            result_dataset_id = subdir
            # a dataset id is expected to look like "<app_code>-<yymmdd>-<hhmmss>";
            # parse the run date and time out of the last two dash-separated groups
            try:
                pattern = r'^(.+)\-(.+)\-(.+)$'
                mo = re.search(pattern, result_dataset_id)
                bioinfo_app_code = mo.group(1).strip()
                yymmdd = mo.group(2)
                hhmmss = mo.group(3)
                date = f'20{yymmdd[:2]}-{yymmdd[2:4]}-{yymmdd[4:]}'
                time = f'{hhmmss[:2]}:{hhmmss[2:4]}:{hhmmss[4:]}'
            # NOTE(review): bare except deliberately maps any malformed id to
            # placeholder values instead of failing the whole listing
            except:
                bioinfo_app_code = 'xxx'
                date = '0000-00-00'
                time = '00:00:00'
            # map the dataset id prefix ("<app_code>-") to the application display name
            if result_dataset_id.startswith(xlib.get_blastplus_code() + '-'):
                bioinfo_app_name = xlib.get_blastplus_name()
            elif result_dataset_id.startswith(xlib.get_diamond_code() + '-'):
                bioinfo_app_name = xlib.get_diamond_name()
            elif result_dataset_id.startswith(xlib.get_entrez_direct_code() + '-'):
                bioinfo_app_name = xlib.get_entrez_direct_name()
            elif result_dataset_id.startswith(xlib.get_miniconda3_code() + '-'):
                bioinfo_app_name = xlib.get_miniconda3_name()
            elif result_dataset_id.startswith(xlib.get_r_code() + '-'):
                bioinfo_app_name = xlib.get_r_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_download_basic_data_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_download_basic_data_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_download_dicots_04_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_download_dicots_04_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_download_gene_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_download_gene_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_download_go_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_download_go_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_download_gymno_01_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_download_gymno_01_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_download_interpro_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_download_interpro_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_download_monocots_04_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_download_monocots_04_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_download_taxonomy_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_download_taxonomy_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_gilist_viridiplantae_protein_gi_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_protein_gi_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_load_basic_data_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_load_basic_data_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_load_dicots_04_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_load_dicots_04_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_load_gene_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_load_gene_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_load_go_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_load_go_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_load_gymno_01_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_load_gymno_01_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_load_interpro_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_load_interpro_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_load_monocots_04_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_load_monocots_04_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_merge_annotations_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_merge_annotations_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_nr_blastplus_db_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_nr_blastplus_db_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_nr_diamond_db_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_nr_diamond_db_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_nt_blastplus_db_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_nt_blastplus_db_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_pipeline_aminoacid_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_pipeline_aminoacid_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_pipeline_nucleotide_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_pipeline_nucleotide_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_proteome_dicots_04_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_proteome_dicots_04_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_proteome_gymno_01_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_proteome_gymno_01_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_proteome_monocots_04_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_proteome_monocots_04_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_proteome_refseq_plant_code() + '-'):
                bioinfo_app_name = xlib.get_toa_process_proteome_refseq_plant_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_rebuild_toa_database_code() + '-'):
                # NOTE(review): "get_get_..." looks like a typo'd helper name — confirm it exists in xlib
                bioinfo_app_name = xlib.get_get_toa_process_rebuild_toa_database_name()
            elif result_dataset_id.startswith(xlib.get_toa_process_recreate_toa_database_code() + '-'):
                # NOTE(review): "get_get_..." looks like a typo'd helper name — confirm it exists in xlib
                bioinfo_app_name = xlib.get_get_toa_process_recreate_toa_database_name()
            elif result_dataset_id.startswith(xlib.get_transdecoder_code() + '-'):
                bioinfo_app_name = xlib.get_transdecoder_name()
            else:
                bioinfo_app_name = 'xxx'
            # derive the run status from the OK/wrong marker files inside the dataset directory
            status_ok = os.path.isfile(xlib.get_status_ok(os.path.join(process_type_dir, subdir)))
            status_wrong = os.path.isfile(xlib.get_status_wrong(os.path.join(process_type_dir, subdir)))
            if status_ok and not status_wrong:
                status = 'OK'
            elif not status_ok and status_wrong:
                status = 'wrong'
            elif not status_ok and not status_wrong:
                status = 'not finished'
            elif status_ok and status_wrong:
                status = 'undetermined'
            # register the run keyed by "<app name>-<dataset id>" (also the table sort key)
            key = f'{bioinfo_app_name}-{result_dataset_id}'
            result_dataset_dict[key] = {'process_type': self.wrapper_process_type.get(), 'bioinfo_app': bioinfo_app_name, 'result_dataset_id': result_dataset_id, 'date': date, 'time': time, 'status': status}

    # check if there are any nodes running
    if OK:
        if result_dataset_dict == {}:
            # NOTE(review): OK is not set to False here, so an empty table is
            # still shown after the warning — confirm that is intended
            message = 'There is not any run.'
            tkinter.messagebox.showwarning(f'{xlib.get_short_project_name()} - {self.head}', message)

    # build the data list (column order of the table)
    if OK:
        data_list = ['process_type', 'bioinfo_app', 'result_dataset_id', 'date', 'time', 'status']

    # build the data dictionary (per-column header text, width and alignment)
    if OK:
        data_dict = {}
        data_dict['process_type'] = {'text': 'Process type', 'width': 180, 'alignment': 'left'}
        data_dict['bioinfo_app'] = {'text': 'Bioinfo app / Utility', 'width': 340, 'alignment': 'left'}
        data_dict['result_dataset_id'] = {'text': 'Result dataset', 'width': 225, 'alignment': 'left'}
        data_dict['date'] = {'text': 'Date', 'width': 95, 'alignment': 'right'}
        data_dict['time'] = {'text': 'Time', 'width': 75, 'alignment': 'right'}
        data_dict['status'] = {'text': 'Status', 'width': 90, 'alignment': 'left'}

    # create the dialog Table to show the nodes running (modal: wait until it is closed)
    if OK:
        dialog_table = gdialogs.DialogTable(self, f'Runs in {xlib.get_result_dir()}/{self.wrapper_process_type.get()}', 400, 1030, data_list, data_dict, result_dataset_dict, sorted(result_dataset_dict.keys()), 'view_result_logs', ['revisar'])
        self.wait_window(dialog_table)

    # close the form
    if OK:
        self.close()
def form_view_phylogenic_data_frecuency(stats_code):
    '''
    View the frecuency distribution of phylogenic data.

    Parameters:
        stats_code: code of the statistics to show ('species', 'family',
            'phylum' or 'namespace').

    Raises:
        xlib.ProgramException: F001/F002 when the statistics file cannot be
            opened, F006 when a data record is malformed.
    '''

    # initialize the control variable
    OK = True

    # assign the text of the "name"
    if stats_code == 'species':
        name = 'Species - Frequency distribution'
    elif stats_code == 'family':
        name = 'Family - Frequency distribution'
    elif stats_code == 'phylum':
        name = 'Phylum - Frequency distribution'
    elif stats_code == 'namespace':
        name = 'GO - Frequency distribution per namespace'

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(f'Statistics - {name} data')

    # get the pipeline dataset identification
    app_list = [xlib.get_toa_process_pipeline_nucleotide_code(), xlib.get_toa_process_pipeline_aminoacid_code(), xlib.get_toa_process_merge_annotations_code()]
    pipeline_dataset_id = cinputs.input_result_dataset_id(xlib.get_toa_result_pipeline_dir(), app_list)
    if pipeline_dataset_id == '':
        print('WARNING: There are not any annotation pipeline result datasets.')
        OK = False

    # build distribution dictionary
    if OK:

        # initialize the distribution dictionary
        distribution_dict = {}

        # get the dictionary of TOA configuration
        toa_config_dict = xtoa.get_toa_config_dict()

        # get the statistics file path
        stats_file = f'{toa_config_dict["RESULT_DIR"]}/{xlib.get_toa_result_pipeline_dir()}/{pipeline_dataset_id}/{toa_config_dict["STATS_SUBDIR_NAME"]}/{stats_code}-{toa_config_dict["STATS_BASE_NAME"]}.csv'

        # open the statistics file
        if stats_file.endswith('.gz'):
            try:
                stats_file_id = gzip.open(stats_file, mode='rt', encoding='iso-8859-1', newline='\n')
            except Exception:
                raise xlib.ProgramException('F002', stats_file)
        else:
            try:
                stats_file_id = open(stats_file, mode='r', encoding='iso-8859-1', newline='\n')
            except Exception:
                raise xlib.ProgramException('F001', stats_file)

        # FIX: close the file when reading ends (it was previously leaked)
        try:
            # initialize the record counter
            record_counter = 0
            # initialize the header record control
            header_record = True
            # read the first record
            record = stats_file_id.readline()
            # while there are records
            while record != '':
                # add 1 to the record counter
                record_counter += 1
                # process the header record
                if header_record:
                    header_record = False
                # process data records
                else:
                    # extract data
                    # record format: "stats_code_id";"all_count";"first_hsp_count";"min_evalue_count"
                    # (FIX: loop variable renamed so the builtin "chr" is not shadowed)
                    data_list = []
                    begin = 0
                    for end in [i for i, character in enumerate(record) if character == ';']:
                        data_list.append(record[begin:end].strip('"'))
                        begin = end + 1
                    data_list.append(record[begin:].strip('\n').strip('"'))
                    try:
                        # FIX: renamed local so the builtin "id" is not shadowed
                        stats_code_id = data_list[0]
                        all_count = data_list[1]
                        first_hsp_count = data_list[2]
                        min_evalue_count = data_list[3]
                    except Exception:
                        raise xlib.ProgramException('F006', os.path.basename(stats_file), record_counter)
                    # add data to the dictionary
                    distribution_dict[stats_code_id] = {'id': stats_code_id, 'all_count': all_count, 'first_hsp_count': first_hsp_count, 'min_evalue_count': min_evalue_count}
                # read the next record
                record = stats_file_id.readline()
        finally:
            stats_file_id.close()

    # print the distribution
    if OK:
        print(xlib.get_separator())
        if distribution_dict == {}:
            print('*** WARNING: There is not any distribution.')
        else:
            # set data width
            id_width = 50
            all_count_width = 11
            first_hsp_count_width = 11
            min_evalue_count_width = 11
            # set line template
            line_template = '{0:' + str(id_width) + '} {1:' + str(all_count_width) + '} {2:' + str(first_hsp_count_width) + '} {3:' + str(min_evalue_count_width) + '}'
            # print header
            print(line_template.format(stats_code.capitalize(), 'All', 'First HSP', 'Min e-value'))
            print(line_template.format('=' * id_width, '=' * all_count_width, '=' * first_hsp_count_width, '=' * min_evalue_count_width))
            # print detail lines
            for key in sorted(distribution_dict.keys()):
                print(line_template.format(distribution_dict[key]['id'], distribution_dict[key]['all_count'], distribution_dict[key]['first_hsp_count'], distribution_dict[key]['min_evalue_count']))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
def form_view_dataset_data_frecuency():
    '''
    View the frecuency distribution of annotation dataset data.

    Raises:
        xlib.ProgramException: F001/F002 when the statistics file cannot be
            opened, F006 when a data record is malformed.
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment('Statistics - Annotation datasets - Frequency distribution data')

    # get the pipeline dataset identification
    app_list = [xlib.get_toa_process_pipeline_nucleotide_code(), xlib.get_toa_process_pipeline_aminoacid_code()]
    pipeline_dataset_id = cinputs.input_result_dataset_id(xlib.get_toa_result_pipeline_dir(), app_list)
    if pipeline_dataset_id == '':
        print('WARNING: There are not any annotation pipeline result datasets.')
        OK = False

    # build distribution dictionary
    if OK:

        # initialize the distribution dictionary
        distribution_dict = {}

        # get the dictionary of TOA configuration
        toa_config_dict = xtoa.get_toa_config_dict()

        # get the statistics file path
        stats_file = f'{toa_config_dict["RESULT_DIR"]}/{xlib.get_toa_result_pipeline_dir()}/{pipeline_dataset_id}/{toa_config_dict["STATS_SUBDIR_NAME"]}/dataset-{toa_config_dict["STATS_BASE_NAME"]}.csv'

        # open the statistics file
        if stats_file.endswith('.gz'):
            try:
                stats_file_id = gzip.open(stats_file, mode='rt', encoding='iso-8859-1', newline='\n')
            except Exception:
                raise xlib.ProgramException('F002', stats_file)
        else:
            try:
                stats_file_id = open(stats_file, mode='r', encoding='iso-8859-1', newline='\n')
            except Exception:
                raise xlib.ProgramException('F001', stats_file)

        # FIX: close the file when reading ends (it was previously leaked)
        try:
            # initialize the record counter
            record_counter = 0
            # initialize the header record control
            header_record = True
            # read the first record
            record = stats_file_id.readline()
            # while there are records
            while record != '':
                # add 1 to the record counter
                record_counter += 1
                # process the header record
                if header_record:
                    header_record = False
                # process data records
                else:
                    # extract data
                    # record format: "dataset_name";"annotated_seq_count";"remained_seq_count"
                    # (FIX: loop variable renamed so the builtin "chr" is not shadowed)
                    data_list = []
                    begin = 0
                    for end in [i for i, character in enumerate(record) if character == ';']:
                        data_list.append(record[begin:end].strip('"'))
                        begin = end + 1
                    data_list.append(record[begin:].strip('\n').strip('"'))
                    try:
                        dataset_name = data_list[0]
                        annotated_seq_count = data_list[1]
                        remained_seq_count = data_list[2]
                    except Exception:
                        raise xlib.ProgramException('F006', os.path.basename(stats_file), record_counter)
                    # add data to the dictionary
                    distribution_dict[record_counter] = {'dataset_name': dataset_name, 'annotated_seq_count': annotated_seq_count, 'remained_seq_count': remained_seq_count}
                # read the next record
                record = stats_file_id.readline()
        finally:
            stats_file_id.close()

    # print the distribution
    if OK:
        print(xlib.get_separator())
        if distribution_dict == {}:
            print('*** WARNING: There is not any distribution.')
        else:
            # set data width
            dataset_name_width = 19
            annotated_seq_count_width = 14
            remained_seq_count_width = 14
            # set line template
            line_template = '{0:' + str(dataset_name_width) + '} {1:' + str(annotated_seq_count_width) + '} {2:' + str(remained_seq_count_width) + '}'
            # print header
            print(line_template.format('Dataset', 'Annotated seqs', 'Remained seqs'))
            print(line_template.format('=' * dataset_name_width, '=' * annotated_seq_count_width, '=' * remained_seq_count_width))
            # print detail lines
            for key in sorted(distribution_dict.keys()):
                print(line_template.format(distribution_dict[key]['dataset_name'], distribution_dict[key]['annotated_seq_count'], distribution_dict[key]['remained_seq_count']))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
def form_view_x_per_y_data(stats_code):
    '''
    View the x per y data.

    Parameters:
        stats_code: code of the statistics to show ('hit_per_hsp',
            'seq_per_go', 'seq_per_ec', 'seq_per_interpro', 'seq_per_kegg',
            'seq_per_mapman' or 'seq_per_metacyc').

    Raises:
        xlib.ProgramException: F001/F002 when the statistics file cannot be
            opened, F006 when a data record is malformed.
    '''

    # initialize the control variable
    OK = True

    # assign the text of the "name"
    if stats_code == 'hit_per_hsp':
        name = '# HITs per # HSPs'
    elif stats_code == 'seq_per_go':
        name = '# sequences per # GO terms'
    elif stats_code == 'seq_per_ec':
        name = '# sequences per # EC ids'
    elif stats_code == 'seq_per_interpro':
        name = '# sequences per # InterPro ids'
    elif stats_code == 'seq_per_kegg':
        name = '# sequences per # KEGG ids'
    elif stats_code == 'seq_per_mapman':
        name = '# sequences per # MapMan ids'
    elif stats_code == 'seq_per_metacyc':
        name = '# sequences per # MetaCyc ids'

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment(f'Statistics - {name} data')

    # get the pipeline dataset identification ('hit_per_hsp' does not apply to
    # merged annotations, so the merge code is only offered for the other stats)
    if stats_code == 'hit_per_hsp':
        app_list = [xlib.get_toa_process_pipeline_nucleotide_code(), xlib.get_toa_process_pipeline_aminoacid_code()]
    else:
        app_list = [xlib.get_toa_process_pipeline_nucleotide_code(), xlib.get_toa_process_pipeline_aminoacid_code(), xlib.get_toa_process_merge_annotations_code()]
    pipeline_dataset_id = cinputs.input_result_dataset_id(xlib.get_toa_result_pipeline_dir(), app_list)
    if pipeline_dataset_id == '':
        print('WARNING: There are not any annotation pipeline result datasets.')
        OK = False

    # build distribution dictionary
    if OK:

        # initialize the distribution dictionary
        distribution_dict = {}

        # get the dictionary of TOA configuration
        toa_config_dict = xtoa.get_toa_config_dict()

        # get the statistics file path
        stats_file = f'{toa_config_dict["RESULT_DIR"]}/{xlib.get_toa_result_pipeline_dir()}/{pipeline_dataset_id}/{toa_config_dict["STATS_SUBDIR_NAME"]}/{stats_code}-{toa_config_dict["STATS_BASE_NAME"]}.csv'

        # open the statistics file
        if stats_file.endswith('.gz'):
            try:
                stats_file_id = gzip.open(stats_file, mode='rt', encoding='iso-8859-1', newline='\n')
            except Exception:
                raise xlib.ProgramException('F002', stats_file)
        else:
            try:
                stats_file_id = open(stats_file, mode='r', encoding='iso-8859-1', newline='\n')
            except Exception:
                raise xlib.ProgramException('F001', stats_file)

        # FIX: close the file when reading ends (it was previously leaked)
        try:
            # initialize the record counter
            record_counter = 0
            # initialize the header record control
            header_record = True
            # read the first record
            record = stats_file_id.readline()
            # while there are records
            while record != '':
                # add 1 to the record counter
                record_counter += 1
                # process the header record
                if header_record:
                    header_record = False
                # process data records
                else:
                    # extract data
                    # record format: "x_count";"y_count"
                    # (FIX: loop variable renamed so the builtin "chr" is not shadowed)
                    data_list = []
                    begin = 0
                    for end in [i for i, character in enumerate(record) if character == ';']:
                        data_list.append(record[begin:end].strip('"'))
                        begin = end + 1
                    data_list.append(record[begin:].strip('\n').strip('"'))
                    try:
                        x_count = data_list[0]
                        y_count = data_list[1]
                    except Exception:
                        raise xlib.ProgramException('F006', os.path.basename(stats_file), record_counter)
                    # add data to the dictionary
                    distribution_dict[record_counter] = {'x_count': x_count, 'y_count': y_count}
                # read the next record
                record = stats_file_id.readline()
        finally:
            stats_file_id.close()

    # print the distribution
    if OK:
        print(xlib.get_separator())
        if distribution_dict == {}:
            print('*** WARNING: There is not any stats data.')
        else:
            # set data width
            x_count_width = 15
            y_count_width = 15
            # set line template
            line_template = '{0:' + str(x_count_width) + '} {1:' + str(y_count_width) + '}'
            # print header
            if stats_code == 'hit_per_hsp':
                print(line_template.format('# HSPs', '# HITs'))
            elif stats_code == 'seq_per_go':
                print(line_template.format('# GO terms', '# sequences'))
            elif stats_code == 'seq_per_ec':
                print(line_template.format('# EC ids', '# sequences'))
            elif stats_code == 'seq_per_interpro':
                print(line_template.format('# InterPro ids', '# sequences'))
            elif stats_code == 'seq_per_kegg':
                print(line_template.format('# KEGG ids', '# sequences'))
            elif stats_code == 'seq_per_mapman':
                print(line_template.format('# MapMan ids', '# sequences'))
            elif stats_code == 'seq_per_metacyc':
                print(line_template.format('# MetaCyc ids', '# sequences'))
            print(line_template.format('=' * x_count_width, '=' * y_count_width))
            # print detail lines
            for key in sorted(distribution_dict.keys()):
                print(line_template.format(distribution_dict[key]['x_count'], distribution_dict[key]['y_count']))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')
def form_list_results_logs():
    '''
    List the processes of an experiment in the cluster.

    Asks the user for an experiment/process id, then prints a table with every
    result dataset of that experiment and the bioinfo application each dataset
    belongs to (derived from the dataset id prefix).
    '''

    # initialize the control variable
    OK = True

    # print the header
    clib.clear_screen()
    clib.print_headers_with_environment('Logs - List result logs')

    # get experiment identification
    experiment_id = cinputs.input_experiment_id()
    if experiment_id == '':
        print('WARNING: There is not any experiment/process run.')
        OK = False

    # get the dictionary of TOA configuration.
    if OK:
        toa_config_dict = xtoa.get_toa_config_dict()

    # get the result dataset list of the experiment: every subdirectory of the
    # experiment directory
    if OK:
        experiment_dir = f'{toa_config_dict["RESULT_DIR"]}/{experiment_id}'
        subdir_list = [subdir for subdir in os.listdir(experiment_dir) if os.path.isdir(os.path.join(experiment_dir, subdir))]
        result_dataset_id_list = []
        for subdir in subdir_list:
            result_dataset_id_list.append(subdir)

    # print the result dataset identification list of the experiment
    if OK:
        print(xlib.get_separator())
        if result_dataset_id_list == []:
            print(f'*** WARNING: There is not any result dataset of the experiment/process {experiment_id}.')
        else:
            result_dataset_id_list.sort()
            # set data width
            result_dataset_width = 25
            bioinfo_app_width = 25
            # set line template
            line_template = '{0:' + str(result_dataset_width) + '} {1:' + str(bioinfo_app_width) + '}'
            # print header
            print(line_template.format('Result dataset', 'Bioinfo app / Utility'))
            print(line_template.format('=' * result_dataset_width, '=' * bioinfo_app_width))
            # print detail lines: map each dataset id prefix ("<app_code>-")
            # to the application display name
            for result_dataset_id in result_dataset_id_list:
                if result_dataset_id.startswith(xlib.get_blastplus_code() + '-'):
                    bioinfo_app_name = xlib.get_blastplus_name()
                elif result_dataset_id.startswith(xlib.get_diamond_code() + '-'):
                    bioinfo_app_name = xlib.get_diamond_name()
                elif result_dataset_id.startswith(xlib.get_entrez_direct_code() + '-'):
                    bioinfo_app_name = xlib.get_entrez_direct_name()
                elif result_dataset_id.startswith(xlib.get_miniconda3_code() + '-'):
                    bioinfo_app_name = xlib.get_miniconda3_name()
                elif result_dataset_id.startswith(xlib.get_r_code() + '-'):
                    bioinfo_app_name = xlib.get_r_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_download_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_basic_data_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_download_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_dicots_04_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_download_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gene_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_download_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_go_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_download_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_gymno_01_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_download_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_interpro_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_download_monocots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_monocots_04_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_download_taxonomy_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_download_taxonomy_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_nucleotide_gi_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_gilist_viridiplantae_protein_gi_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_gilist_viridiplantae_protein_gi_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_load_basic_data_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_basic_data_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_load_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_dicots_04_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_load_gene_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gene_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_load_go_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_go_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_load_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_gymno_01_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_load_interpro_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_interpro_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_load_monocots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_load_monocots_04_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_merge_annotations_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_merge_annotations_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_nr_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_blastplus_db_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_nr_diamond_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nr_diamond_db_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_nt_blastplus_db_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_nt_blastplus_db_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_pipeline_aminoacid_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_aminoacid_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_pipeline_nucleotide_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_pipeline_nucleotide_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_proteome_dicots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_dicots_04_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_proteome_gymno_01_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_gymno_01_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_proteome_monocots_04_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_monocots_04_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_proteome_refseq_plant_code() + '-'):
                    bioinfo_app_name = xlib.get_toa_process_proteome_refseq_plant_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_rebuild_toa_database_code() + '-'):
                    # NOTE(review): "get_get_..." looks like a typo'd helper name — confirm it exists in xlib
                    bioinfo_app_name = xlib.get_get_toa_process_rebuild_toa_database_name()
                elif result_dataset_id.startswith(xlib.get_toa_process_recreate_toa_database_code() + '-'):
                    # NOTE(review): "get_get_..." looks like a typo'd helper name — confirm it exists in xlib
                    bioinfo_app_name = xlib.get_get_toa_process_recreate_toa_database_name()
                elif result_dataset_id.startswith(xlib.get_transdecoder_code() + '-'):
                    bioinfo_app_name = xlib.get_transdecoder_name()
                else:
                    # unknown prefix: placeholder name
                    bioinfo_app_name = 'xxx'
                print(line_template.format(result_dataset_id, bioinfo_app_name))

    # show continuation message
    print(xlib.get_separator())
    input('Press [Intro] to continue ...')