def _write_is_valid_row_col(val_sheet, col_index, index_and_range_tuple_by_header_dict):
    """
    Write the is_valid_row helper column, e.g., =IF(AO2,TRUE,AND(AQ2:BW2)).

    Per the formula, a row is valid when its is-absent cell is true; otherwise it is valid only when every
    cell in that row of the static validation grid is true.

    :type val_sheet: ValidationWorksheet
    :param col_index: index of the column the formula is written into
    :param index_and_range_tuple_by_header_dict: dict keyed by helper-column header; element 0 of the entry
        for val_sheet.IS_ABSENT_HEADER is used as that column's index
    :return: the value returned by xlsxbasics.copy_formula_throughout_range
    """
    # The row position of both ranges is a placeholder rather than a real row number.
    # NOTE(review): presumably copy_formula_throughout_range fills in curr_row_index per row -- confirm.
    row_placeholder = "{{curr_row_index}}"

    absent_col_index = index_and_range_tuple_by_header_dict[val_sheet.IS_ABSENT_HEADER][0]
    absent_cell_str = xlsxbasics.format_range(absent_col_index, row_placeholder)
    grid_row_str = xlsxbasics.format_range(
        val_sheet.first_static_grid_col_index, row_placeholder, val_sheet.last_static_grid_col_index)

    formula_template = "IF({is_absent_1cell_range_str},TRUE,AND({static_grid_1row_range_str}))"
    filled_template = formula_template.format(
        is_absent_1cell_range_str=absent_cell_str,
        static_grid_1row_range_str=grid_row_str)

    # Doubled braces that arrive *inside* substituted values are not collapsed by the format call that
    # inserted them, so an extra argument-less format call is needed to turn {{ and }} into { and }.
    partial_formula = filled_template.format()

    return xlsxbasics.copy_formula_throughout_range(
        val_sheet.worksheet, partial_formula, col_index,
        val_sheet.first_data_row_index, last_row_index=val_sheet.last_data_row_index)
def _write_static_validation_grid(val_sheet, schema_dict):
    """
    Write the static validation grid and then hide its columns.

    One grid column is written per schema field (in xlsxbasics.sort_keys order), starting at
    val_sheet.first_static_grid_col_index. Each column gets a header; if the field has a formula constraint,
    every row from first_data_row_index through last_allowable_row_for_sample_index gets that formula, pointed
    at the matching cell (and whole column) of the metadata sheet.

    :type val_sheet: ValidationWorksheet
    :param schema_dict: dict mapping field name to that field's specs dict
    """
    sorted_keys = xlsxbasics.sort_keys(schema_dict)

    # Stays None when the schema has no fields, in which case there is nothing to hide afterwards.
    curr_grid_col_index = None
    for field_index, field_name in enumerate(sorted_keys):
        field_specs_dict = schema_dict[field_name]
        curr_grid_col_index = val_sheet.first_static_grid_col_index + field_index
        xlsxbasics.write_header(val_sheet, field_name, curr_grid_col_index, set_width=False)

        unformatted_formula_str = qiimp.xlsx_validation_builder.get_formula_constraint(
            field_specs_dict, val_sheet.regex_handler)
        if unformatted_formula_str is None:
            # no constraint for this field: the grid column keeps only its header
            continue

        curr_metadata_col_index = val_sheet.first_data_col_index + field_index
        # The whole-column reference does not depend on the row, so build it once per field rather than
        # once per cell.
        metadata_col_range = xlsxbasics.format_range(
            curr_metadata_col_index, None, sheet_name=val_sheet.metadata_sheet_name)

        cell_enumerator = xlsxbasics.loop_through_range(
            curr_grid_col_index, val_sheet.first_data_row_index,
            last_row_index=val_sheet.last_allowable_row_for_sample_index)
        for _, curr_row_index, curr_cell_range in cell_enumerator:
            metadata_cell = xlsxbasics.format_range(
                curr_metadata_col_index, curr_row_index, sheet_name=val_sheet.metadata_sheet_name)
            formatted_formula_str = unformatted_formula_str.format(
                cell=metadata_cell, col_range=metadata_col_range)
            val_sheet.worksheet.write_formula(curr_cell_range, formatted_formula_str)

    # Hide all the columns in the static grid, using the value of curr_grid_col_index left over from the last
    # time through the loop. Compare against None explicitly: a last column index of 0 is falsy but valid.
    if curr_grid_col_index is not None:
        val_sheet.hide_columns(val_sheet.first_static_grid_col_index, curr_grid_col_index)
def _format_static_grid_cell_reference_formula_str(val_sheet, row_rank_num, col_rank_num):
    """
    Build an INDEX(...) expression that picks one cell out of the static validation grid.

    The referenced cell holds "TRUE" or "FALSE" depending on whether the relevant cell in the metadata sheet
    is valid or invalid according to the static validation grid. Example of a finished expression:

    INDEX($AQ$2:$BW$11,MATCH(ROWS($AJ$2:AJ2),$AJ$2:$AJ$11,0),MATCH(COLUMNS($AQ$16:AQ16),$AQ$16:$BW$16,0))

    :type val_sheet: xlsx_static_grid_builder.ValidationWorksheet
    :param row_rank_num: expression placed in the row position of INDEX
    :param col_rank_num: expression placed in the column position of INDEX
    :return: the INDEX expression as a string (no leading "=")
    """
    # every corner of the grid range is anchored so the reference is the same wherever it is used
    anchor_everything = {
        "first_col_fixed": True,
        "first_row_fixed": True,
        "last_col_fixed": True,
        "last_row_fixed": True,
    }
    whole_grid_range = xlsxbasics.format_range(
        val_sheet.first_static_grid_col_index,
        val_sheet.first_data_row_index,
        last_col_index=val_sheet.last_static_grid_col_index,
        last_row_index=val_sheet.last_data_row_index,
        **anchor_everything)

    return "INDEX({static_grid_fixed_range_str},{row_rank_num},{col_rank_num})".format(
        static_grid_fixed_range_str=whole_grid_range,
        row_rank_num=row_rank_num,
        col_rank_num=col_rank_num)
def _write_sample_id_col(data_sheet):
    """
    Write the sample_id column: a header plus one formula per allowable sample row.

    Each formula shows the row's 1-based sample number when the row's data range is not entirely blank, and
    an empty string otherwise.

    :type data_sheet: xlsxbasics.MetadataWorksheet
    """
    sheet = data_sheet.worksheet
    id_col_index = data_sheet.sample_id_col_index

    # width and format are left alone; only the column options are supplied
    # NOTE(review): hidden_cell_setting presumably hides the column -- confirm against MetadataWorksheet.
    sheet.set_column(id_col_index, id_col_index, None, None, data_sheet.hidden_cell_setting)
    xlsxbasics.write_header(data_sheet, "sample_id", id_col_index, set_width=False)

    formula_template = "=IF(COUNTBLANK({data_row_range})<>COLUMNS({data_row_range}),{id_num},\"\")"

    # +1 bc range is exclusive of last number
    sample_rows = range(data_sheet.first_data_row_index, data_sheet.last_allowable_row_for_sample_index + 1)
    for id_num, row_index in enumerate(sample_rows, start=1):
        target_cell = xlsxbasics.format_range(id_col_index, row_index)
        row_range = xlsxbasics.format_single_data_grid_row_range(data_sheet, row_index)
        sheet.write_formula(target_cell, formula_template.format(data_row_range=row_range, id_num=id_num))
def _format_dynamic_rank_formula_str(val_sheet, curr_range_index, index_and_range_str_tuple_by_header_dict,
                                     for_row):
    """
    Build the MATCH(...) expression that locates a given rank within the row-rank or column-rank helper range.

    For rows: get the row_rank for the metadata row that should be shown in this validation row (e.g., if
    we're in validation row 5, we should be showing the metadata row with row_rank 5):
        MATCH(ROWS($AJ$2:AJ2),$AJ$2:$AJ$11,0)
    For columns: get the col_rank for the metadata column that should be shown in this validation column
    (e.g., if we're in validation column 6, we should be showing the metadata column with col_rank 6):
        MATCH(COLUMNS($AQ$16:AQ16),$AQ$16:$BW$16,0)

    :type val_sheet: xlsx_static_grid_builder.ValidationWorksheet
    :param curr_range_index: far end of the growing range: a row index when for_row is true, otherwise a
        column index
    :param index_and_range_str_tuple_by_header_dict: dict keyed by helper header; element 0 of an entry is
        used as an index and element 1 as the fixed range string
    :param for_row: true to build the row-rank expression, false for the column-rank expression
    :return: the MATCH expression as a string (no leading "=")
    """
    if for_row:
        rank_header = val_sheet.ROW_RANK_HEADER
    else:
        rank_header = val_sheet.COL_RANK_HEADER
    rank_entry = index_and_range_str_tuple_by_header_dict[rank_header]
    rank_index = rank_entry[0]
    fixed_rank_range = rank_entry[1]

    # (first_col, first_row, last_col, last_row) of a range whose start is anchored and whose end is relative
    if for_row:
        counting_func = "ROWS"
        # the rank index is a column here; the range grows downward to the current row
        corners = (rank_index, val_sheet.first_data_row_index, rank_index, curr_range_index)
    else:
        counting_func = "COLUMNS"
        # the rank index is a row here; the range grows rightward to the current column
        corners = (val_sheet.first_static_grid_col_index, rank_index, curr_range_index, rank_index)

    start_col, start_row, end_col, end_row = corners
    growing_range = xlsxbasics.format_range(
        start_col, start_row, last_col_index=end_col, last_row_index=end_row,
        first_col_fixed=True, first_row_fixed=True)

    return "MATCH({excel_func_name}({rank_to_curr_point_range}),{rank_fixed_range_str},0)".format(
        excel_func_name=counting_func,
        rank_to_curr_point_range=growing_range,
        rank_fixed_range_str=fixed_rank_range)
def write_dynamic_validation_grid(val_sheet, index_and_range_str_tuple_by_header_dict):
    """
    Write the dynamic validation grid: the name-link column, then for every data column a centered column
    format, a header formula and one formula per data row, and finally the grid's conditional formatting.

    :type val_sheet: xlsx_static_grid_builder.ValidationWorksheet
    :param index_and_range_str_tuple_by_header_dict: dict keyed by helper header, passed through to the
        helpers that build the rank and validity formulas
    """
    _write_dynamic_name_link_col(val_sheet, index_and_range_str_tuple_by_header_dict)

    # apparently can't add alignment to a conditional format :(
    centered_format = xlsxbasics.make_format(val_sheet.workbook, {'align': 'center'})

    first_data_col_index = val_sheet.first_data_col_index

    # at outer level, move across columns
    for curr_col_index in range(first_data_col_index, val_sheet.last_data_col_index + 1):
        val_sheet.worksheet.set_column(curr_col_index, curr_col_index, None, centered_format)

        # The n-th data column pairs with the n-th static grid column. Derive the offset from the first data
        # column instead of assuming the data columns start at index 1.
        col_offset = curr_col_index - first_data_col_index
        curr_static_grid_col_index = val_sheet.first_static_grid_col_index + col_offset

        col_rank = _format_dynamic_rank_formula_str(
            val_sheet, curr_static_grid_col_index, index_and_range_str_tuple_by_header_dict, for_row=False)
        col_already_valid_condition = _format_range_already_valid_formula_str(
            val_sheet, col_rank, index_and_range_str_tuple_by_header_dict, for_row=False)

        _write_dynamic_header_cell(val_sheet, curr_col_index, col_rank)

        # at inner level, move down rows
        for curr_row_index in range(val_sheet.first_data_row_index, val_sheet.last_data_row_index + 1):
            cell_formula = _generate_dynamic_grid_cell_formula_str(
                val_sheet, col_rank, col_already_valid_condition, curr_row_index,
                index_and_range_str_tuple_by_header_dict)
            curr_cell = xlsxbasics.format_range(curr_col_index, curr_row_index)
            val_sheet.worksheet.write_formula(curr_cell, cell_formula)

    _write_dynamic_grid_conditional_formatting(val_sheet)
def _write_dynamic_header_cell(val_sheet, curr_col_index, col_rank):
    """
    Write the header formula for one column of the dynamic validation grid, e.g.,
    =IF(B2=" ", " ", INDEX($AS$1:$BY$1, 1, MATCH(COLUMNS($AS$1005:AS1005),$AS$1005:$BY$1005,0)))

    :type val_sheet: xlsx_static_grid_builder.ValidationWorksheet
    :param curr_col_index: index of the dynamic grid column whose header is being written
    :param col_rank: expression placed in the column position of INDEX to pick the header to show
    """
    top_data_cell = xlsxbasics.format_range(curr_col_index, val_sheet.first_data_row_index)
    grid_header_row = xlsxbasics.format_single_static_grid_row_range(
        val_sheet, val_sheet.name_row_index,
        first_col_fixed=True, first_row_fixed=True, last_col_fixed=True, last_row_fixed=True)

    # If ALL samples are valid in this column, every data cell in the column holds a single space. If ANY
    # sample is invalid in this column, each data cell is either completely empty (that sample is valid here)
    # or holds the word "Fix". So only the FIRST data cell needs checking: a space there means the whole
    # column is valid and should be hidden, and the "header" is then just a space as well.
    header_template = "=IF({first_data_cell_in_col}=\" \", \" \", INDEX({static_grid_header_row}, 1, {col_rank}))"
    header_formula = header_template.format(
        first_data_cell_in_col=top_data_cell,
        static_grid_header_row=grid_header_row,
        col_rank=col_rank)

    xlsxbasics.write_header(val_sheet, header_formula, curr_col_index)
def _write_dynamic_grid_conditional_formatting(val_sheet):
    """
    Attach two conditional formats to the dynamic grid: cells equal to "" get a green fill with same-green
    text, and cells equal to "Fix" get a light red fill with dark red underlined text.

    :type val_sheet: xlsx_static_grid_builder.ValidationWorksheet
    """
    book = val_sheet.workbook

    # Light red fill with dark red text
    fix_format = xlsxbasics.make_format(
        book, {'bg_color': '#FFC7CE', 'font_color': '#9C0006', 'underline': 1})
    # Green fill with (same) green text
    ok_format = xlsxbasics.make_format(
        book, {'bg_color': '#C6EFCE', 'font_color': '#C6EFCE'})

    # NB: grid goes as far as last allowable row for samples, not just to number of expected samples, in case
    # user adds some extra ones :)
    grid_range = xlsxbasics.format_range(
        val_sheet.first_data_col_index,
        val_sheet.first_data_row_index,
        val_sheet.last_data_col_index,
        val_sheet.last_allowable_row_for_sample_index)

    # applied in this order: the empty-string rule first, then the "Fix" rule
    rules = (
        ("\"\"", ok_format),
        ("\"Fix\"", fix_format),
    )
    for matched_value, cell_format in rules:
        val_sheet.worksheet.conditional_format(grid_range, {
            'type': 'cell',
            'criteria': '==',
            'value': matched_value,
            'format': cell_format
        })
def _write_dynamic_name_link_col(val_sheet, index_and_range_str_tuple_by_header_dict):
    """
    Write the sample-name link column of the dynamic grid: a header plus one formula per allowable sample
    row, e.g.,
    =IF(B2=" "," ",HYPERLINK(CONCATENATE("#metadata!", ADDRESS(INDEX($AL$2:$AL$11,MATCH(ROWS($AJ$2:AJ2),
    $AJ$2:$AJ$11,0),0),$AQ$14)),INDEX($AO$2:$AO$11,MATCH(ROWS($AJ$2:AJ2),$AJ$2:$AJ$11,0),0)))

    :type val_sheet: xlsx_static_grid_builder.ValidationWorksheet
    :param index_and_range_str_tuple_by_header_dict: dict keyed by helper header; element 1 of an entry is
        used here as that helper column's fixed range string
    """
    ranges_by_header = index_and_range_str_tuple_by_header_dict
    link_col_index = val_sheet.name_link_col_index

    # Create the standard blue, underlined url link format.
    link_format = xlsxbasics.make_format(val_sheet.workbook, {'font_color': 'blue', 'underline': 1})
    xlsxbasics.write_header(val_sheet, val_sheet.SAMPLE_NAME_HEADER, link_col_index)

    metadata_row_template = "INDEX({row_in_metadata_fixed_range_str},{row_rank_num},0)"
    # NOTE(review): the target sheet name is hard-coded as "metadata" here -- confirm it always matches the
    # actual metadata sheet name.
    link_template = "CONCATENATE(\"#metadata!\",ADDRESS({metadata_row_index},{metadata_name_col_index}))"
    name_template = "INDEX({conditional_name_fixed_range_str},{row_num},0)"
    cell_template = "=IF({first_validation_cell}=\" \",\" \",HYPERLINK({link_address},{helper_name_val}))"

    # NB: grid goes as far as last allowable row for samples, not just to number of expected samples, in case
    # user adds some extra ones :)
    sample_rows = range(val_sheet.first_data_row_index, val_sheet.last_allowable_row_for_sample_index + 1)
    for curr_row_index in sample_rows:
        rank_expr = _format_dynamic_rank_formula_str(val_sheet, curr_row_index, ranges_by_header, for_row=True)

        # row of this sample in the metadata sheet, looked up by rank, then turned into a link target
        metadata_row_expr = metadata_row_template.format(
            row_in_metadata_fixed_range_str=ranges_by_header[val_sheet.ROW_IN_METADATA_HEADER][1],
            row_rank_num=rank_expr)
        link_expr = link_template.format(
            metadata_row_index=metadata_row_expr,
            metadata_name_col_index=val_sheet.name_col_index)

        # this index formula will get the value of the name for this sample from the helper col next to the
        # static grid
        name_expr = name_template.format(
            conditional_name_fixed_range_str=ranges_by_header[val_sheet.SAMPLE_NAME_HEADER][1],
            row_num=rank_expr)

        target_cell = xlsxbasics.format_range(link_col_index, curr_row_index)

        # If the first data cell of the dynamic grid in this row (the cell just right of the name link
        # column) holds a single space, write a space into the name cell too; otherwise write a link to the
        # name column for this sample in the metadata sheet. NB: it does NOT work to look at the value in the
        # is_valid helper column for this sample (either True or False) because the samples change order
        # based on validation status ...
        first_grid_cell = xlsxbasics.format_range(link_col_index + 1, curr_row_index)
        finished_formula = cell_template.format(
            first_validation_cell=first_grid_cell,
            link_address=link_expr,
            helper_name_val=name_expr)

        val_sheet.worksheet.write_formula(target_cell, finished_formula, link_format)
def write_metadata_grid(data_worksheet, schema_dict, field_descs_sheet_name):
    """
    Write the metadata entry grid: the sample id column, then for every schema field (in
    xlsxbasics.sort_keys order) a header, an unlocked column format, any data validation and any default,
    and finally a max-samples message in the row after the last allowable sample row.

    :type data_worksheet: xlsxbasics.MetadataWorksheet
    :param schema_dict: dict mapping field name to that field's specs dict
    :param field_descs_sheet_name: passed through to _get_validation_dict
    :raises ValueError: if the worksheet's data_validation call returns a negative code
    """
    # NOTE(review): this file contains a second, later definition of write_metadata_grid; in Python the later
    # definition replaces this one at import time -- confirm which copy is meant to survive.
    _write_sample_id_col(data_worksheet)

    unlocked = xlsxbasics.make_format(data_worksheet.workbook, is_locked=False)
    # format as text to prevent autoformatting!
    unlocked_text = xlsxbasics.make_format(data_worksheet.workbook, {'num_format': '@'}, is_locked=False)

    sorted_keys = xlsxbasics.sort_keys(schema_dict)
    for field_index, field_name in enumerate(sorted_keys):
        field_specs_dict = schema_dict[field_name]
        curr_col_index = field_index + 1  # add one bc sample id is in first col

        xlsxbasics.write_header(data_worksheet, field_name, curr_col_index)

        curr_format = unlocked_text if _determine_if_format_should_be_text(field_specs_dict) else unlocked
        # Calling set_column with None for the width *after* write_header (which sets the width explicitly)
        # appears to reset the column to the default width, so the min column width is passed explicitly
        # here instead of None.
        data_worksheet.worksheet.set_column(
            curr_col_index, curr_col_index, xlsxbasics.get_min_col_width(), curr_format)

        col_range = xlsxbasics.format_range(curr_col_index, None)
        starting_cell_name = xlsxbasics.format_range(curr_col_index, data_worksheet.first_data_row_index)
        whole_col_range = xlsxbasics.format_range(
            curr_col_index, data_worksheet.first_data_row_index,
            last_row_index=data_worksheet.last_allowable_row_for_sample_index)

        validation_dict = _get_validation_dict(
            field_name, field_specs_dict, data_worksheet.regex_handler, field_descs_sheet_name)
        value_key = "value"
        if validation_dict is not None:
            if value_key in validation_dict:
                # the stored formula is a template; point it at this column's first data cell and range
                unformatted_validation_formula = validation_dict[value_key]
                formatted_validation_formula = unformatted_validation_formula.format(
                    cell=starting_cell_name, col_range=col_range)
                validation_dict[value_key] = formatted_validation_formula

            validation_return_code = data_worksheet.worksheet.data_validation(whole_col_range, validation_dict)
            # NB: xlsxwriter's data_validation docstring *claims* it returns 0 if it succeeds, but in fact if
            # it succeeds it doesn't return an error code at all, hence the None check ...
            if validation_return_code is not None and validation_return_code < 0:
                raise ValueError(
                    "Worksheet validation failed with return code '{0}'; check user warnings."
                    .format(validation_return_code))

        _add_default_if_any(data_worksheet, field_specs_dict, curr_col_index)

    max_samples_msg = "No more than {0} samples can be entered in this worksheet.  If you need to submit metadata" \
                      " for >{0} samples, please contact CMI directly.".format(data_worksheet.num_allowable_samples)
    # set_width=False: the long message must not widen the first data column
    xlsxbasics.write_header(
        data_worksheet, max_samples_msg, data_worksheet.first_data_col_index,
        data_worksheet.last_allowable_row_for_sample_index + 1, set_width=False)
def write_metadata_grid(data_worksheet, schema_dict, field_descs_sheet_name):
    """
    Build the metadata entry grid.

    Writes the sample id column; then, for each schema field in xlsxbasics.sort_keys order, writes the header,
    sets an unlocked column format at the minimum column width, applies any data validation and any default;
    and last writes the max-samples message in the row after the last allowable sample row.

    :type data_worksheet: xlsxbasics.MetadataWorksheet
    :param schema_dict: dict mapping field name to that field's specs dict
    :param field_descs_sheet_name: passed through to _get_validation_dict
    :raises ValueError: if the worksheet's data_validation call returns a negative code
    """
    _write_sample_id_col(data_worksheet)

    book = data_worksheet.workbook
    sheet = data_worksheet.worksheet
    plain_unlocked_format = xlsxbasics.make_format(book, is_locked=False)
    # format as text to prevent autoformatting!
    text_unlocked_format = xlsxbasics.make_format(book, {'num_format': '@'}, is_locked=False)

    # columns are numbered from 1 bc sample id is in first col
    for curr_col_index, field_name in enumerate(xlsxbasics.sort_keys(schema_dict), start=1):
        field_specs_dict = schema_dict[field_name]
        xlsxbasics.write_header(data_worksheet, field_name, curr_col_index)

        if _determine_if_format_should_be_text(field_specs_dict):
            column_format = text_unlocked_format
        else:
            column_format = plain_unlocked_format

        # Note: although the xlsxwriter docs say "If you wish to set the format without changing the width
        # you can pass None as the width parameter" (https://xlsxwriter.readthedocs.io/worksheet.html), it
        # appears that calling set_column with None for the width *after* calling write_header (which sets
        # the width explicitly) resets the column width to the default, so the min column width has to be
        # set explicitly here too :(
        sheet.set_column(curr_col_index, curr_col_index, xlsxbasics.get_min_col_width(), column_format)

        entire_col_range = xlsxbasics.format_range(curr_col_index, None)
        top_cell_name = xlsxbasics.format_range(curr_col_index, data_worksheet.first_data_row_index)
        sample_rows_range = xlsxbasics.format_range(
            curr_col_index, data_worksheet.first_data_row_index,
            last_row_index=data_worksheet.last_allowable_row_for_sample_index)

        validation_dict = _get_validation_dict(
            field_name, field_specs_dict, data_worksheet.regex_handler, field_descs_sheet_name)
        if validation_dict is not None:
            if "value" in validation_dict:
                # the stored formula is a template; fill in this column's first data cell and range
                validation_dict["value"] = validation_dict["value"].format(
                    cell=top_cell_name, col_range=entire_col_range)

            return_code = sheet.data_validation(sample_rows_range, validation_dict)
            # NB: xlsxwriter's data_validation docstring *claims* it returns 0 if it succeeds, but in fact if
            # it succeeds it doesn't return an error code at all, hence the None check ...
            validation_failed = return_code is not None and return_code < 0
            if validation_failed:
                raise ValueError(
                    "Worksheet validation failed with return code '{0}'; check user warnings."
                    .format(return_code))

        _add_default_if_any(data_worksheet, field_specs_dict, curr_col_index)

    max_samples_msg = "No more than {0} samples can be entered in this worksheet.  If you need to submit metadata" \
                      " for >{0} samples, please contact CMI directly.".format(data_worksheet.num_allowable_samples)
    message_row_index = data_worksheet.last_allowable_row_for_sample_index + 1
    xlsxbasics.write_header(
        data_worksheet, max_samples_msg, data_worksheet.first_data_col_index, message_row_index,
        set_width=False)