def do_all_steps(self, df):
    """Run every configured correction step on *df*, logging failures.

    Steps that return an error are reported but do not stop the run.
    """
    for step_name in self.steps:
        mylog.info("Step {0}".format(step_name))
        step_error = self._do_single_step(step_name, df)
        if not step_error:
            continue
        mylog.error("Step {0} is not completed. Error: {1}".format(
            step_name, step_error))
def __init__(self, driver_exe_path, browser='firefox', profile_path='',
             browser_binary=None):
    """Create the browser driver.

    On any failure the error is logged and self.webdriver stays None.
    Only 'firefox' is supported; other values raise (and log)
    ClickerException.
    """
    mylog.debug("Init WebClicker")
    self.webdriver = None
    self.profile = None
    try:
        if browser != 'firefox':
            raise ClickerException(
                'Not supported browser {0}'.format(browser))
        # An empty profile_path means "use the default profile".
        self.profile = (wd.FirefoxProfile(profile_path)
                        if profile_path else None)
        self.webdriver = wd.Firefox(firefox_profile=self.profile,
                                    executable_path=driver_exe_path,
                                    firefox_binary=browser_binary)
    except Exception as exc:
        mylog.error(exc)
        mylog.error('Fail to initialize {0}'.format(browser))
def clear(self, name, value, partial=False):
    """Clear the text of the element located by name/value, if found."""
    target = self.find_element(name, value, partial)
    if not target:
        mylog.error("Can't clear element {0}={1}".format(name, value))
        return
    mylog.debug("Clear element {0}={1}".format(name, value))
    target.clear()
def include_if_match_string(df, col_name, val_list):
    """Keep only rows whose *col_name* value contains every string in
    *val_list* (filters are applied successively, i.e. AND semantics).

    If *col_name* is missing an error is logged and df is returned as-is.
    """
    if col_name not in df.columns.values.tolist():
        mylog.error('Error! Column "{0}" is not found in excel columns'.format(col_name))
        return df
    for pattern in val_list:
        df = df.loc[df[col_name].str.contains(pattern)]
    return df
def switch_to_frame(self, name, value):
    """Switch the webdriver context into the frame located by name/value.

    Returns True on success, False if the frame element was not found.
    """
    frame = self.find_element(name, value)
    if not frame:
        mylog.error("Can't switch to frame {0}={1}".format(name, value))
        return False
    self.webdriver.switch_to.frame(frame)
    mylog.debug('Switching to frame {0}={1}'.format(name, value))
    return True
def get_website(self, url):
    """Navigate to *url*, forcing an https:// prefix.

    Returns True on success, False when the page fails to load.
    """
    full_url = url if url.startswith('https://') else 'https://' + url
    mylog.debug("Trying load '{0}'".format(full_url))
    try:
        self.webdriver.get(full_url)
    except WebDriverException:
        mylog.error("Can't open page: '{0}'".format(full_url))
        return False
    return True
def send_ctrl_key(self, name, value, key):
    """Send a Ctrl+<key> chord to the element located by name/value.

    Keys.NULL releases the modifier afterwards. Returns True on success.
    """
    target = self.find_element(name, value)
    if not target:
        mylog.error("Can't send Ctrl-{0} to element {1} {2}".format(
            key, name, value))
        return False
    target.send_keys(Keys.CONTROL + key + Keys.NULL)
    mylog.debug("Sent Ctrl-{0} to element {1} {2}".format(
        key, name, value))
    return True
def alias_replacement_in_place(working_df: pd.DataFrame, alias_cfg: dict,
                               working_folder: str) -> None:
    """Apply every alias file listed in *alias_cfg* to *working_df* in place.

    Each cfg entry names an Excel alias file plus the key/new columns;
    unreadable files are logged and skipped.
    """
    for entry in alias_cfg:
        path = os.path.join(working_folder, entry['file'])
        alias_df, error = read_excel(path, replace_nan='')
        if error:
            mylog.error("Can't use alias file: {0} {1}".format(
                path, error))
            continue
        add_aliases(working_df, entry['key_col'], entry['new_col'],
                    alias_dict_by_df(alias_df))
def send_string(self, name, value, string: str, end='', partial=False):
    """Type *string* (followed by *end*) into the located element.

    Returns True when sent, False when the element was not found.
    """
    target = self.find_element(name, value, partial)
    if target:
        target.send_keys(string + end)
        mylog.debug("Sent string '{0}' to element {1} {2}".format(
            string, name, value))
        return True
    mylog.error(
        "Element {0}: {1} NOT found. String '{2}' NOT sent".format(
            name, value, string))
    return False
def get_attribute(self, name, value, attribute_name):
    """Return the named attribute of the located element, or '' if the
    element is not found."""
    target = self.find_element(name, value)
    if not target:
        mylog.error(
            "Can't read attribute {2}: Element {0}: {1} NOT found.".format(
                name, value, attribute_name))
        return ''
    result = target.get_attribute(attribute_name)
    mylog.debug("Element {0} = '{1}' attribute {2}={3}".format(
        name, value, attribute_name, result))
    return result
def exclude_data(df, col_name, val_list):
    """Drop rows of *df* whose *col_name* matches any value in *val_list*.

    Logs a warning for values that removed nothing, and an error
    (returning df unchanged) when the column is missing.
    """
    if col_name not in df.columns.values.tolist():
        mylog.error('Error! Column "{0}" is not found in excel columns'.format(col_name))
        return df
    for val in val_list:
        remaining = df.loc[no_matching(df[col_name], val)]
        # An unchanged shape means this value filtered out no rows.
        if remaining.shape == df.shape:
            mylog.warning('Warning! Value "{0}" was not found in column "{1}"'.format(val, col_name))
        df = remaining
    return df
def merge_in_place(working_df: pd.DataFrame, merge_cfg: dict,
                   working_folder) -> None:
    """Enrich *working_df* in place with lookup columns taken from every
    merge file configured in *merge_cfg*.

    Unreadable merge files are logged and skipped.
    """
    for entry in merge_cfg:
        path = os.path.join(working_folder, entry['file'])
        merge_df, error = read_excel(path, replace_nan='')
        if error:
            mylog.error("Can't use merge file: {0}".format(path))
            continue
        lookup_and_add(working_df,
                       key_col=entry['pos_file_key'],
                       new_col=entry['new_col'],
                       lookup_dict=lookup_dict_by_df(
                           merge_df, entry['merge_file_key'],
                           entry['merge_res_key']))
def switch_to_frame_by_index(self, index, time_sec=10):
    """Switch to the frame at *index*, retrying once per second for up to
    *time_sec* seconds.

    Returns True on success, False when the frame never appeared.
    """
    t = 0
    while t < time_sec:
        try:
            self.webdriver.switch_to.frame(index)
            mylog.debug('Switching to frame index={0}; time={1}'.format(
                index, t))
            return True
        except NoSuchFrameException:
            self.wait(1)
            t += 1
            mylog.debug('Waiting frame index={0}; time={1}'.format(
                index, t))
    # BUG FIX: the message was logged literally ("{0}" and "time_sec"
    # appeared verbatim) because .format() was never called.
    mylog.error("Can't switch to frame index = {0}; time = {1}".format(
        index, time_sec))
    return False
def _wait_element(self, how: str, what: str, timeout_sec=1):
    """Poll once per second until the element is ready or the timeout
    expires. Returns True when found, False on timeout."""
    for elapsed in range(timeout_sec):
        if self.is_element_ready(how, what):
            mylog.debug(
                'Element {0}: {1} found. Waiting time is {2} sec'.format(
                    how, what, elapsed))
            return True
        self.sleep(1)
        mylog.debug('Waiting element {0}: {1} {2} sec'.format(
            how, what, elapsed))
    mylog.error("Element {0}: {1} NOT found. Timeout = {2} sec".format(
        how, what, timeout_sec))
    return False
def click(self, name, value, partial=False, time_sec=1):
    """Click the element located by name/value, retrying once per second
    for up to *time_sec* seconds while the click raises
    WebDriverException.

    Returns True on success, False when the element is missing or every
    click attempt failed.
    """
    element = self.find_element(name, value, partial)
    if not element:
        mylog.error("Can't click element {0}={1}".format(name, value))
        return False
    mylog.debug("Click element {0}={1}".format(name, value))
    t = 0
    while t < time_sec:
        try:
            element.click()
            return True
        except WebDriverException:
            self.wait(1)
            t += 1
    # BUG FIX: previously fell through here and implicitly returned None
    # with no log when every click attempt raised.
    mylog.error("Can't click element {0}={1}".format(name, value))
    return False
def get_element_value(self, how: By, path: str, timeout_sec=30):
    """Poll for the element once per second and return its 'value'
    attribute; return '' when the element never appears."""
    for elapsed in range(timeout_sec):
        found = self.get_element(how, path)
        if found is not None:
            mylog.debug(
                'Element {0}: {1} found. Waiting time is {2} sec'.format(
                    how, path, elapsed))
            return found.get_attribute('value')
        self.sleep(1)
        mylog.debug('Waiting element {0}: {1} {2} sec'.format(
            how, path, elapsed))
    mylog.error("Element {0}: {1} NOT found. Timeout = {2} sec".format(
        how, path, timeout_sec))
    return ''
def update_excel_sheet(updated_sheet_name: str, file_name: str,
                       df: pd.DataFrame, prompt=False,
                       convert_strings_to_urls=True) -> Error:
    """Replace (or add) one sheet of an Excel file while preserving all
    other sheets.

    When the file does not exist it is created with *df* as the only
    sheet. An empty *updated_sheet_name* means "overwrite the first
    existing sheet". Returns the write error (falsy on success).
    """
    original_sheet_list, error = read_sheet_names(file_name)
    if error:
        # file doesn't exist yet, try to create new
        mylog.warning("File {0} doesn't exist. Creating new".format(file_name))
        # NOTE(review): if updated_sheet_name is '' here, write_excel
        # receives an empty sheet name -- confirm write_excel's default.
        error = write_excel(file_name, df, prompt=prompt,
                            convert_strings_to_urls=convert_strings_to_urls,
                            sheet_name=updated_sheet_name)
        return error
    # BUG FIX: this default-sheet lookup used to run BEFORE the error
    # check above, so a missing file raised IndexError on the empty
    # sheet list instead of taking the create-new branch.
    # overwrite first sheet if updated_sheet_name is empty
    if len(updated_sheet_name) == 0:
        updated_sheet_name = original_sheet_list[0]
    # read all existing sheets so they can be written back unchanged
    excel_with_sheets_dict = OrderedDict()
    for sheet in original_sheet_list:
        next_sheet, error = read_excel(file_name, replace_nan='',
                                       sheet_name=sheet)
        if error:
            mylog.error("Can't read {0} - {1}: {2}".format(
                file_name, sheet, error))
        else:
            excel_with_sheets_dict[sheet] = next_sheet
    # replace (or append) the updated sheet and rewrite the whole file
    excel_with_sheets_dict[updated_sheet_name] = df
    mylog.debug("excel_with_sheets_dict={0}".format(
        list(excel_with_sheets_dict)))
    error = write_excel(file_name, excel_with_sheets_dict, prompt=prompt,
                        convert_strings_to_urls=convert_strings_to_urls)
    return error
def drop_down(self, by_name, how_value, set_value, time_sec=1,
              repeat_if_fail=0):
    """Open a dropdown and pick *set_value* by its link text.

    The whole open-and-pick sequence is retried up to repeat_if_fail
    extra times. Returns True once the value is selected.
    """
    for _attempt in range(repeat_if_fail + 1):
        if not self.click(by_name, how_value, time_sec=time_sec):
            continue
        if self.click('link_text', set_value, time_sec=time_sec):
            mylog.debug("Dropdown {0}='{1}' selected '{2}'".format(
                by_name, how_value, set_value))
            return True
    mylog.error("Dropdown {0}='{1}' FAIL to select '{2}'".format(
        by_name, how_value, set_value))
    return False
def set_multiple_parameters_by_ispn(*, df: pd.DataFrame,
                                    destination_col: str,
                                    source_cols: tuple, **options):
    """Write (column, value) pairs from *source_cols* into the first row
    of *df* whose 'Ispn' equals *destination_col*, in place.

    Overwriting a non-blank cell is logged as a warning; bad Ispn or
    column names are logged as errors.
    """
    del options  # accepted for interface compatibility; intentionally unused
    try:
        row_index = df.index[df['Ispn'] == destination_col].tolist()[0]
    except Exception as exc:
        mylog.error("Invalid Ispn {0}: {1}".format(destination_col, exc))
        return
    # source_cols alternates names and values: (col1, val1, col2, val2, ...)
    for column, fresh_value in zip(source_cols[0::2], source_cols[1::2]):
        try:
            current = df.at[row_index, column]
            if current != '':
                mylog.warning(
                    "Replacing non-blank value at {0} : {1} to {2}".format(
                        destination_col, current, fresh_value))
            df.at[row_index, column] = fresh_value
        except Exception as exc:
            mylog.error("Invalid Parameter '{0}' in {1}: {2}".format(
                column, destination_col, exc))
def include_only_data(df, col_name, val_list):
    """Return only the rows of *df* whose *col_name* matches a value in
    *val_list*.

    Rows are gathered per value in val_list order (so overlapping values
    may duplicate rows). Values that match nothing produce a warning; a
    missing column produces an error and df is returned unchanged.
    """
    if not val_list:
        # BUG FIX: the message was never formatted (col_name was passed
        # as a stray extra argument) and the function returned None,
        # clobbering the caller's DataFrame.
        mylog.warning('Warning: Empty value list for "{0}"'.format(col_name))
        return df
    if col_name in df.columns.values.tolist():
        orig_df = df.copy()
        df = df.loc[matching(df[col_name], val_list[0])]
        for val in val_list[1:]:
            filtered_df = orig_df.loc[matching(orig_df[col_name], val)]
            if filtered_df.empty:
                mylog.warning('Warning! Value "{0}" was not found in column "{1}"'.format(val, col_name))
            # BUG FIX: DataFrame.append was removed in pandas 2.0;
            # pd.concat is the documented replacement.
            df = pd.concat([df, filtered_df])
    else:
        mylog.error('Error! Column "{0}" is not found in excel columns'.format(col_name))
    return df
def main():
    """Interactive console loop: load an Excel file into a BizDataTree,
    render it to HTML after each action, and let the user expand/collapse
    nodes by id until interrupted."""
    arg = docopt(__doc__)
    file_in = arg['--in']
    file_out = arg['--out']
    df, error = read_excel(file_in, replace_nan='')
    if error:
        mylog.error(error)
        return
    # NOTE(review): this stores the tree as an attribute on the `dt`
    # module object itself -- presumably other code reads dt.data_tree;
    # verify against callers.
    dt.data_tree = BizDataTree(df, 'POS FY')
    while True:
        dt.data_tree.print_console()
        # Re-render the HTML view after every user action.
        html = dt.data_tree.render_html(render_method_basic)
        with open(file_out, "w") as text_file:
            print(html, file=text_file)
        node_id = input("Click on id:")
        expanded, error = dt.data_tree.is_expanded(node_id)
        if error:
            mylog.error(error)
            continue
        if expanded:
            # Toggle: an expanded node is collapsed on the next click.
            dt.data_tree.collapse(node_id)
        else:
            drill_by = input("Drill by:")
            error = dt.data_tree.expand_id(node_id, drill_by)
            if error:
                mylog.error(error)
                continue
def build_tool1():
    """Build HTML tool pages from one or more Excel input files.

    For each input file: read the 'html_config' sheet (one row per HTML
    table to generate) and the 'Data' sheet (products); build the tables
    and accumulate them per output HTML file; mark processed Ispns back
    into the source file; finally write every accumulated page with a
    shared main menu and date stamp.
    """
    args = docopt(__doc__)
    mylog.debug(args)
    input_folder = ''
    if args['--in_folder']:
        input_folder = args['--in_folder']
    output_folder = ''
    if args['--out_folder']:
        output_folder = args['--out_folder']
    main_menu = MainMenuTemplate()
    # output html file name -> page template accumulating its tables
    output_files_dict = {}
    for input_file_name in args['--files']:
        input_file_full_path = os.path.join(input_folder, input_file_name)
        config_df, error = read_excel(input_file_full_path, replace_nan='',
                                      sheet_name='html_config')
        if error:
            mylog.error(
                "Can't process file {0} - sheet html_config: {1}".format(
                    input_file_full_path, error))
            continue
        products_df, error = read_excel(input_file_full_path,
                                        replace_nan='', sheet_name='Data')
        if error:
            mylog.error("Can't process file {0} - sheet Data: {1}".format(
                input_file_full_path, error))
            continue
        config_dict = config_df.to_dict('index')
        row_index_list = list(map(int, list(config_dict)))
        mylog.debug(row_index_list)
        # First pass: register every output file and its main-menu entry.
        for i in row_index_list:
            row = config_dict[i]
            output_file_name = row['output_html']
            if output_file_name not in output_files_dict:
                output_files_dict.update(
                    {output_file_name: CompleteToolTemplate()})
            main_menu.add_item(row['main_menu_item'], output_file_name)
        processed_ispn_list = []
        # Second pass: build one product table per config row.
        for i in row_index_list:
            row = config_dict[i]
            mylog.debug("Open data: {0} - {1}".format(input_file_full_path,
                                                      'Data'))
            alias_to_col_name_dict = None
            # NOTE(review): the column_aliases sheet is re-read for every
            # config row -- could be hoisted out of the loop; verify.
            try:
                mylog.info("Open column alias file: {0} - {1}".format(
                    input_file_full_path, 'column_aliases'))
                col_alias_df, error = read_excel(input_file_full_path,
                                                 replace_nan='',
                                                 sheet_name='column_aliases')
                if error:
                    mylog.error(error)
                    return
                alias_to_col_name_dict = aliases_to_dict(col_alias_df,
                                                         'alias')
            except FileNotFoundError as e:
                mylog.error(e)
            mylog.debug(row)
            # Optional filter columns may be absent from the config sheet.
            row.setdefault('exclude', '')
            row.setdefault('include_only', '')
            row.setdefault('match', '')
            mylog.debug("exclude='{0}' include='{1}' match='{2}'".format(
                row['exclude'], row['include_only'], row['match']))
            selected_products_df = selected_products(
                products_df, exclude=row['exclude'],
                include_only=row['include_only'], match=row['match'],
                alias_to_col_name_dict=alias_to_col_name_dict)
            processed_ispn_list.extend(selected_products_df['Ispn'].tolist())
            mylog.debug("Build html for '{0}' -> '{1}' -> '{2}'".format(
                row['category'], row['subcategory'], row['view']))
            table_html, error = product_table_to_html(
                selected_products_df, category=row['category'],
                subcategory=row['subcategory'], view_name=row['view'],
                main_topic=row['main_topic'], tree_attributes=row['tree'],
                part_attributes=row['attributes'],
                datasheet_url=row['datasheet_url'],
                view_type=row['view_type'],
                product_page_url=row['product_page_url'],
                alias_to_col_name_dict=alias_to_col_name_dict)
            if error:
                mylog.error(error)
            else:
                template = output_files_dict[row['output_html']]
                template.add_table(table_html)
        # mark processed Ispns
        mylog.info("Marking processed {0} Ispns...".format(
            len(processed_ispn_list)))
        products_df['_processed'] = ''
        products_df.loc[products_df['Ispn'].isin(processed_ispn_list),
                        '_processed'] = 'Y'
        error = update_excel_sheet('Data', input_file_full_path,
                                   products_df, prompt=True,
                                   convert_strings_to_urls=False)
        if error:
            mylog.error("Can't update {0} with processed Ispns marks".format(
                input_file_full_path))
    mylog.debug(output_files_dict)
    # Emit every accumulated page with the shared menu and date stamp.
    for file_name in output_files_dict:
        output_files_dict[file_name].add_main_menu_html(
            main_menu.make(selected_menu_link=file_name))
        output_files_dict[file_name].add_date_info(args['--date'])
        out_html = output_files_dict[file_name].make()
        with open(os.path.join(output_folder, file_name), "w",
                  encoding='utf-8') as out_html_file:
            out_html_file.write(out_html)
def main():
    """Convert four source XML files into one merged Excel workbook.

    Pipeline: build the document (language) filter, then convert document
    assignments, parameters, and part numbers to DataFrames, merge all
    three on 'Ispn', and write the result to Excel.
    """
    arg = docopt(__doc__)
    ispns_fn = arg['--ispn']
    parameters_fn = arg['--parameters']
    docs_fn = arg['--docs']
    docs_assignment_fn = arg['--docs_assignment']
    folder_name = arg['--folder']
    output_fn = arg['--output']
    mylog.info(arg)
    mylog.info('Processing documents...')
    doc_filter = DocFilter()
    file_name = os.path.join(folder_name, docs_fn)
    file_size = os.path.getsize(file_name)
    mylog.info("File size {0} Bytes".format(file_size))
    # estimated_items_count drives the progress indicator; the divisor
    # is presumably an empirical average of bytes per item -- confirm.
    error = doc_filter.prepare(file_name, progress_indicator=progress,
                               estimated_items_count=int(file_size / 1200))
    if error:
        mylog.error(error)
        return
    mylog.info('Processing documents: Done!')
    mylog.info('Processing document assignment...')
    file_name = os.path.join(folder_name, docs_assignment_fn)
    file_size = os.path.getsize(file_name)
    mylog.info("File size {0} Bytes".format(file_size))
    doc_info_df, error = xml2excel_params(
        file_name, row_key='Ispn', column_key='DocumentGroup',
        convert2str_method=document_ref_to_str,
        is_filter_pass=doc_filter.is_english,
        progress_indicator=progress,
        estimated_items_count=int(file_size / 950))
    if error:
        mylog.error(error)
        return
    mylog.info('Processing document assignment: Done!')
    mylog.info('Processing parameters...')
    file_name = os.path.join(folder_name, parameters_fn)
    file_size = os.path.getsize(file_name)
    mylog.info("File size {0} Bytes".format(file_size))
    ispn_param_df, error = xml2excel_params(
        file_name, row_key='Ispn', column_key='ParameterName',
        column_modifier_key='ValueRemark',
        convert2str_method=ispn_xml_parameters_to_str,
        progress_indicator=progress,
        estimated_items_count=int(file_size / 870))
    if error:
        mylog.error(error)
        return
    mylog.info('Processing parameters: Done!')
    mylog.info('Processing ispns...')
    file_name = os.path.join(folder_name, ispns_fn)
    file_size = os.path.getsize(file_name)
    mylog.info("File size {0} Bytes".format(file_size))
    ispn_df, error = xml2excel_merge_partnums(file_name, 'Ispn',
                                              progress_indicator=progress,
                                              estimated_items_count=int(
                                                  file_size / 2250))
    if error:
        mylog.error(error)
        return
    mylog.info('Processing ispns: Done!')
    # Join all three frames on the part-number key (pd.merge defaults to
    # an inner join).
    merged_df = pd.merge(ispn_df, ispn_param_df, on='Ispn',
                         suffixes=('_1', '_2'))
    merged_df = pd.merge(merged_df, doc_info_df, on='Ispn',
                         suffixes=('_3', '_4'))
    # merged_df.replace("", "no_data", inplace=True)
    error = write_excel(os.path.join(folder_name, output_fn), merged_df,
                        prompt=True, convert_strings_to_urls=False)
    if error:
        print("Can't write excel file. {0}".format(error))
def make_product_tables():
    """Apply per-product-group correction steps to a source Excel file.

    For every working file <group>.xlsx: load its 'xml_config' sheet as
    the list of correction steps, run them on a copy of the source data,
    and write the result into the group file's 'Data' sheet. Optionally
    mark processed Ispns back in the source file afterwards.
    """
    arg = docopt(__doc__)
    mylog.debug(arg)
    in_df, error = read_excel(arg['--source'], replace_nan='')
    if error:
        mylog.error("Can't read file '{0}': {1}".format(arg['--source'],
                                                        error))
        return
    product_groups = []
    for fn in arg['--working_file']:
        # Only the part before the first '.' is the group name.
        name, ext = fn.split('.', 1)
        if ext == 'xlsx':
            product_groups.append(name)
        else:
            mylog.error("Wrong filename format {0}".format(fn))
    processed_ispn_list = []
    for p_group in product_groups:
        # --only restricts the run to a single product group.
        if arg['--only']:
            if p_group != arg['--only']:
                continue
        working_df = in_df.copy()
        mylog.info('Initialization "{0}"'.format(p_group))
        builder = ProductTableBuilder()
        fn = os.path.join(arg['--working_folder'], p_group + ".xlsx")
        sheet_name = 'xml_config'
        mylog.debug("Reading configuration from {0} : {1}".format(
            fn, sheet_name))
        error = builder.init_from_file(fn, sheet_name=sheet_name)
        if error:
            mylog.error(
                "Can't read configuration from {0} - {1}: {2}".format(
                    fn, sheet_name, error))
            return
        mylog.info('Performing correction steps...')
        builder.do_all_steps(working_df)
        mylog.info("{0} part-numbers processed".format(
            len(working_df.index)))
        writing_error = update_excel_sheet(
            'Data',
            os.path.join(arg['--working_folder'],
                         '{0}.xlsx'.format(p_group)),
            working_df, prompt=True, convert_strings_to_urls=False)
        if writing_error:
            mylog.error(writing_error)
        processed_ispn_list.extend(working_df['Ispn'].tolist())
    # mark processed Ispns
    if arg['--mark_processed']:
        mylog.info("Marking processes ispns...")
        in_df['_processed'] = ''
        in_df.loc[in_df['Ispn'].isin(processed_ispn_list),
                  '_processed'] = 'Y'
        mylog.info("Writing back to file {0}...".format(arg['--source']))
        error = update_excel_sheet('', arg['--source'], in_df, prompt=True,
                                   convert_strings_to_urls=False)
        if error:
            mylog.error(
                "Can't update {0} with processed Ispns marks".format(
                    arg['--source']))
def to_xmind():
    """Build a topic tree from an Excel table and export it.

    Reads the table, applies include-only / exclude / substring-match
    filters, builds a tree keyed by the --tree columns, then optionally
    saves an .xmind file (after interactive confirmation) and/or an HTML
    rendering.
    """
    args = docopt(__doc__)
    mylog.debug(sys.argv)
    mylog.debug(args)
    a_info = args['--info']
    a_tree_levels = args['--tree']
    a_annotations = args['--ann']
    a_notes = args['--note']
    a_print = args['--print']
    a_file_xlsx = args['FILE_XLSX']
    a_file_xmind = args['--xmind']
    a_file_html = args['--html']
    a_main_topic_name = args['--main']
    a_url_col = args['--url']
    a_include_only = args['--include_only']
    a_exclude = args['--exclude']
    a_match = args['--match']
    a_add_parameter_names = args['--add_parameter_names']
    try:
        df = pd.read_excel(a_file_xlsx)
    # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt.
    except Exception:
        mylog.error("Can't read file: {0}".format(a_file_xlsx))
        return
    df = df.astype(str)
    header_dict = table_headers_dict(df)
    header_list = df.columns.values.tolist()
    # Filters: keep-only, then exclude, then substring match.
    for a in a_include_only:
        col, val = parse_filter_arguments(a)
        df = include_only_data(df, arg_to_header(col, header_dict,
                                                 header_list), val)
    for a in a_exclude:
        col, val = parse_filter_arguments(a)
        df = exclude_data(df, arg_to_header(col, header_dict, header_list),
                          val)
    if a_match:
        col, val = parse_filter_arguments(a_match)
        df = include_if_match_string(
            df, arg_to_header(col, header_dict, header_list), val[0])
    root_node = XMindNode(a_main_topic_name)
    # Resolve user-supplied column aliases to real headers; drop unknowns.
    tree_levels = [
        arg_to_header(a, header_dict, header_list) for a in a_tree_levels
    ]
    tree_levels = [a for a in tree_levels if a]
    if a_add_parameter_names:
        parameter_names_tree(tree_levels, root_node)
    if len(tree_levels) > 1:
        anns = [
            arg_to_header(a, header_dict, header_list)
            for a in a_annotations
        ]
        anns = [a for a in anns if a]
        notes = [arg_to_header(a, header_dict, header_list)
                 for a in a_notes]
        notes = [a for a in notes if a]
        url_col = arg_to_header(a_url_col, header_dict, header_list)
        if a_info:
            print_pretty_tree_plan(tree_levels, anns, notes, url_col)
        table_to_tree(df, tree_levels, root_node, anns, notes,
                      last_level_url_col_name=url_col)
        if a_info:
            print_header_value_variation_stat(df)
    if a_print:
        print_pretty_tree(root_node, 30)
    if a_file_xmind:
        xmb = XMindBuilder(a_file_xmind)
        print('File "{0}" will be overwritten, data can be lost!'.format(
            a_file_xmind))
        answer = input('Type "yes" if agree >>>')
        if answer == 'yes':
            root_node.parent = xmb.central_topic_tree_node
            xmb.build_from_tree(root_node,
                                xmind_central_topic=xmb.central_topic)
            xmb.save(a_file_xmind)
            print('XMIND saved to file "{0}"'.format(a_file_xmind))
        else:
            print('Quited without saving. File was not changed')
    if a_file_html:
        html_data = make_html(root_node, SimpleHtmlTemplate)
        # html_data = html_data.encode(encoding='UTF-8')
        with open(a_file_html, "w", encoding='utf-8') as html_file:
            html_file.write(html_data)