def scroll_div(num):
    """Keep scrolling the list inside the open dialog until ``num`` links are loaded.

    The scrollable container is located through its XPath, its ``class``
    attribute is substituted into a small JavaScript snippet, and that snippet
    is executed once per second.  After every scroll the number of
    ``notranslate`` anchors on the page is counted and reported through
    ``Progress.progress``.

    The loop ends when the scroll height stops changing while at least ``num``
    anchors are present, or when more than 15 consecutive rounds pass without
    the scroll height changing.

    :param num: number of entries expected to be loaded.
    """
    container_xpath = '//div[contains(@role, "dialog")]//div/ul/..'
    container = browser.find_element_by_xpath(container_xpath)
    class_scroll = container.get_attribute('class')

    # Scrolls the container to its bottom and returns its new scroll height.
    jscommand = """
        followers = document.querySelector(".%s");
        followers.scrollTo(0, followers.scrollHeight);
        var lenOfPage=followers.scrollHeight;
        return lenOfPage;
        """ % class_scroll

    height = browser.execute_script(jscommand)
    started = time.time()
    # NOTE(review): presumably keeps the elapsed time used by Progress non-zero.
    time.sleep(0.01)

    idle_rounds = 0
    fetched = False
    finished = False
    while not finished:
        previous_height = height
        time.sleep(1)
        height = browser.execute_script(jscommand)
        loaded = len(
            browser.find_elements_by_xpath(
                "//a[contains(@class,'notranslate')]"))

        if height != previous_height:
            # The container grew, so new entries arrived.
            fetched = True
        elif num > loaded:
            # Nothing new arrived although entries are still missing.
            fetched = False
        else:
            # Height is stable and enough entries are present.
            finished = True

        idle_rounds = 0 if fetched else idle_rounds + 1

        Progress.progress(count=loaded,
                          total=num,
                          now=started,
                          message='Fetching...')

        if idle_rounds > 15:
            print(
                '\n--> It has been too long time that program could not fetch new data. Now will ignore and continue to process.'
            )
            finished = True
def excel_read_to_dict(excel, number_of_sheet=0, exit_all=False):
    """Read one sheet of an Excel workbook into two dictionaries.

    Rows whose first cell converts to ``int`` are data rows; every other row
    is treated as a header row.  Both dictionaries are keyed 1, 2, 3, ... in
    order of appearance and map to the list of that row's cell values, each
    passed through ``String.float_to_integer(..., force_number=False)``.

    File name resolution:

    * if ``excel`` does not end in ``.xlsx``/``.xls`` the extension is
      replaced by ``.xlsx``;
    * the name is passed through ``find_file`` (per the original comments it
      looks for the name regardless of upper/lower case);
    * when that file does not exist, the same name with the other Excel
      extension is tried.

    :param excel: path or file name of the workbook.
    :param number_of_sheet: zero-based sheet index.
    :param exit_all: forwarded to ``Progress.exit_app`` on errors.
    :return: ``(all_data, headers)``; both are empty when the file could not
        be found, and may be partially filled when reading failed midway.

    Example for a sheet with three data rows::

        all_data = {
            1: ['1st Column Value', '2nd Column Value', ...],
            2: ['1st Column Value', '2nd Column Value', ...],
            3: ['1st Column Value', '2nd Column Value', ...],
        }
    """
    all_data = dict()
    headers = dict()
    excel_extensions = ('.xlsx', '.xls')
    try:
        file_name, file_extension = os.path.splitext(excel)
        extension = file_extension.lower()
        # The original test used "or" and was therefore always true, which
        # silently turned every '.xls' name into '.xlsx'.
        if extension not in excel_extensions:
            extension = '.xlsx'
            excel = file_name + extension

        excel = find_file(excel)
        if not os.path.exists(excel):
            # Retry with the other Excel extension (xlsx <-> xls).
            other_extension = '.xls' if extension == '.xlsx' else '.xlsx'
            excel2 = find_file(file_name + other_extension)
            if not os.path.exists(excel2):
                message = "! ! File couldn't be found in folder. --> '%s' or '%s'" % (
                    excel, excel2)
                Progress.exit_app(message=message, exit_all=exit_all)
                return all_data, headers
            excel = excel2

        workbook = xlrd.open_workbook(excel)
        sheet = workbook.sheet_by_index(number_of_sheet)
        number_of_column = sheet.ncols
        # nrows also works for a completely empty sheet, where asking for
        # column 0 would fail.
        number_of_row = sheet.nrows

        now = time.time()
        message = 'Reading excel...'
        # NOTE(review): presumably keeps the elapsed time used by Progress non-zero.
        time.sleep(0.01)

        for y in range(number_of_row):
            key = sheet.cell_value(rowx=y, colx=0)
            try:
                key = int(key)
            except (TypeError, ValueError, OverflowError):
                # Not a number: the row is a header row.
                pass

            row_values = [
                String.float_to_integer(sheet.cell_value(rowx=y, colx=x),
                                        force_number=False)
                for x in range(number_of_column)
            ]

            # Only rows starting with an integer ID are data rows; this keeps
            # header rows out of the data dictionary.
            if isinstance(key, int):
                all_data[len(all_data) + 1] = row_values
            else:
                headers[len(headers) + 1] = row_values

            Progress.progress(
                count=y + 1,
                total=number_of_row,
                now=now,
                message=message,
            )
    except PermissionError:
        message = "--> '%s' can't access to this file.\nIt is probably because the file is open. If this excel is open, please close it and re-run program." % excel
        Progress.exit_app(message=message, exit_all=exit_all)
    except Exception as e:
        message = "--> An error occurred while reading file... '%s'" % excel
        Progress.exit_app(e=e, message=message, exit_all=exit_all)

    print('\nNumber of item: %s' % len(all_data))
    return all_data, headers
def _excel_whole_number(value):
    """Return ``value`` as an ``int`` when that loses no information, else ``None``."""
    try:
        number = int(value)
    except (TypeError, ValueError, OverflowError):
        return None
    if isinstance(value, float) and number != value:
        # int() would silently drop the fractional part.
        return None
    return number


def _excel_cell_format(workbook, align, bg_color=None):
    """Create a bordered, vertically centred format; ``bg_color`` makes it a bold filled cell."""
    cell_format = workbook.add_format({'border': 1})
    if bg_color is not None:
        cell_format.set_pattern(1)
        cell_format.set_bg_color(bg_color)
    cell_format.set_align(align)
    cell_format.set_align('vcenter')
    if bg_color is not None:
        cell_format.set_bold()
    return cell_format


def excel_create(
    excel,
    all_data,
    headers=None,
    sizes=None,
    locations=None,
    page_name='Page1',
    exit_all=False,
):
    """Write ``all_data`` to a new ``.xlsx`` workbook with a formatted header row.

    Layout: column 0 holds a running ``ID`` (1, 2, 3, ...), followed by one
    column per header, followed by a credit column.  A ``dict`` element inside
    a row (expected as its last item) is not written in place; each of its
    keys becomes an extra column appended after the regular headers.

    The arguments are not modified: headers, sizes, locations and rows are
    copied before being padded or extended.

    :param excel: target file name; its extension is always replaced by
        ``.xlsx`` because the workbook is produced with ``xlsxwriter``.
    :param all_data: mapping whose values are the rows (sequences of cells).
    :param headers: column titles; missing ones are filled with
        ``'Header 1'``, ``'Header 2'``, ...
    :param sizes: column widths; missing ones default to 20 (25 for every
        column when ``sizes`` is not given at all).
    :param locations: per-column alignment, ``'center'``, ``'right'`` or
        anything else for left.  In left columns, values that convert to a
        whole number are written as ``int`` and centred.
    :param page_name: worksheet name.
    :param exit_all: forwarded to ``Progress.exit_app`` on errors.
    :return: ``None``.
    """
    # Copies keep repeated calls with the same lists from accumulating the
    # generated headers and attribute columns.
    headers = list(headers) if headers else []

    # The original extension test was always true, so the name always ended
    # up as "<name>.xlsx"; that result is kept, stated directly.
    file_name, _ = os.path.splitext(excel)
    excel = file_name + '.xlsx'

    try:
        message = "'%s' --> Creating..." % excel
        total = len(all_data)
        print(message)
        if not total:
            message = '\n--> No data.'
            print(message)
            return

        rows = [list(val) for val in all_data.values()]

        # Widest row, not counting a trailing attribute dict.
        length_max = 0
        for cells in rows:
            width = len(cells)
            if cells and isinstance(cells[-1], dict):
                width -= 1
            length_max = max(length_max, width)

        # Generate titles for columns that have data but no header.
        generated = 0
        while len(headers) < length_max:
            generated += 1
            headers.append('Header %s' % generated)

        # Pad short rows with empty cells so every header column gets a border.
        for cells in rows:
            cells.extend([''] * (len(headers) - len(cells)))

        if sizes:
            sizes = list(sizes)
            sizes.extend([20] * (len(headers) - len(sizes)))
        else:
            sizes = [25] * len(headers)

        if locations:
            locations = list(locations)
            locations.extend(['left'] * (len(headers) - len(locations)))
        else:
            locations = ['left'] * len(headers)

        # Every distinct attribute name gets its own column after the headers.
        attrs_loc = dict()
        for cells in rows:
            for elem in cells:
                if not isinstance(elem, dict):
                    continue
                for name in elem:
                    if name not in attrs_loc:
                        headers.append(name)
                        sizes.append(20)
                        locations.append('left')
                        # Column 0 is the ID, so header i lives in column i + 1.
                        attrs_loc[name] = len(headers)

        workbook = xlsxwriter.Workbook(excel)
        worksheet = workbook.add_worksheet(page_name)
        worksheet.freeze_panes(1, 0)

        cell_format_header = _excel_cell_format(workbook, 'center', 'orange')
        cell_format_center_regular = _excel_cell_format(workbook, 'center')
        cell_format_regular = _excel_cell_format(workbook, 'left')
        cell_format_right_regular = _excel_cell_format(workbook, 'right')
        cell_format_copyr = _excel_cell_format(workbook, 'center', 'FABF8F')

        # Header row.
        worksheet.write(0, 0, 'ID', cell_format_header)
        worksheet.set_column(0, 0, 8)
        for col, (head, size) in enumerate(zip(headers, sizes), start=1):
            worksheet.write(0, col, head, cell_format_header)
            worksheet.set_column(col, col, size)
        credit_col = len(headers) + 1
        worksheet.write(0, credit_col, 'Automated by BerkayMizrak.com',
                        cell_format_copyr)
        worksheet.set_column(credit_col, credit_col, 34)

        count = 0
        now = time.time()
        # NOTE(review): presumably keeps the elapsed time used by Progress non-zero.
        time.sleep(0.01)

        # Data rows start right below the header, so the row index doubles as
        # the running ID.
        for row, cells in enumerate(rows, start=1):
            worksheet.write(row, 0, row, cell_format_center_regular)

            col = 0
            for elem in cells:
                if isinstance(elem, dict):
                    continue
                col += 1
                location = locations[col - 1] if col <= len(locations) else 'left'
                if location == 'center':
                    worksheet.write(row, col, elem, cell_format_center_regular)
                elif location == 'right':
                    worksheet.write(row, col, elem, cell_format_right_regular)
                else:
                    number = _excel_whole_number(elem)
                    if number is None:
                        worksheet.write(row, col, elem, cell_format_regular)
                    else:
                        worksheet.write(row, col, number,
                                        cell_format_center_regular)

            for elem in cells:
                if isinstance(elem, dict):
                    for name, attr in elem.items():
                        worksheet.write(row, attrs_loc[name], attr,
                                        cell_format_regular)

            count = row
            Progress.progress(
                count=count,
                total=total,
                now=now,
            )

        print()
        workbook.close()
        message = "'%s' Data Saved to Excel -->> '%s'" % (count, excel)
        print(message)
    except PermissionError:
        message = "--> '%s' can't access to this file.\nIt is probably because the file is open. If this excel is open, please close it and re-run program." % excel
        Progress.exit_app(message=message, exit_all=exit_all)
    except Exception as e:
        message = "--> An error occurred while creating file... '%s'" % excel
        Progress.exit_app(e=e, message=message, exit_all=exit_all)
def read_records_data_to_dict(txt_file,
                              show_progress=True,
                              file_not_found_error=True,
                              exit_all=True):
    """Read column-style records (like spreadsheet rows) from a plain text file.

    File format:

    * a line of exactly 40 dashes ends the current record;
    * a line of exactly 20 dashes ends the current field;
    * consecutive other lines belong to the same field and are concatenated
      without a separator.

    Records are numbered from 1 in file order; records without any field are
    dropped afterwards, so the keys may have gaps.

    :param txt_file: path of the UTF-8 text file.
    :param show_progress: report progress through ``Progress.progress``.
    :param file_not_found_error: when true, a missing file is reported through
        ``Progress.exit_app``; otherwise it is ignored silently.
    :param exit_all: forwarded to ``Progress.exit_app``.
    :return: ``{record_number: [field, field, ...]}``; empty when the file is
        missing.
    """
    read_dict = dict()
    record_separator = '-' * 40
    field_separator = '-' * 20
    try:
        # First pass: count the lines so progress can show a total.  It also
        # serves as the existence check.
        try:
            with open(txt_file, 'r', encoding='utf-8') as file:
                total = sum(1 for _ in file)
        except FileNotFoundError:
            if file_not_found_error:
                message = "--> File coulnd't be found in folder. --> '%s'" % txt_file
                Progress.exit_app(message=message, exit_all=exit_all)
            return read_dict

        now = time.time()
        # NOTE(review): presumably keeps the elapsed time used by Progress non-zero.
        time.sleep(0.01)

        key = 1
        new_line = True  # True when the next text line starts a new field.
        with open(txt_file, 'r', encoding='utf-8') as file:
            for count, line in enumerate(file, start=1):
                fields = read_dict.setdefault(key, list())

                # Strip only a real line terminator; the last line of a file
                # may not have one and must keep its final character.
                if line.endswith('\n'):
                    line = line[:-1]

                if show_progress:
                    Progress.progress(
                        count=count,
                        total=total,
                        now=now,
                        message='Reading records...',
                    )

                if line == record_separator:
                    key += 1
                    new_line = True
                    continue
                if line == field_separator:
                    new_line = True
                    continue

                if new_line:
                    fields.append(line)
                else:
                    fields[-1] = fields[-1] + line
                new_line = False

        if show_progress:
            print()

        # Drop records that never received a field (e.g. after two record
        # separators in a row).
        for key in list(read_dict):
            if not read_dict[key]:
                del read_dict[key]
    except Exception as e:
        if show_progress:
            print()
        message = "--> An error occurred while reading file -> '%s'" % txt_file
        Progress.exit_app(e=e, message=message, exit_all=exit_all)
    return read_dict