def scroll_div(num):
    """Keep scrolling the dialog's list container until enough entries load.

    The container is located through the parent of the ``<ul>`` inside the
    element whose ``role`` contains "dialog"; its ``class`` attribute is then
    used as a CSS selector in the injected JavaScript, which scrolls the
    container to the bottom and returns its ``scrollHeight``.

    The loop stops when the scroll height stops growing and at least ``num``
    ``notranslate`` links are present, or when more than 15 consecutive
    rounds pass without the height changing.

    :param num: number of entries expected to be loaded.
    :return: None.
    """
    container_class = browser.find_element_by_xpath(
        '//div[contains(@role, "dialog")]//div/ul/..'
    ).get_attribute('class')
    # NOTE(review): if the class attribute holds several space-separated
    # class names, ".%s" no longer selects this element - confirm.
    scroll_script = """
    followers = document.querySelector(".%s");
    followers.scrollTo(0, followers.scrollHeight);
    var lenOfPage=followers.scrollHeight;
    return lenOfPage;
    """ % container_class

    page_height = browser.execute_script(scroll_script)
    started_at = time.time()
    time.sleep(0.01)

    stalled_rounds = 0
    progressed = False
    done = False
    while not done:
        previous_height = page_height
        # Give the page a moment to load more entries before scrolling again.
        time.sleep(1)
        page_height = browser.execute_script(scroll_script)
        loaded = len(
            browser.find_elements_by_xpath(
                "//a[contains(@class,'notranslate')]"))

        if previous_height != page_height:
            # The container grew, so new content arrived.
            progressed = True
        elif num > loaded:
            # Nothing new this round and we are still short of the target.
            progressed = False
        else:
            # Height is stable and enough entries are present.
            done = True

        # Count consecutive rounds without growth; any growth resets it.
        stalled_rounds = 0 if progressed else stalled_rounds + 1

        Progress.progress(count=loaded,
                          total=num,
                          now=started_at,
                          message='Fetching...')
        if stalled_rounds > 15:
            print(
                '\n--> It has been too long time that program could not fetch new data. Now will ignore and continue to process.'
            )
            done = True
Exemple #2
0
def excel_read_to_dict(excel, number_of_sheet=0, exit_all=False):
    """Read one sheet of an Excel file into data rows and header rows.

    A row whose first cell converts to ``int`` is treated as a data row;
    every other row is treated as a header row. Both kinds are stored in
    dictionaries keyed by a running counter starting at 1.

    If ``excel`` does not end in ``.xlsx`` or ``.xls`` its extension is
    replaced by ``.xlsx``. If the resulting file does not exist, the other
    of the two extensions is tried before giving up.

    :param excel: file name or path of the workbook.
    :param number_of_sheet: zero-based index of the sheet to read.
    :param exit_all: forwarded to ``Progress.exit_app`` on failure.
    :return: tuple ``(all_data, headers)``; both are dicts mapping
        ``1..n`` to a list of cell values. They may be empty or partial
        when an error was reported.
    """
    all_data = dict()
    headers = dict()

    try:
        # Check and add xlsx if there is no xlsx/xls extension at the end.
        file_name, file_extension = os.path.splitext(excel)
        if file_extension not in ('.xlsx', '.xls'):
            file_extension = '.xlsx'
            excel = file_name + file_extension

        # Check all versions of the file name if it exists in the directory.
        # (presumably find_file resolves upper/lower-case variants of the
        # name - confirm against its definition)
        excel = find_file(excel)
        if not os.path.exists(excel):
            # The requested name was not found: switch between xlsx and xls
            # and check again.
            other_extension = '.xls' if file_extension == '.xlsx' else '.xlsx'
            excel2 = find_file(file_name + other_extension)
            if not os.path.exists(excel2):
                message = "! ! File couldn't be found in folder. --> '%s' or '%s'" % (
                    excel, excel2)
                Progress.exit_app(message=message, exit_all=exit_all)
                return all_data, headers
            excel = excel2

        workbook = xlrd.open_workbook(excel)  # workbook
        sheet = workbook.sheet_by_index(number_of_sheet)  # page

        number_of_column = sheet.ncols
        number_of_row = sheet.nrows

        count = 0
        total = number_of_row
        now = time.time()
        message = 'Reading excel...'
        time.sleep(0.01)

        number_of_data = 0
        number_of_header = 0

        for y in range(number_of_row):
            key = sheet.cell_value(rowx=y, colx=0)
            try:
                key = int(key)
            except (ValueError, TypeError, OverflowError):
                # Not a number: the row is handled as a header row below.
                pass

            row_values = [
                String.float_to_integer(sheet.cell_value(rowx=y, colx=x),
                                        force_number=False)
                for x in range(number_of_column)
            ]

            # Only rows with an integer in the first cell are data rows.
            # This keeps header rows out of the data dictionary; the excels
            # are designed with an ID column as the first column.
            if isinstance(key, int):
                number_of_data += 1
                all_data[number_of_data] = row_values
            else:
                number_of_header += 1
                headers[number_of_header] = row_values

            count += 1

            Progress.progress(
                count=count,
                total=total,
                now=now,
                message=message,
            )
    except PermissionError:
        message = "--> '%s' can't access to this file.\nIt is probably because the file is open. If this excel is open, please close it and re-run program." % excel
        Progress.exit_app(message=message, exit_all=exit_all)
    except Exception as e:
        message = "--> An error occurred while reading file... '%s'" % excel
        Progress.exit_app(e=e, message=message, exit_all=exit_all)

    print('\nNumber of item: %s' % len(all_data))
    # It returns a dictionary from a 3-row excel file as:
    # all_data = {
    #     1: ['1st Column Value', '2nd Column Value', '3rd Column Value', '4th Column Value', '5th Column Value', ],
    #     2: ['1st Column Value', '2nd Column Value', '3rd Column Value', '4th Column Value', '5th Column Value', ],
    #     3: ['1st Column Value', '2nd Column Value', '3rd Column Value', '4th Column Value', '5th Column Value', ],
    # }
    return all_data, headers
Exemple #3
0
def excel_create(
    excel,
    all_data,
    headers=None,
    sizes=None,
    locations=None,
    page_name='Page1',
    exit_all=False,
):
    """Write ``all_data`` to an ``.xlsx`` file with a formatted header row.

    Each value of ``all_data`` is one row. Plain elements are written to
    consecutive columns after an auto-numbered ``ID`` column. A ``dict``
    element is treated as named attributes: every attribute name gets its
    own column appended after the regular headers.

    The caller's ``headers``, ``sizes``, ``locations`` and row lists are not
    modified; the function works on copies.

    :param excel: target file name; its extension is always replaced by
        ``.xlsx``.
    :param all_data: dict whose values are the rows (sequences of cells).
    :param headers: column titles; missing ones are named ``Header N``.
    :param sizes: column widths; padded with 20, or all 25 when omitted.
    :param locations: per-column alignment, ``'left'``, ``'center'`` or
        ``'right'``; padded with ``'left'``.
    :param page_name: name of the worksheet.
    :param exit_all: forwarded to ``Progress.exit_app`` on failure.
    :return: None.
    """
    # Copy the list arguments so repeated calls with the same lists do not
    # accumulate generated headers or attribute columns.
    headers = list(headers) if headers else []
    size_fill = 20 if sizes else 25
    sizes = list(sizes) if sizes else []
    locations = list(locations) if locations else []

    # The output always gets the .xlsx extension (xlsxwriter produces the
    # XLSX format), whatever extension was passed in.
    excel = os.path.splitext(excel)[0] + '.xlsx'

    try:
        message = "'%s'  --> Creating..." % excel
        total = len(all_data)
        print(message)

        if not total:
            print('\n--> No data.')
            return

        # Length of the most recently seen "longest" row, not counting a
        # trailing dict.
        length_max = 0
        for val in all_data.values():
            try:
                if length_max < len(val):
                    if isinstance(val[-1], dict):
                        length_max = len(val) - 1
                    else:
                        length_max = len(val)
            except (TypeError, IndexError, KeyError):
                # Row without a usable length / last element: ignore it here.
                pass

        # If the longest row is longer than the headers, add "Header N" for
        # the rest of the columns.
        i = 0
        while len(headers) < length_max:
            i += 1
            headers.append('Header %s' % i)

        # If there are more headers than cells in a row, add empty cells to
        # the end of (a copy of) that row.
        base_width = len(headers)
        rows = []
        for val in all_data.values():
            cells = list(val)
            cells.extend([''] * (base_width - len(cells)))
            rows.append(cells)

        sizes.extend([size_fill] * (len(headers) - len(sizes)))
        locations.extend(['left'] * (len(headers) - len(locations)))

        # Every attribute name found in a dict element gets its own column
        # after the regular headers. The stored value is the worksheet
        # column index (header position + 1 because of the ID column).
        attrs_loc = dict()
        for cells in rows:
            for elem in cells:
                if isinstance(elem, dict):
                    for name in elem:
                        if name not in attrs_loc:
                            headers.append(name)
                            sizes.append(20)
                            locations.append('left')
                            attrs_loc[name] = len(headers)

        workbook = xlsxwriter.Workbook(excel)
        worksheet = workbook.add_worksheet(page_name)

        # Keep the header row visible while scrolling.
        worksheet.freeze_panes(1, 0)

        cell_format_header = workbook.add_format({'border': 1})
        cell_format_header.set_pattern(1)
        cell_format_header.set_bg_color('orange')
        cell_format_header.set_align('center')
        cell_format_header.set_align('vcenter')
        cell_format_header.set_bold()

        cell_format_center_regular = workbook.add_format({'border': 1})
        cell_format_center_regular.set_align('center')
        cell_format_center_regular.set_align('vcenter')

        cell_format_regular = workbook.add_format({'border': 1})
        cell_format_regular.set_align('left')
        cell_format_regular.set_align('vcenter')

        cell_format_right_regular = workbook.add_format({'border': 1})
        cell_format_right_regular.set_align('right')
        cell_format_right_regular.set_align('vcenter')

        cell_format_copyr = workbook.add_format({'border': 1})
        cell_format_copyr.set_pattern(1)
        cell_format_copyr.set_bg_color('FABF8F')
        cell_format_copyr.set_align('center')
        cell_format_copyr.set_align('vcenter')
        cell_format_copyr.set_bold()

        # Header row: ID column, one column per header, then the credit cell.
        worksheet.write(0, 0, 'ID', cell_format_header)
        worksheet.set_column(0, 0, 8)
        col = 1
        for head, size in zip(headers, sizes):
            worksheet.write(0, col, head, cell_format_header)
            worksheet.set_column(col, col, size)
            col += 1
        worksheet.write(0, col, 'Automated by BerkayMizrak.com',
                        cell_format_copyr)
        worksheet.set_column(col, col, 34)

        count = 0
        now = time.time()
        time.sleep(0.01)

        # Data rows start at worksheet row 1; the ID equals the row number.
        for row, cells in enumerate(rows, start=1):
            worksheet.write(row, 0, row, cell_format_center_regular)

            col = 0
            for elem in cells:
                if isinstance(elem, dict):
                    # Attributes are written to their own columns below.
                    continue

                col += 1
                location = locations[col - 1] if col <= len(
                    locations) else 'left'
                if location == 'center':
                    worksheet.write(row, col, elem,
                                    cell_format_center_regular)
                elif location == 'right':
                    worksheet.write(row, col, elem,
                                    cell_format_right_regular)
                else:
                    # Left-aligned columns: integer-like values are written
                    # as numbers and centered, everything else as-is.
                    # NOTE(review): int() truncates floats such as 1.5 to 1
                    # - confirm that is intended.
                    try:
                        number = int(elem)
                    except (ValueError, TypeError, OverflowError):
                        worksheet.write(row, col, elem, cell_format_regular)
                    else:
                        worksheet.write(row, col, number,
                                        cell_format_center_regular)

            for elem in cells:
                if isinstance(elem, dict):
                    for name, attr in elem.items():
                        worksheet.write(row, attrs_loc[name], attr,
                                        cell_format_regular)

            count += 1
            Progress.progress(
                count=count,
                total=total,
                now=now,
            )

        print()
        workbook.close()
        message = "'%s' Data Saved to Excel -->> '%s'" % (count, excel)
        print(message)
    except PermissionError:
        message = "--> '%s' can't access to this file.\nIt is probably because the file is open. If this excel is open, please close it and re-run program." % excel
        Progress.exit_app(message=message, exit_all=exit_all)
    except Exception as e:
        message = "--> An error occurred while creating file... '%s'" % excel
        Progress.exit_app(e=e, message=message, exit_all=exit_all)
Exemple #4
0
def read_records_data_to_dict(txt_file,
                              show_progress=True,
                              file_not_found_error=True,
                              exit_all=True):
    """Read column-like records from a plain text file into a dictionary.

    File layout, as handled by the parser:

    * a line of exactly 40 dashes ends the current record (the key is
      incremented),
    * a line of exactly 20 dashes ends the current field,
    * any other line belongs to the current field; consecutive lines of one
      field are concatenated without a separator.

    :param txt_file: path of the text file (read as UTF-8).
    :param show_progress: report progress through ``Progress.progress``.
    :param file_not_found_error: when the file is missing, report it through
        ``Progress.exit_app``; otherwise a missing file is silently ignored.
    :param exit_all: forwarded to ``Progress.exit_app``.
    :return: dict mapping record number (starting at 1) to the list of its
        field strings; records without fields are omitted.
    """
    read_dict = dict()
    try:
        try:
            file = open(txt_file, 'r', encoding='utf-8')
        except FileNotFoundError:
            if file_not_found_error:
                message = "--> File coulnd't be found in folder. --> '%s'" % txt_file
                Progress.exit_app(message=message, exit_all=exit_all)
            return read_dict

        with file:
            # First pass only counts the lines so progress has a total.
            total = sum(1 for _ in file)
            file.seek(0)

            count = 0
            now = time.time()
            time.sleep(0.01)
            new_line = True
            key = 1
            for line in file:
                # Drop only the line terminator; a final line without one
                # must keep its last character.
                line = line.rstrip('\n')
                if show_progress:
                    count += 1
                    Progress.progress(
                        count=count,
                        total=total,
                        now=now,
                        message='Reading records...',
                    )

                if line == '-' * 40:
                    # End of record: following fields go to the next key.
                    key += 1
                    new_line = True
                    continue
                if line == '-' * 20:
                    # End of field: the next line starts a new field.
                    new_line = True
                    continue

                if new_line:
                    read_dict.setdefault(key, list()).append(line)
                else:
                    # Continuation of the current field.
                    read_dict[key][-1] = read_dict[key][-1] + line
                new_line = False

            if show_progress:
                print()
    except Exception as e:
        if show_progress:
            print()
        message = "--> An error occurred while reading file -> '%s'" % txt_file
        Progress.exit_app(e=e, message=message, exit_all=exit_all)

    return read_dict