Ejemplo n.º 1
0
def read_records_to_list(txt_file,
                         encoding='utf-8',
                         file_not_found_error=False,
                         exit_all=True):
    """Read a text file and return its lines as a list of strings.

    Every ``'\\n'`` is removed from each line before it is stored.

    Args:
        txt_file: Path of the file to read.
        encoding: Text encoding used to open the file (e.g. ``'utf-8'`` or
            ``'latin-1'``).
        file_not_found_error: When True a missing file is reported through
            ``Progress.exit_app``; when False a missing file is silently
            treated as "no records".
        exit_all: Forwarded to ``Progress.exit_app`` when an error is reported.

    Returns:
        The list of lines read so far. It is empty when the file is missing
        and may be partial when reading failed half-way and
        ``Progress.exit_app`` returned control to the caller.
    """
    records = []
    message = ''
    error_text = ''
    error_def = False
    try:
        # The context manager guarantees the handle is closed even when
        # decoding fails in the middle of the file.
        with open(txt_file, 'r', encoding=encoding) as handle:
            for line in handle:
                records.append(line.replace('\n', ''))
    except FileNotFoundError:
        if file_not_found_error:
            error_def = True
            message = "--> File coulnd't be found in folder. --> '%s'" % txt_file
            error_text = None
    except Exception as e:
        error_def = True
        message = '--> An error occurred while reading file. --> "%s"' % txt_file
        error_text = e

    if error_def:
        Progress.exit_app(message=message, e=error_text, exit_all=exit_all)

    return records
Ejemplo n.º 2
0
def create_folder(folder_name, path='./', exit_all=True):
    """Create ``folder_name`` inside ``path`` if it does not exist yet.

    The folder name is first passed through ``windows_folder_name``
    (defined elsewhere in the project).

    Args:
        folder_name: Name of the folder to create.
        path: Parent directory. A trailing separator is optional.
        exit_all: Forwarded to ``Progress.exit_app`` when creation fails.

    Returns:
        True when the folder was created by this call; False when it already
        existed or when creation failed and ``Progress.exit_app`` returned.
    """
    folder_name = windows_folder_name(folder_name)
    # os.path.join inserts the separator only when it is missing, so
    # path='data' and path='data/' now both target 'data/<folder_name>'
    # (plain concatenation produced 'data<folder_name>' for the former).
    path = os.path.join(path, folder_name)
    try:
        if not os.path.exists(path):
            os.mkdir(path)
            return True
    except Exception as e:
        message = '--> An error occurred while creating folder. Please try again with running program as administrator or create folder by yourself.\n' \
                  'Folder Name: %s' % folder_name
        Progress.exit_app(message=message, e=e, exit_all=exit_all)
    return False
Ejemplo n.º 3
0
def save_records_list(txt_file, records_list, overwrite=False, exit_all=True):
    """Write every item of ``records_list`` to ``txt_file``, one per line.

    Args:
        txt_file: Path of the UTF-8 text file to write.
        records_list: Iterable of records; each one is formatted with ``%s``
            and followed by a newline.
        overwrite: When True the file is truncated first, otherwise the
            records are appended to the existing content.
        exit_all: Forwarded to ``Progress.exit_app`` when writing fails.
    """
    mode = 'w' if overwrite else 'a'
    try:
        # The context manager closes (and flushes) the handle even when
        # formatting or writing one of the records raises.
        with open(txt_file, mode, encoding='utf-8') as handle:
            for record in records_list:
                handle.write('%s\n' % record)
    except Exception as e:
        message = '--> An error occurred while creating file. --> "%s"' % txt_file
        Progress.exit_app(message=message, e=e, exit_all=exit_all)
Ejemplo n.º 4
0
def turn_off_all_alerts(browser, accept=True, show_error=False, sound_for_error=False, exit_all=False):
    """Accept or dismiss the alert currently exposed by ``browser``.

    Args:
        browser: Object exposing ``switch_to.alert`` (a Selenium driver).
        accept: True calls ``alert.accept()``, False calls ``alert.dismiss()``.
        show_error: When True a failure is reported via ``Progress.exit_app``.
        sound_for_error: When True a failure triggers ``Progress.sound_notify``.
        exit_all: Forwarded to ``Progress.exit_app``.

    Any exception (for instance when there is no alert to switch to) is
    swallowed unless one of the two error flags is set.
    """
    # Alternative for blocking pop-ups at browser level:
    # options.add_argument("--disable-popup-blocking")
    try:
        pending_alert = browser.switch_to.alert
        respond = pending_alert.accept if accept else pending_alert.dismiss
        respond()
    except Exception as e:
        if sound_for_error:
            Progress.sound_notify()
        if show_error:
            Progress.exit_app(e=e, exit_all=exit_all)
Ejemplo n.º 5
0
def save_dict_with_pprint_pformat(file, dict_as_string, exit_all=False):
    """Save an already-formatted dict string to a ``.txt`` file.

    Args:
        file: Target path; its extension is replaced by ``.txt`` when it is
            anything else.
        dict_as_string: The text to save. Anything that is not a ``str`` is
            ignored and nothing is written.
        exit_all: Forwarded to ``save_records_list`` and to
            ``Progress.exit_app`` when an error occurs.
    """
    if not isinstance(dict_as_string, str):
        return

    try:
        stem, extension = os.path.splitext(file)
        if extension != '.txt':
            file = stem + '.txt'

        # One record per line of the formatted text; the file is overwritten.
        lines = dict_as_string.split('\n')
        save_records_list(file, lines, overwrite=True, exit_all=exit_all)
    except Exception as e:
        message = '--> An error occurred while creating file. --> "%s"' % file
        Progress.exit_app(message=message, e=e, exit_all=exit_all)
Ejemplo n.º 6
0
def save_records_data(txt_file,
                      val_list,
                      message='File updating...',
                      exit_all=True):
    """Append one record, made of several values, to a text file.

    Each value is written on its own line followed by a 20-dash separator
    line; the record is closed with a 40-dash line.

    Args:
        txt_file: Path of the UTF-8 text file to append to.
        val_list: Iterable of values; each one is converted with ``str``.
        message: Text printed before writing starts.
        exit_all: Forwarded to ``Progress.exit_app`` when writing fails.
    """
    value_separator = '-' * 20
    record_separator = '-' * 40
    try:
        print(message)
        # The context manager closes the handle even if a write (or the
        # str() conversion of a value) raises half-way through.
        with open(txt_file, 'a', encoding='utf-8') as handle:
            for val in val_list:
                handle.write(str(val) + '\n' + value_separator + '\n')
            handle.write(record_separator + '\n')
        print('File saved.')
    except Exception as e:
        message = '--> An error occurred while saving file.'
        Progress.exit_app(message=message, e=e, exit_all=exit_all)
Ejemplo n.º 7
0
def dump_data(file, data, exit_all=False):
    """Pickle a ``dict`` or ``list`` to disk.

    Args:
        file: Target path; its extension is replaced by ``.pickle`` when it
            is anything else.
        data: The object to store. Only ``dict`` and ``list`` instances are
            accepted; anything else is reported via ``Progress.exit_app``.
        exit_all: Forwarded to ``Progress.exit_app`` when an error occurs.
    """
    try:
        if isinstance(data, (dict, list)):
            file_name, file_extension = os.path.splitext(file)
            if file_extension != '.pickle':
                file = file_name + '.pickle'

            # The context manager closes the handle even when pickling fails
            # (e.g. the data contains an unpicklable object).
            with open(file, "wb") as pickle_out:
                pickle.dump(data, pickle_out)
        else:
            message = "--> Data type is not acceptable. Data type only can be a 'list' or 'dict'."
            Progress.exit_app(message=message, exit_all=exit_all)
    except Exception as e:
        message = '--> An error occurred while creating file. --> "%s"' % file
        Progress.exit_app(message=message, e=e, exit_all=exit_all)
Ejemplo n.º 8
0
def timestamp_def(seperate=False,
                  exit_all=True,
                  alternative='timestamp_error',
                  with_space=False):
    """Build a timestamp string from the current local time.

    Args:
        seperate: When True return the ``(date, hour)`` pair instead of the
            joined string.
        exit_all: Forwarded to ``Progress.exit_app`` when formatting fails.
        alternative: Value used for date, hour and the joined string when
            formatting fails and ``Progress.exit_app`` returns.
        with_space: Join date and hour with ``' - '`` instead of ``'-'``.

    Returns:
        ``'YYYY.MM.DD-HH.MM.SS'`` (or the spaced variant), or the tuple
        ``('YYYY.MM.DD', 'HH.MM.SS')`` when ``seperate`` is True.
    """
    try:
        moment = datetime.now()
        date = moment.strftime("%Y.%m.%d")
        hour = moment.strftime("%H.%M.%S")
        joiner = ' - ' if with_space else '-'
        now = date + joiner + hour
    except Exception as e:
        message = "--> An error occurred while creating timestamp."
        Progress.exit_app(message=message, e=e, exit_all=exit_all)
        now = date = hour = alternative

    return (date, hour) if seperate else now
Ejemplo n.º 9
0
def read_dumped_data(file,
                     data_type=dict,
                     file_not_found_error=False,
                     exit_all=True):
    """Load data previously stored as ``.pickle`` (dict) or ``.npy`` (list).

    Args:
        file: Path of the stored file. Its extension is replaced by
            ``.pickle`` (for ``data_type=dict``) or ``.npy`` (for
            ``data_type=list``) when it is anything else.
        data_type: ``dict`` to unpickle the file, ``list`` to load it with
            ``np.load(...).tolist()``. Any other value is reported via
            ``Progress.exit_app``.
        file_not_found_error: When True a missing file is reported through
            ``Progress.exit_app``; a missing file yields ``None`` either way.
        exit_all: Forwarded to ``Progress.exit_app`` when an error occurs.

    Returns:
        The loaded object, or ``None`` when the file is missing, the data
        type is unsupported, or loading failed and ``Progress.exit_app``
        returned control to the caller.
    """
    result = None
    try:
        if data_type == dict:
            wanted_extension = '.pickle'
        elif data_type == list:
            wanted_extension = '.npy'
        else:
            message = "--> File extension is not acceptable. File extension only can be '.pickle' or '.npy'."
            Progress.exit_app(message=message, exit_all=exit_all)
            return result

        file_name, file_extension = os.path.splitext(file)
        if file_extension != wanted_extension:
            file = file_name + wanted_extension

        if not os.path.exists(file):
            if file_not_found_error:
                message = "! ! File couldn't be found in folder. --> '%s'" % (
                    file)
                Progress.exit_app(message=message, exit_all=exit_all)
            return None

        if data_type == dict:
            # SECURITY: pickle.load executes arbitrary code embedded in the
            # file - only use this on files written by this program.
            # The context manager closes the handle, which the previous
            # implementation never did.
            with open(file, "rb") as pickle_in:
                result = pickle.load(pickle_in)
        else:
            result = np.load(file).tolist()
    except Exception as e:
        message = '--> An error occurred while reading file. --> "%s"' % file
        Progress.exit_app(e=e, message=message, exit_all=exit_all)

    return result
Ejemplo n.º 10
0
def _find_recaptcha_site_key(browser):
    """Return the reCAPTCHA site key found on the current page, or None."""
    try:
        # Classic widget: an element with class "g-recaptcha" carrying the
        # key in its data-sitekey attribute.
        captcha_box = browser.find_element_by_xpath(
            '//*[contains(@class,"g-recaptcha")]')
        site_key = captcha_box.get_attribute('data-sitekey')
        if site_key:
            return site_key
    except Exception:
        # Classic widget not present; fall back to the iframe lookup below.
        pass

    from urllib.parse import parse_qs, urlparse

    # Newer widgets expose the key as the "k" query parameter of the iframe
    # source. Parsing the query string finds it wherever it appears,
    # including as the first parameter.
    captcha_box = browser.find_element_by_xpath(
        '//iframe[contains(@role, "presentation")]')
    captcha_src = captcha_box.get_attribute('src') or ''
    key_values = parse_qs(urlparse(captcha_src).query).get('k')
    return key_values[0] if key_values else None


def captcha_solve(browser, cost_file='costs.txt', ANTICAPTCHA_KEY=None, save_cost=True, captcha_sound=True, domain=None):
    """Solve a reCAPTCHA on the current page through the anti-captcha service.

    Args:
        browser: Selenium driver showing the page.
        cost_file: Text file whose first line holds the accumulated cost.
        ANTICAPTCHA_KEY: Service key; read from the ``ANTICAPTCHA_KEY``
            environment variable when not given.
        save_cost: When True a non-zero cost is added to ``cost_file``.
        captcha_sound: When True ``Progress.sound_notify_times`` is called
            once a captcha has been detected.
        domain: Page URL sent to the service; defaults to
            ``browser.current_url``.

    Returns:
        True when a ``g-recaptcha-response`` element exists on the page
        (whether or not solving succeeded), False otherwise.
    """
    if not ANTICAPTCHA_KEY:
        ANTICAPTCHA_KEY = os.getenv('ANTICAPTCHA_KEY')

    if not domain:
        domain = browser.current_url

    try:
        browser.find_element_by_xpath('//*[@id = "g-recaptcha-response"]')
    except Exception:
        # No captcha on the page: nothing to solve.
        return False

    user_answer = None
    cost = 0
    key = ''
    start_time = time.time()
    if captcha_sound:
        Progress.sound_notify_times(times=1)
    print('--> reCAPTCHA solving. It might take some time, please wait...')

    try:
        site_key = _find_recaptcha_site_key(browser)
        if not site_key:
            raise Exception('reCAPTCHA site key could not be found on the page.')

        user_answer = NoCaptchaTaskProxyless.NoCaptchaTaskProxyless(
            anticaptcha_key=ANTICAPTCHA_KEY).captcha_handler(
            websiteURL=domain, websiteKey=site_key)
        if 'errorDescription' in user_answer:
            raise Exception('The anti-captcha service reported an error.')
        key = user_answer['solution']['gRecaptchaResponse']
        try:
            cost = float(user_answer['cost'])
        except Exception:
            # Missing or non-numeric cost: treat the solve as free.
            cost = 0

        # Everything above worked, so there is no error.
        error_captcha = False
    except Exception as e:
        error_captcha = True
        message = '--> An error occurred while solving reCAPTCHA. Processing is in progress.'
        # user_answer is still None when the failure happened before the
        # service was called (e.g. no site key); guard the lookup so the
        # handler itself cannot raise.
        if isinstance(user_answer, dict) and 'errorDescription' in user_answer:
            message_from_system = user_answer['errorDescription']
            message = message + '\n' + str(message_from_system)
        Progress.exit_app(e=e, message=message, exit_all=False)

    if not error_captcha:
        if 'endTime' in user_answer and 'createTime' in user_answer:
            pass_time = user_answer['endTime'] - user_answer['createTime']
        else:
            pass_time = time.time() - start_time

        print('\nCalculation time: %s' % Progress.time_definition(pass_time))
        print('reCAPTCHA solved. Price: $%s' % cost)

    if save_cost:
        if cost != 0 and isinstance(cost, (int, float)):
            read_record = File.read_records_to_list(txt_file=cost_file, file_not_found_error=False, exit_all=False)
            try:
                balance = float(read_record[0])
            except Exception:
                # Empty or unreadable cost file: start from zero.
                balance = 0
            balance += cost
            File.save_records_list(txt_file=cost_file, records_list=[balance], overwrite=True, exit_all=False)

    # Add the solution to the page. This is best effort by design: a failure
    # here must not abort the caller's flow.
    try:
        browser.execute_script('document.getElementById("g-recaptcha-response").innerHTML = "%s"' % key)
    except Exception:
        pass

    return True
Ejemplo n.º 11
0
def get_proxy_orbit(selenium=True,
                    get_random=True,
                    count_loop=1,
                    save_false_proxies=True,
                    error_file='Recorded FALSE Orbit Proxies.txt',
                    save_ok_proxies=True,
                    ok_file='Recorded OK Orbit Proxies.txt',
                    number_of_min_saved_proxies=7,
                    number_of_save_proxies=15,
                    run_test=True,
                    test_header=None,
                    test_url=None,
                    test_timeout=1,
                    sound_error=True,
                    allow_print=True,
                    no_proxy=True,
                    for_https=True,
                    API_KEY=''):
    """Pick a proxy obtained from the Proxy Orbit API, optionally testing it.

    The function loops until it has either decided to use no proxy or found
    a proxy that passes all enabled checks.

    Args:
        selenium: When True the proxy is returned as a
            ``'--proxy-server=<host:port>'`` / ``'--no-proxy-server'``
            string; when False as a dict keyed ``'https'`` or ``'http'``
            (empty dict for "no proxy").
        get_random: When True ``count_loop`` is replaced by a random integer
            in [1, 101] and the proxy is chosen with ``random.choice``;
            otherwise the proxy index is derived from ``count_loop``.
        count_loop: Counter incremented on every loop pass.
        save_false_proxies: When True proxies listed in ``error_file`` are
            rejected and failing proxies are reported through
            ``File.write_ok_and_false_proxy``.
        error_file: File listing proxies that did not work.
        save_ok_proxies: When True the candidate list is written to
            ``ok_file`` and new proxies are fetched when too few are stored.
        ok_file: File listing candidate proxies.
        number_of_min_saved_proxies: New proxies are requested when
            ``ok_file`` holds fewer entries than this.
        number_of_save_proxies: Number of proxies requested from the API and
            the maximum number kept in ``ok_file``; raised to 2 when lower.
        run_test: When True the chosen proxy must pass a test request.
        test_header: Headers for the test request; defaults to a built-in
            User-Agent header.
        test_url: URL for the test request; defaults to
            ``'https://api.myip.com/'``. When given, only the status code
            (200) is checked; otherwise the JSON reply must not be ``{}``.
        test_timeout: Timeout passed to the test request and to the
            connectivity check that precedes it.
        sound_error: Forwarded to ``internet_connection``.
        allow_print: When True progress messages are printed.
        no_proxy: When True every pass where ``count_loop`` is a multiple
            of 10 yields "no proxy".
        for_https: Selects the ``ssl`` flag sent to the API, the key used in
            the proxies dict and the scheme the test URL is rewritten to.
        API_KEY: Token sent to the Proxy Orbit API.

    Returns:
        ``(proxy_decide, curl)`` when ``get_random`` is True, otherwise
        ``(count_loop, proxy_decide, curl)``. ``curl`` is the proxy URL
        string; on the "no proxy" path it keeps whatever value it last had
        (initially ``''``).
    """
    # You can use this function either with count_loop or with get_random.
    # count_loop lets the caller run this in a loop (count_loop += 1) and receive the proxies of the proxy file one by one.
    # If get_random is True, a proxy is picked randomly from the proxy file regardless of count_loop.
    if allow_print:
        print()
        print('--> Proxy scraper is started.')
        print()
    if get_random:
        count_loop = random.randint(1, 101)
    if number_of_save_proxies < 2:
        number_of_save_proxies = 2  # Proxy Orbit send as list when count is more than 1

    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36'
    header = {"User-Agent": user_agent}

    # Prepare the URL used to test a proxy; it stays '' when run_test is False.
    url = ''
    if run_test:
        if not test_header:
            test_header = header

        if not test_url:
            url = 'https://api.myip.com/'
            # url = 'https://api.ipify.org/'
        else:
            url = test_url

        # Force the test URL scheme to match for_https.
        # NOTE(review): the whole URL is lower-cased when the scheme is rewritten.
        if for_https:
            if url.lower().startswith('http://'):
                url = url.lower().replace('http://', 'https://', 1)
        else:
            if url.lower().startswith('https://'):
                url = url.lower().replace('https://', 'http://', 1)

    proxy_decide = ''
    again = True
    curl = ''
    while again:
        # Build the API request URL for this pass.
        url_proxy = "http://api.proxyorbit.com/v1/"
        url_proxy += '?ssl=%s' % str(for_https).lower()
        url_proxy += '&protocols=http'
        url_proxy += '&count=%s' % number_of_save_proxies
        url_proxy += '&token=%s' % API_KEY
        check_internet = True  # will use this to check internet connection without proxy only once.
        count_loop += 1
        again = False  # will leave while unless again defined True

        if count_loop % 10 == 0 and no_proxy:
            # Every 10th value of count_loop the function returns "no proxy".

            if allow_print:
                print('Default proxy settings setted.')
            if selenium:
                proxy_decide = '--no-proxy-server'
            else:
                proxy_decide = {}
        else:
            # Regular path: choose a proxy from the stored / freshly fetched list.

            error_ip_list = File.read_records_to_list(
                error_file, file_not_found_error=False, exit_all=False)
            ok_ip_list = File.read_records_to_list(ok_file,
                                                   file_not_found_error=False,
                                                   exit_all=False)
            # Work on a copy of the stored list; it becomes the candidate pool.
            ok_ip_save_list = []
            for i in ok_ip_list:
                ok_ip_save_list.append(i)
            if ((len(ok_ip_list) < number_of_min_saved_proxies)
                    and save_ok_proxies):
                # If fewer proxies are stored in the file than the required minimum,
                # fetch new proxies from the API.
                check_internet = False  # internet connection checked, so will not check it in next commands.
                internet_connection(timeout=4,
                                    reload_time=30,
                                    wait_for_network=True,
                                    sound_error=sound_error)
                try:
                    message = 'Proxies are grabbed.'
                    logging.log(logging.INFO,
                                message + '  |  URL: %s' % url_proxy)
                    if allow_print:
                        print(message)
                        print()
                    resp = requests.get(url_proxy)
                    resp = resp.json()
                except:
                    message = 'Error occurred while crawling new proxies.'
                    logging.log(logging.ERROR, message)
                    if allow_print:
                        print('\n--> ' + message)
                    again = True
                    # Undo the increment so the retry uses the same count_loop.
                    count_loop -= 1
                    Progress.count_down(30)

                    # continue to loop until get the new proxies.
                    continue

                # Each entry of the API reply is expected to carry a 'curl' field.
                curls = []
                for pr in resp:
                    curls.append(pr['curl'])
                count_ip = 0

                for curl in curls:
                    if save_false_proxies:
                        # Skip new proxies that are already known not to work.

                        if curl not in error_ip_list:
                            if save_ok_proxies:
                                if curl not in ok_ip_save_list:
                                    count_ip += 1
                                    ok_ip_save_list.append(curl)
                            else:
                                count_ip += 1
                                ok_ip_save_list.append(curl)
                    else:
                        if save_ok_proxies:
                            # Add the new proxy to the pool so it is saved to the proxy file below.
                            if curl not in ok_ip_save_list:
                                count_ip += 1
                                ok_ip_save_list.append(curl)

                if save_ok_proxies:
                    # Keep at most number_of_save_proxies entries, chosen at random.
                    if len(ok_ip_save_list) > number_of_save_proxies:
                        ok_ip_save_list = random.sample(
                            ok_ip_save_list, number_of_save_proxies)
                    File.save_records_list(ok_file,
                                           ok_ip_save_list,
                                           overwrite=True,
                                           exit_all=False)

            # Nothing to choose from: report and start another pass.
            if not len(ok_ip_save_list):
                again = True
                message = "Proxy couldn't get. Trying again..."
                Progress.exit_app(message=message, exit_all=False)
                continue

            if get_random:
                curl = random.choice(ok_ip_save_list)
            else:
                # The index is computed so that proxies are taken from LAST to FIRST.
                remaining = count_loop % len(ok_ip_save_list)
                remaining = len(ok_ip_save_list) - remaining
                if remaining >= len(ok_ip_save_list):
                    remaining = 0

                curl = ok_ip_save_list[remaining]
            # Strip spaces and newlines from the stored record.
            curl = curl.replace(' ', '')
            curl = curl.replace('\n', '')

            # Dict form of the proxy; also used for the test request below.
            proxy_decide = {}
            if for_https:
                proxy_decide['https'] = curl
            else:
                proxy_decide['http'] = curl

            if save_false_proxies:
                # The chosen proxy is listed as failing: report it and try another one.
                if curl in error_ip_list:
                    File.write_ok_and_false_proxy(curl,
                                                  error_file=error_file,
                                                  ok_file=ok_file)
                    again = True
                    continue

            if run_test:
                try:
                    # Check plain connectivity first, unless it was already
                    # checked while fetching proxies in this pass.
                    if check_internet:
                        internet_connection(timeout=test_timeout,
                                            reload_time=30,
                                            wait_for_network=True,
                                            sound_error=sound_error)

                    response = requests.get(url,
                                            proxies=proxy_decide,
                                            timeout=test_timeout,
                                            stream=True,
                                            headers=test_header)
                    if test_url:
                        # A caller-supplied test URL is only checked for status code 200.
                        if response.status_code != 200:
                            raise Exception
                    else:
                        # The default test URL must answer with a non-empty JSON object.
                        response = response.json()
                        if response == {}:
                            raise Exception
                    if save_ok_proxies:
                        File.save_records_list(ok_file,
                                               ok_ip_save_list,
                                               overwrite=True,
                                               exit_all=False)
                except Exception as e:
                    # Any failure during the test marks the proxy as bad and
                    # triggers another pass.
                    message = "Proxy doesn't work. Next proxy is testing...\n" \
                              "Proxy: %s" % (curl)
                    if not get_random:
                        message += '\tProxy Number: %s' % remaining
                    if allow_print:
                        print(message)
                        print()
                    again = True
                    if save_false_proxies:
                        File.write_ok_and_false_proxy(curl,
                                                      error_file=error_file,
                                                      ok_file=ok_file)
                    continue

            # Everything after the last '//' of the proxy URL.
            ip_port = curl.split('//')[-1]
            if selenium:
                proxy_decide = '--proxy-server=%s' % (ip_port)
            else:
                # proxy_decide defined above.
                pass

            if allow_print:
                if get_random:
                    print("Proxy activated.\nProxy: %s" % (curl))
                else:
                    print("Proxy activated. Proxy Number: %s.\nProxy: %s" %
                          (count_loop, curl))

    if allow_print:
        print()
    if get_random:
        return proxy_decide, curl
    else:
        return count_loop, proxy_decide, curl
            # In first loop, it will raise Exception and will create Browser first time.
            browser.current_url
        except Exception as e:
            options = webdriver.ChromeOptions()
            options.add_argument('user-agent={%s}' % user_agent)
            options.add_argument('--blink-settings=imagesEnabled=false'
                                 )  # Remove images from pages to open fast
            browser = webdriver.Chrome(options=options, executable_path=driver)

        count_reflesh = 0
        while not login_succesful:
            count_reflesh += 1
            if count_reflesh > 3:
                browser.quit()
                message = 'Program shutting down because of errors.'
                Progress.exit_app(message=message, exit_all=True)

            # Go to the link and check the xpath given if element present on the page.
            url = 'https://www.instagram.com/accounts/login/'
            xpath = '//div[@id = "react-root"]'
            Selenium.check_page(browser, url, xpath, 10)

            try:
                WebDriverWait(browser, 5).until(
                    EC.presence_of_element_located((By.NAME, "username")))
            except:
                message = 'Login form could not be found on page. Will load again.'
                Progress.exit_app(message=message, exit_all=False)
                continue

            element_username = browser.find_element_by_name("username")
Ejemplo n.º 13
0
def get_proxy(selenium=True,
              get_random=True,
              count_loop=1,
              save_false_proxies=True,
              error_file='Recorded FALSE Proxies.txt',
              save_ok_proxies=True,
              ok_file='Recorded OK Proxies.txt',
              number_of_min_saved_proxies=20,
              number_of_save_proxies=40,
              run_test=True,
              test_header=None,
              test_url=None,
              test_timeout=1,
              sound_error=True,
              allow_print=True,
              no_proxy=True,
              for_https=True):
    # You can use this function with whether count_loop or get_random.
    # count_loop helps you to run it in while with using count_loop+=1 and you can receive proxies 1 by 1 in lines of proxy file.
    # if get_random set True, you get proxy randomly from proxy file without looking count_loop.
    if get_random:
        count_loop = random.randint(1, 101)

    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36'
    header = {"User-Agent": user_agent}

    if run_test:
        if not test_header:
            test_header = header

        if not test_url:
            url = 'https://api.myip.com/'
            # url = 'https://api.ipify.org/'
        else:
            url = test_url

        if for_https:
            if url.lower().startswith('http://'):
                url = url.lower().replace('http://', 'https://', 1)
        else:
            if url.lower().startswith('https://'):
                url = url.lower().replace('https://', 'http://', 1)

    proxy_decide = ''
    url_proxies = [
        'https://hidemy.name/tr/proxy-list/?type=s#list',
        'https://www.us-proxy.org/',
        'https://www.sslproxies.org/',
    ]
    again = True
    force_scrap = False
    while again:
        random_proxy = random.randint(0, len(url_proxies) - 1)
        url_proxy = url_proxies[random_proxy]
        check_internet = True  # will use this to check internet connection without proxy only once.
        count_loop += 1
        again = False  # will leave while unless again defined True

        if count_loop % 10 == 0 and no_proxy:
            # def will return NON-PROXY each 10 times

            if allow_print:
                print('Default proxy settings setted.')
            if selenium:
                proxy_decide = '--no-proxy-server'
            else:
                proxy_decide = {}

            record_ip_type = ''
        else:
            # def will return NON-PROXY each 10 times

            error_ip_list = File.read_records_to_list(
                error_file, file_not_found_error=False, exit_all=False)
            ok_ip_list = File.read_records_to_list(ok_file,
                                                   file_not_found_error=False,
                                                   exit_all=False)
            ok_ip_save_list = []
            for i in ok_ip_list:
                ok_ip_save_list.append(i)
            if ((len(ok_ip_list) < number_of_min_saved_proxies)
                    and save_ok_proxies) or force_scrap:
                # if number saved proxies to the file less than minimum required number of proxies,
                # will crawl more new proxies.
                check_internet = False  # internet connection checked, so will not check it in next commands.
                internet_connection(timeout=4,
                                    reload_time=30,
                                    wait_for_network=True,
                                    sound_error=sound_error)
                try:
                    print('Proxies are grabbed:')
                    print(url_proxy)
                    page = requests.get(
                        url_proxy,
                        headers=header,
                        timeout=4,
                    )
                except:
                    message = 'Error occurred while crawling new proxies.'
                    logging.log(logging.ERROR, message)
                    if allow_print:
                        print('\n--> ' + message)
                    again = True
                    count_loop -= 1

                    # continue to loop until get the new proxies.
                    continue

                tree = html.fromstring(page.content)
                if 'hidemy.name' in url_proxy:
                    ips = tree.xpath(
                        '//div[@class = "table_block"]/table//tbody/tr/td[1]'
                    )  # list of all ips
                    ports = tree.xpath(
                        '//div[@class = "table_block"]/table//tbody/tr/td[2]'
                    )  # list of all ports
                    types = tree.xpath(
                        '//div[@class = "table_block"]/table//tbody/tr/td[5]'
                    )  # list of all types
                elif 'us-proxy' in url_proxy:
                    ips = tree.xpath(
                        '//div[contains(@class, "fpl-list")]//table//tr/td[1]'
                    )  # list of all ips
                    ports = tree.xpath(
                        '//div[contains(@class, "fpl-list")]//table//tr/td[2]'
                    )  # list of all ports
                    types = tree.xpath(
                        '//div[contains(@class, "fpl-list")]//table//tr/td[7]'
                    )  # list of all types
                elif 'sslproxies' in url_proxy:
                    ips = tree.xpath(
                        '//div[contains(@class, "fpl-list")]//table//tr/td[1]'
                    )  # list of all ips
                    ports = tree.xpath(
                        '//div[contains(@class, "fpl-list")]//table//tr/td[2]'
                    )  # list of all ports
                    types = tree.xpath(
                        '//div[contains(@class, "fpl-list")]//table//tr/td[7]'
                    )  # list of all types
                count_ip = 0
                if len(ips) == len(ports) == len(types):
                    pass
                else:
                    again = True
                    count_loop -= 1

                    # continue to loop untill get the new proxies.
                    continue

                for ip, port, type in zip(ips, ports, types):
                    # if ip.text and port.text and type.text:
                    #     pass
                    # else:
                    #     continue
                    add_type = ''
                    if 'hidemy.name' in url_proxy:
                        if 'http' in type.text.lower():
                            if add_type:
                                add_type += ','
                            add_type += 'HTTP'
                        if 'https' in type.text.lower():
                            if add_type:
                                add_type += ','
                            add_type += 'HTTPS'
                    elif 'us-proxy' in url_proxy:
                        add_type += 'HTTP'
                        if 'yes' in type.text.lower():
                            add_type += ',HTTPS'
                    elif 'sslproxies' in url_proxy:
                        add_type += 'HTTP'
                        if 'yes' in type.text.lower():
                            add_type += ',HTTPS'
                    if add_type:
                        add_type = ',' + add_type
                    else:
                        continue  # NO HTTP OR HTTPS PROXY (Such as Socks4, Socks5 proxy)
                    ip_from_page = ip.text.replace(' ', '')
                    add_ip_type = '%s:%s%s' % (ip_from_page, port.text,
                                               add_type)
                    try:
                        ipaddress.ip_address(ip_from_page)
                    except:
                        continue

                    if save_false_proxies:
                        # check if new proxy is not one of the proxies which doesn't work.

                        if add_ip_type not in error_ip_list:
                            if save_ok_proxies:
                                if add_ip_type not in ok_ip_save_list:
                                    count_ip += 1
                                    ok_ip_save_list.append(add_ip_type)
                            else:
                                count_ip += 1
                                ok_ip_save_list.append(add_ip_type)
                    else:
                        if save_ok_proxies:
                            # add our new proxy to the list of all proxies to save this in our proxy file in the future.
                            if add_ip_type not in ok_ip_save_list:
                                count_ip += 1
                                ok_ip_save_list.append(add_ip_type)

                if save_ok_proxies:
                    if len(ok_ip_save_list) > number_of_save_proxies:
                        ok_ip_save_list_new = random.sample(
                            ok_ip_save_list, number_of_save_proxies)
                        for elem in ok_ip_save_list:
                            elem_types = elem.split(',')[1:]
                            if 'HTTPS' in elem_types:
                                if elem not in ok_ip_save_list_new:
                                    ok_ip_save_list_new.append(elem)
                        ok_ip_save_list = ok_ip_save_list_new
                    File.save_records_list(ok_file,
                                           ok_ip_save_list,
                                           overwrite=True,
                                           exit_all=False)

            http_count = 0
            https_count = 0
            for elem in ok_ip_save_list:
                elem_types = elem.split(',')[1:]
                if 'HTTP' in elem_types:
                    http_count += 1
                if 'HTTPS' in elem_types:
                    https_count += 1
            if https_count <= 3 or http_count <= 3:
                again = True
                force_scrap = True
                count_loop -= 1
                continue
            else:
                force_scrap = False

            if not len(ok_ip_save_list):
                again = True
                message = "Proxy couldn't get. Trying again..."
                Progress.exit_app(message=message, exit_all=False)
                continue

            if get_random:
                record_ip_type = random.choice(ok_ip_save_list)
            else:
                # Remaining calculated to get a proxy from our list, from LAST to FIRST.
                remaining = count_loop % len(ok_ip_save_list)
                remaining = len(ok_ip_save_list) - remaining
                if remaining >= len(ok_ip_save_list):
                    remaining = 0

                record_ip_type = ok_ip_save_list[remaining]
            record_ip_type = record_ip_type.replace(' ', '')
            record_ip_type = record_ip_type.replace('\n', '')
            record_ip = record_ip_type.split(',', 1)[0]
            record_types = record_ip_type.split(',')[1:]
            record_ip_list = record_ip.split(':')
            if len(record_ip_list) != 2:
                message = "Proxy doesn't work. Next proxy is testing...\n" \
                          "IP-Port: %s" % record_ip
                if not get_random:
                    message += "\tProxy Number: %s" % (remaining)
                if allow_print:
                    print(message)
                    print()
                again = True
                if save_false_proxies:
                    File.write_ok_and_false_proxy(record_ip_type,
                                                  error_file=error_file,
                                                  ok_file=ok_file)
                continue

            if for_https:
                if 'HTTPS' not in record_types:
                    again = True
                    continue
            else:
                if 'HTTP' not in record_types:
                    again = True
                    continue

            ip = record_ip_list[0]
            port = record_ip_list[1]

            proxy_decide = {}
            for type in record_types:
                proxy_decide[type.lower()] = "http://%s" % record_ip

            if save_false_proxies:
                if record_ip_type in error_ip_list:
                    File.write_ok_and_false_proxy(record_ip_type,
                                                  error_file=error_file,
                                                  ok_file=ok_file)
                    again = True
                    continue

            if run_test:
                try:
                    if check_internet:
                        internet_connection(timeout=test_timeout,
                                            reload_time=30,
                                            wait_for_network=True,
                                            sound_error=sound_error)

                    response = requests.get(url,
                                            proxies=proxy_decide,
                                            timeout=test_timeout,
                                            stream=True,
                                            headers=test_header)
                    if test_url:
                        # if any url overwritten on def, just check the status code.
                        if response.status_code != 200:
                            raise Exception
                    else:
                        response = response.json()
                        if response == {}:
                            raise Exception
                    if save_ok_proxies:
                        File.save_records_list(ok_file,
                                               ok_ip_save_list,
                                               overwrite=True,
                                               exit_all=False)
                except Exception as e:
                    message = "Proxy doesn't work. Next proxy is testing...\n" \
                              "IP: %s\tPort: %s" % (ip, port)
                    if not get_random:
                        message += '\tProxy Number: %s' % remaining
                    if allow_print:
                        print(message)
                        print()
                    again = True
                    if save_false_proxies:
                        File.write_ok_and_false_proxy(record_ip_type,
                                                      error_file=error_file,
                                                      ok_file=ok_file)
                    continue

            if selenium:
                proxy_decide = '--proxy-server=%s:%s' % (ip, port)
            else:
                # proxy_decide defined above.
                pass

            if allow_print:
                if get_random:
                    print("Proxy activated.\nIP: %s\tPort: %s" % (ip, port))
                else:
                    print(
                        "Proxy activated. Proxy Number: %s.\nIP: %s\tPort: %s"
                        % (count_loop, ip, port))

    if allow_print:
        print()
    if get_random:
        return proxy_decide, record_ip_type
    else:
        return count_loop, proxy_decide, record_ip_type
Ejemplo n.º 14
0
def connect_api(https=True,
                domain=None,
                endpoint='api/external_program/',
                code='all',
                program='',
                inform_user_periodically=False,
                show_error=False,
                sound_error=False,
                exit_all=False,
                extra_data=None):
    """Send a GET request to the project API and return the decoded JSON.

    The request is attempted up to three times. Any exception raised while
    building the payload, sending the request or decoding the answer counts
    as a failed attempt.

    Args:
        https: Use the ``https`` scheme when true, otherwise ``http``.
        domain: Host part of the URL; when empty, the ``domain`` environment
            variable is used.
        endpoint: Path appended to the domain.
        code: Sent as ``key`` when the default endpoint is used. The value
            ``'all'`` also selects the "settings" return shape (see Returns).
        program: Sent as ``program`` when the default endpoint is used.
        inform_user_periodically: Print a notice on every second failure.
        show_error: After the last failure, call ``Progress.exit_app`` with
            an explanatory message.
        sound_error: After the last failure, call ``Progress.sound_notify``.
        exit_all: Forwarded to ``Progress.exit_app``. When ``show_error`` is
            false, ``Progress.exit_app`` is only called if this is true.
        extra_data: Optional mapping merged into the request data for the
            default endpoint.

    Returns:
        * ``code == 'all'`` and default endpoint: a dict mapping each item's
          ``'ayar'`` value to its converted ``'parametre'`` value.
        * ``code == 'all'`` and another endpoint: the decoded JSON as is.
        * any other ``code``: the decoded JSON, passed through
          ``String.from_string_to_type`` when it is non-empty.
        * ``None`` when all attempts failed (and the process was not ended
          by ``Progress.exit_app``).
    """
    if not domain:
        domain = os.getenv("domain")
    if extra_data is None:
        # A fresh dict per call instead of a shared mutable default.
        extra_data = {}

    default_endpoint = 'api/external_program/'
    max_attempts = 3

    start = time.time()
    time.sleep(0.01)
    attempt = 0

    # The URL and headers do not change between attempts.
    scheme = 'https' if https else 'http'
    url = '%s://%s/%s' % (scheme, domain, endpoint)
    # Add further headers (Accept, Referer, ...) here if the API needs them.
    headers = {
        "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36',
    }

    while True:
        try:
            attempt += 1

            if endpoint == default_endpoint:
                data = {
                    'key': code,
                    'program': program,
                }
                data.update(extra_data)
            else:
                data = {}

            response = requests.request("GET",
                                        url,
                                        headers=headers,
                                        data=data,
                                        timeout=10)
            response.encoding = 'UTF-8'
            response = response.json()

            if code != 'all':
                # len() raises for non-sized JSON values (numbers, null),
                # which sends them down the retry path below.
                if len(response) and response:
                    response = String.from_string_to_type(response, 'try_all')
                return response

            if endpoint != default_endpoint:
                return response

            # The default endpoint answers with items carrying the keys
            # 'ayar' (setting name) and 'parametre' (setting value).
            db_settings_dict = {}
            for setting in response:
                # lower() must be *called*; passing the bound method object
                # would hand the converter a function instead of text.
                # NOTE(review): presumably lower-casing is wanted so that
                # e.g. 'True'/'TRUE' convert alike - confirm against
                # String.from_string_to_type.
                parameter = String.from_string_to_type(
                    setting['parametre'].lower(), 'try_all')
                db_settings_dict[setting['ayar']] = parameter
            return db_settings_dict

        except Exception:
            if inform_user_periodically and attempt % 2 == 0:
                message = '\nAn error occurred while running, trying again...'
                print()
                print('-' * 40)
                print(message)
                print()

            if attempt >= max_attempts:
                passed_time = time.time() - start
                message = 'An error occurred while running program. Please try again.\n' \
                        '(Trying time: %s)' % Progress.time_definition(passed_time)
                if sound_error:
                    Progress.sound_notify()
                if show_error:
                    Progress.exit_app(message=message, exit_all=exit_all)
                    print()
                elif exit_all:
                    Progress.exit_app(exit_all=exit_all)
                break

    # Every attempt failed.
    return None
Ejemplo n.º 15
0
def send_email(message,
               subject,
               recipients,
               attach_file_name=None,
               attach_file_text=None,
               login_mail=None,
               pwd=None,
               sender='Email Sender',
               sound_error=True,
               show_error=True,
               exit_all=False,
               debug_mode=0):
    """Send a plain-text email, optionally with one attachment, via Gmail SMTP.

    Connects to ``smtp.gmail.com:587``, upgrades the connection with
    STARTTLS, logs in and sends the message.

    Args:
        message: Body text of the email.
        subject: Subject header.
        recipients: Iterable of recipient addresses; a single address given
            as a string is accepted as well.
        attach_file_name: File name shown for the attachment.
        attach_file_text: Content of the attachment. The attachment is only
            added when both the name and the content are non-empty.
        login_mail: Login name; falls back to the ``login_mail`` environment
            variable.
        pwd: Password; falls back to the ``pwd`` environment variable.
        sender: Value of the ``From`` header and SMTP envelope sender.
        sound_error: Call ``Progress.sound_notify`` on failure.
        show_error: On failure, pass an explanatory message to
            ``Progress.exit_app`` in addition to the exception.
        exit_all: Forwarded to ``Progress.exit_app``.
        debug_mode: smtplib debug level (1 prints the SMTP conversation).

    Returns:
        ``True`` when the mail was handed to the server, ``False`` when any
        step raised an exception.
    """
    if not login_mail:
        login_mail = os.getenv('login_mail')
    if not pwd:
        pwd = os.getenv('pwd')

    try:
        if isinstance(recipients, str):
            # Joining a bare string would split it into single characters.
            recipients = [recipients]

        msg = MIMEMultipart()
        msg['Subject'] = subject
        msg['From'] = sender
        msg['To'] = ", ".join(recipients)
        msg.attach(MIMEText(message))

        if attach_file_name and attach_file_text:
            attach = MIMEBase('application', "octet-stream")
            attach.set_payload(attach_file_text)
            encoders.encode_base64(attach)
            attach.add_header('Content-Disposition',
                              'attachment',
                              filename=attach_file_name)
            msg.attach(attach)

        server = smtplib.SMTP('smtp.gmail.com', 587)
        try:
            server.set_debuglevel(debug_mode)  # Prints all process if debug == 1
            server.ehlo()
            server.starttls()

            # Identify again over the now-encrypted connection.
            server.ehlo()
            server.login(login_mail, pwd)

            server.sendmail(sender, recipients, msg.as_string())
        finally:
            # Release the socket even when login or sending fails.
            server.close()
        return True
    except Exception as e:
        if sound_error:
            Progress.sound_notify()

        if show_error:
            error_message = '--> An error occurred while sending email.'
            Progress.exit_app(e=e, message=error_message, exit_all=exit_all)
        else:
            Progress.exit_app(e=e, exit_all=exit_all)

        return False
Ejemplo n.º 16
0
def read_records_data_to_dict(txt_file,
                              show_progress=True,
                              file_not_found_error=True,
                              exit_all=True):
    """Read a plain-text "table" file into a dict of records.

    File layout, one item per line:

    * a line of exactly 40 dashes ends the current record,
    * a line of exactly 20 dashes ends the current field,
    * consecutive other lines belong to the same field and are concatenated
      without any separator.

    Args:
        txt_file: Path of the UTF-8 text file.
        show_progress: Report progress through ``Progress.progress`` for
            every line and print a blank line afterwards.
        file_not_found_error: When the file does not exist, report it through
            ``Progress.exit_app``; otherwise a missing file is ignored.
        exit_all: Forwarded to ``Progress.exit_app``.

    Returns:
        A dict mapping the 1-based record number to the list of that
        record's field strings. Records without any field are left out, so
        the keys are not necessarily consecutive. Empty when the file is
        missing; possibly partial when reading failed midway.
    """
    record_separator = '-' * 40
    field_separator = '-' * 20

    read_dict = dict()
    try:
        # First pass: count the lines so progress can show a total.
        try:
            with open(txt_file, 'r', encoding='utf-8') as file:
                total = sum(1 for _ in file)
        except FileNotFoundError:
            if file_not_found_error:
                message = "--> File coulnd't be found in folder. --> '%s'" % txt_file
                Progress.exit_app(message=message, exit_all=exit_all)
            return read_dict

        now = time.time()
        time.sleep(0.01)
        key = 1
        new_line = True  # True when the next text line starts a new field.
        with open(txt_file, 'r', encoding='utf-8') as file:
            for count, line in enumerate(file, start=1):
                read_dict.setdefault(key, [])

                # Strip only the newline: slicing off the last character
                # would eat real data on a final line without a newline.
                line = line.rstrip('\n')
                if show_progress:
                    Progress.progress(
                        count=count,
                        total=total,
                        now=now,
                        message='Reading records...',
                    )

                if line == record_separator:
                    key += 1
                    new_line = True
                    continue
                if line == field_separator:
                    new_line = True
                    continue

                if new_line:
                    read_dict[key].append(line)
                else:
                    read_dict[key][-1] += line
                new_line = False

        if show_progress:
            print()

        # Drop records that never received a field (e.g. two record
        # separators in a row).
        for key in [k for k, fields in read_dict.items() if not fields]:
            del read_dict[key]
    except Exception as e:
        if show_progress:
            print()
        message = "--> An error occurred while reading file -> '%s'" % txt_file
        Progress.exit_app(e=e, message=message, exit_all=exit_all)

    return read_dict
Ejemplo n.º 17
0
def create_word(word, my_rows):
    """Create a Word (.docx) document from a list of row descriptions.

    Each item of ``my_rows`` is a dict describing either a paragraph or a
    table.

    Paragraph rows (``'table'`` missing or falsy) understand:
        * ``'text'``: paragraph text (default ``''``)
        * ``'style'``: name of a document style, e.g. ``'MyHeader1'`` or
          ``'MyHeader2'`` which are defined here. When given, the
          run-level options below are ignored.
        * ``'bold'``, ``'italic'``, ``'underline'``: run formatting flags
        * ``'size'``: font size in points (default 12)
        * ``'location'``: alignment, 0: left, 1: center, 2: right,
          3: justify (default 0)

    Table rows (``'table'`` truthy) understand:
        * ``'data'``: list of rows, each a sequence of cell values; the
          first row is rendered bold and centered. The column count is
          taken from the first row.
        * ``'border'``: apply the ``'TableGrid'`` table style when truthy.

    Args:
        word: Target file name. Any extension other than ``.docx`` is
            replaced by ``.docx``.
        my_rows: List of row dicts as described above. Nothing is written
            when it is empty.

    Returns:
        None. Errors are reported through ``Progress.exit_app`` with
        ``exit_all=False``.
    """
    try:
        # Make sure the target name ends with ".docx".
        file_name, file_extension = os.path.splitext(word)
        if file_extension != '.docx':
            word = file_name + '.docx'

        print("'%s'  --> Creating..." % word)

        if not len(my_rows):
            print('\n--> No data.')
            return

        document = Document()
        # Further python-docx features that can be used for design:
        #   document.add_heading('Document Title', 0)
        #   p = document.add_paragraph('A plain paragraph having some ')
        #   p.add_run('bold').bold = True
        #   p.add_run('italic.').italic = True
        #   document.add_heading('Heading, level 1', level=1)
        #   document.add_picture('monty-truth.png', width=Inches(1.25))
        #   document.add_page_break()

        styles = document.styles

        normal_font = styles['Normal'].font
        normal_font.name = 'Arial'
        normal_font.size = Pt(12)

        header1_font = styles.add_style('MyHeader1',
                                        WD_STYLE_TYPE.PARAGRAPH).font
        header1_font.bold = True
        header1_font.name = 'Arial'
        header1_font.size = Pt(14)

        header2_font = styles.add_style('MyHeader2',
                                        WD_STYLE_TYPE.PARAGRAPH).font
        header2_font.name = 'Arial'
        header2_font.size = Pt(13)

        for row in my_rows:
            if not row.get('table', False):
                style_name = row.get('style', None)

                paragraph = document.add_paragraph()
                run = paragraph.add_run(row.get('text', ''))

                if style_name:
                    paragraph.style = styles[style_name]
                else:
                    run.bold = row.get('bold', False)
                    run.italic = row.get('italic', False)
                    run.underline = row.get('underline', False)
                    # Size belongs to this run only; setting it on a style
                    # object would change every paragraph using that style.
                    run.font.size = Pt(row.get('size', 12))

                # 0: left, 1: center, 2: right, 3: justify
                paragraph.alignment = row.get('location', 0)
                continue

            data = row.get('data', [])
            if not len(data):
                continue

            table_obj = document.add_table(
                rows=0,
                cols=len(data[0]),
            )
            if row.get('border', False):
                table_obj.style = 'TableGrid'

            for count_row, table_row in enumerate(data):
                row_cells = table_obj.add_row().cells
                for enum, cell in enumerate(table_row):
                    row_cells[enum].text = str(cell)

                if count_row == 0:
                    # First row acts as the table header.
                    for count_cell in range(len(table_row)):
                        header_paragraph = row_cells[count_cell].paragraphs[0]
                        header_paragraph.runs[0].font.bold = True
                        header_paragraph.alignment = 1

        print()
        document.save(word)

        print("Word Created -->> '%s'" % (word))
    except PermissionError:
        message = "--> '%s' can't access to this file.\nIt is probably because the file is open. If this word is open, please close it and re-run program." % word
        Progress.exit_app(message=message, exit_all=False)
    except Exception as e:
        message = "--> An error occurred while creating file... '%s'" % word
        Progress.exit_app(e=e, message=message, exit_all=False)
Ejemplo n.º 18
0
def excel_read_to_dict(excel, number_of_sheet=0, exit_all=False):
    """Read one sheet of an Excel file into data rows and header rows.

    A row counts as a *data* row when the value of its first cell can be
    converted with ``int()`` (the sheets are expected to carry an ID in the
    first column); every other row is collected as a *header* row. All cell
    values are passed through ``String.float_to_integer``.

    File lookup: a name without ``.xlsx``/``.xls`` extension gets ``.xlsx``.
    The name is resolved with the project helper ``find_file`` (per the
    notes in this function it checks letter-case variants of the name). If
    the file does not exist, the same base name with the other Excel
    extension is tried.

    Args:
        excel: Path or name of the workbook.
        number_of_sheet: Zero-based index of the sheet to read.
        exit_all: Forwarded to ``Progress.exit_app`` on errors.

    Returns:
        ``(all_data, headers)``: two dicts keyed by a 1-based running
        number, each value being the list of cell values of one row, e.g.
        ``{1: ['1st Column Value', '2nd Column Value'], 2: [...]}``.
        Both are empty when the file was not found; they may be partial
        when reading failed midway.
    """
    all_data = dict()
    headers = dict()

    try:
        # Default to ".xlsx" only when no Excel extension was given; an
        # explicit ".xls" must be respected.
        file_name, file_extension = os.path.splitext(excel)
        if file_extension not in ('.xlsx', '.xls'):
            file_extension = '.xlsx'
            excel = file_name + file_extension

        excel = find_file(excel)
        if not os.path.exists(excel):
            # Not found under the requested extension: try the other one.
            if file_extension == '.xlsx':
                other_extension = '.xls'
            else:
                other_extension = '.xlsx'
            excel2 = find_file(file_name + other_extension)
            if not os.path.exists(excel2):
                message = "! ! File couldn't be found in folder. --> '%s' or '%s'" % (
                    excel, excel2)
                Progress.exit_app(message=message, exit_all=exit_all)
                return all_data, headers
            excel = excel2

        workbook = xlrd.open_workbook(excel)
        sheet = workbook.sheet_by_index(number_of_sheet)

        number_of_column = sheet.ncols
        number_of_row = sheet.nrows

        count = 0
        total = number_of_row
        now = time.time()
        message = 'Reading excel...'
        time.sleep(0.01)

        number_of_data = 0
        number_of_header = 0

        for y in range(number_of_row):
            key = sheet.cell_value(rowx=y, colx=0)
            try:
                key = int(key)
            except (TypeError, ValueError, OverflowError):
                # Not a number: the row is treated as a header row below.
                pass

            row_values = [
                String.float_to_integer(sheet.cell_value(rowx=y, colx=x),
                                        force_number=False)
                for x in range(number_of_column)
            ]

            # Only rows with an integer in the first cell are data rows;
            # this keeps header rows out of the data dictionary.
            if isinstance(key, int):
                number_of_data += 1
                all_data[number_of_data] = row_values
            else:
                number_of_header += 1
                headers[number_of_header] = row_values

            count += 1

            Progress.progress(
                count=count,
                total=total,
                now=now,
                message=message,
            )
    except PermissionError:
        message = "--> '%s' can't access to this file.\nIt is probably because the file is open. If this excel is open, please close it and re-run program." % excel
        Progress.exit_app(message=message, exit_all=exit_all)
    except Exception as e:
        message = "--> An error occurred while reading file... '%s'" % excel
        Progress.exit_app(e=e, message=message, exit_all=exit_all)

    print('\nNumber of item: %s' % len(all_data))
    return all_data, headers
Ejemplo n.º 19
0
def excel_create(
    excel,
    all_data,
    headers=None,
    sizes=None,
    locations=None,
    page_name='Page1',
    exit_all=False,
):
    """Write a dict of rows to an ``.xlsx`` workbook with one worksheet.

    Sheet layout: column 0 holds a running ``ID``, followed by one column
    per header and a final credit column. The header row is frozen.

    A row may contain dicts among its values; those are not written as
    regular cells. Instead every distinct dict key gets its own extra
    column (appended after the regular headers) and the dict values are
    written there.

    Args:
        excel: Target file name. The extension is always replaced by
            ``.xlsx``.
        all_data: Dict whose values are the rows (lists of cell values).
            Rows shorter than the header list are padded **in place** with
            ``''``. Nothing is written when the dict is empty.
        headers: Column titles. Missing titles are filled with
            ``'Header 1'``, ``'Header 2'``, ... The passed list itself is
            not modified.
        sizes: Column widths matching ``headers``. Missing entries default
            to 20; when omitted altogether every column is 25 wide. The
            passed list itself is not modified.
        locations: Per-column alignment, ``'center'``, ``'right'`` or
            anything else for automatic: values convertible with ``int()``
            are written as centered numbers, everything else left-aligned.
            The passed list itself is not modified.
        page_name: Name of the worksheet.
        exit_all: Forwarded to ``Progress.exit_app`` on errors.

    Returns:
        None. Errors are reported through ``Progress.exit_app``.
    """
    # The workbook is written in xlsx format, so the name always gets the
    # ".xlsx" extension, whatever extension was passed in.
    file_name, _ = os.path.splitext(excel)
    excel = file_name + '.xlsx'

    try:
        message = "'%s'  --> Creating..." % excel
        total = len(all_data)
        print(message)

        if not total:
            print('\n--> No data.')
            return

        # Work on copies so lists owned by the caller do not grow with
        # every call.
        headers = list(headers) if headers else []

        # Longest row, not counting a trailing attribute dict.
        length_max = 0
        for val in all_data.values():
            try:
                if length_max < len(val):
                    if isinstance(val[-1], dict):
                        length_max = len(val) - 1
                    else:
                        length_max = len(val)
            except (TypeError, KeyError, IndexError):
                pass

        # Invent titles when some row is longer than the header list.
        i = 0
        while len(headers) < length_max:
            i += 1
            headers.append('Header %s' % i)

        # Pad rows that are shorter than the header list.
        for row_values in all_data.values():
            while len(headers) > len(row_values):
                row_values.append('')

        if sizes:
            sizes = list(sizes)
            while len(headers) > len(sizes):
                sizes.append(20)
        else:
            sizes = [25] * len(headers)

        if locations:
            locations = list(locations)
            while len(headers) > len(locations):
                locations.append('left')
        else:
            locations = ['left'] * len(headers)

        # One extra column per distinct key found in the attribute dicts.
        attrs_loc = dict()
        for val in all_data.values():
            for elem in val:
                if not isinstance(elem, dict):
                    continue
                for name in elem:
                    if name not in attrs_loc:
                        headers.append(name)
                        sizes.append(20)
                        locations.append('left')
                        # Sheet column = position in headers + 1, because
                        # column 0 is the ID column.
                        attrs_loc[name] = len(headers)

        workbook = xlsxwriter.Workbook(excel)
        worksheet = workbook.add_worksheet(page_name)

        worksheet.freeze_panes(1, 0)

        def make_format(align, bg_color=None, bold=False):
            # Bordered, vertically centered cell format.
            cell_format = workbook.add_format({'border': 1})
            if bg_color is not None:
                cell_format.set_pattern(1)
                cell_format.set_bg_color(bg_color)
            cell_format.set_align(align)
            cell_format.set_align('vcenter')
            if bold:
                cell_format.set_bold()
            return cell_format

        cell_format_header = make_format('center', 'orange', bold=True)
        cell_format_center_regular = make_format('center')
        cell_format_regular = make_format('left')
        cell_format_right_regular = make_format('right')
        cell_format_copyr = make_format('center', 'FABF8F', bold=True)

        # Header row: ID, the titles, then the credit column.
        worksheet.write(0, 0, 'ID', cell_format_header)
        worksheet.set_column(0, 0, 8)
        col = 1
        for head, size in zip(headers, sizes):
            worksheet.write(0, col, head, cell_format_header)
            worksheet.set_column(col, col, size)
            col += 1
        worksheet.write(0, col, 'Automated by BerkayMizrak.com',
                        cell_format_copyr)
        worksheet.set_column(col, col, 34)

        count = 0
        now = time.time()
        time.sleep(0.01)

        # Data starts in sheet row 1; the running ID equals the row number.
        for row, val in enumerate(all_data.values(), start=1):
            worksheet.write(row, 0, row, cell_format_center_regular)

            col = 0
            for elem in val:
                if isinstance(elem, dict):
                    continue

                col += 1
                if col <= len(locations):
                    alignment = locations[col - 1]
                else:
                    alignment = 'left'

                if alignment == 'center':
                    worksheet.write(row, col, elem,
                                    cell_format_center_regular)
                elif alignment == 'right':
                    worksheet.write(row, col, elem,
                                    cell_format_right_regular)
                else:
                    try:
                        number = int(elem)
                    except (TypeError, ValueError, OverflowError):
                        worksheet.write(row, col, elem, cell_format_regular)
                    else:
                        if isinstance(elem, float) and number != elem:
                            # Keep the fraction: int() would turn 1.5
                            # into 1.
                            number = elem
                        worksheet.write(row, col, number,
                                        cell_format_center_regular)

            for elem in val:
                if isinstance(elem, dict):
                    for name, attr in elem.items():
                        worksheet.write(row, attrs_loc[name], attr,
                                        cell_format_regular)

            count += 1
            Progress.progress(
                count=count,
                total=total,
                now=now,
            )

        print()
        workbook.close()
        message = "'%s' Data Saved to Excel -->> '%s'" % (count, excel)
        print(message)
    except PermissionError:
        message = "--> '%s' can't access to this file.\nIt is probably because the file is open. If this excel is open, please close it and re-run program." % excel
        Progress.exit_app(message=message, exit_all=exit_all)
    except Exception as e:
        message = "--> An error occurred while creating file... '%s'" % excel
        Progress.exit_app(e=e, message=message, exit_all=exit_all)