예제 #1
0
    def open_browser(self):
        """
        Point the existing browser at ``self.url`` and check the page title.

        Before navigating, a rotator limited to 100 Chrome user agents for
        Windows and Linux is built and one agent string is drawn from it.
        The original author's stated motivation is that Canvas detects
        Selenium / automation tools.

        NOTE(review): the drawn user-agent string is never handed to
        ``self.browser`` inside this method, so as written it does not
        change what the browser sends -- confirm whether it should be applied
        where the driver is created.

        Raises:
            AssertionError: if 'Alamo' is not part of the tab title.
        """
        rotator = UserAgent(
            limit=100,
            software_names=[SoftwareName.CHROME.value],
            operating_systems=[
                OperatingSystem.WINDOWS.value,
                OperatingSystem.LINUX.value,
            ],
        )

        # Full pool and one random pick; neither value is used further down.
        agent_pool = rotator.get_user_agents()
        picked_agent = rotator.get_random_user_agent()

        # Load the target page, then verify we landed on the expected site.
        self.browser.get(self.url)
        assert 'Alamo' in self.browser.title
예제 #2
0
    def get_user_agents():
        """Return a two-entry header dict containing a random user agent.

        A rotator limited to 100 Chrome-on-Linux user agents is created and
        one random string is drawn from it.

        Returns:
            dict: ``'userAgent'`` is the fixed text ``'python 3.7.5'`` and
            ``'platform'`` holds the random user-agent string.

        NOTE(review): the random user agent is stored under ``'platform'``
        rather than ``'userAgent'`` -- confirm this is intended.
        """
        rotator = UserAgent(
            limit=100,
            software_names=[SoftwareName.CHROME.value],
            operating_systems=[OperatingSystem.LINUX.value],
        )
        # The complete pool is fetched but not used by this function.
        agent_pool = rotator.get_user_agents()

        random_agent = rotator.get_random_user_agent()
        return {'userAgent': 'python 3.7.5', 'platform': random_agent}
예제 #3
0
class RandomUserAgent:
	"""Serve random Chrome user-agent strings for Windows and Linux.

	The pool of user agents is loaded once in ``__init__`` (at most 100
	entries) and then sampled by index.
	"""

	def __init__(self):
		self.software_names = [SoftwareName.CHROME.value]
		self.os = [OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value]
		# The filter keyword is ``operating_systems`` (plural), matching the
		# other call sites; the singular spelling used before did not apply
		# the operating-system filter.
		self.user_agent_rotator = UserAgent(software_names=self.software_names, operating_systems=self.os, limit=100)
		self.user_agents = self.user_agent_rotator.get_user_agents()

	def get_random_number(self):
		"""Return a random valid index into ``self.user_agents``.

		The upper bound follows the real pool size instead of assuming 100
		entries, so a shorter pool no longer yields out-of-range indexes.

		Raises:
			ValueError: if the pool is empty.
		"""
		return random.randint(0, len(self.user_agents) - 1)

	def get_random_user_agents(self):
		"""Return the ``"user_agent"`` string of one randomly chosen pool entry."""
		return self.user_agents[self.get_random_number()]["user_agent"]
예제 #4
0
    def getRandomUserAgent(self):
        """Draw, log and return one random user-agent string.

        The rotator is limited to 100 agents covering Chrome, Firefox, Edge,
        Internet Explorer and Android software on Windows and Linux.

        Returns:
            The value produced by the rotator's ``get_random_user_agent()``.
        """
        browsers = [
            SoftwareName.CHROME.value,
            SoftwareName.FIREFOX.value,
            SoftwareName.EDGE.value,
            SoftwareName.INTERNET_EXPLORER.value,
            SoftwareName.ANDROID.value,
        ]
        platforms = [OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value]

        rotator = UserAgent(limit=100,
                            software_names=browsers,
                            operating_systems=platforms)
        # The complete pool is fetched but not used by this method.
        agent_pool = rotator.get_user_agents()

        chosen = rotator.get_random_user_agent()
        self.__log__.debug(f'using user agent: {chosen}')
        return chosen
예제 #5
0
def get_user_agent_driver():
    """Create a headless Chrome driver that reports a random user agent.

    A rotator limited to 100 Chrome user agents for Windows and Linux
    supplies the string passed to Chrome via the ``user-agent`` argument.

    Returns:
        The ``webdriver.Chrome`` instance started from
        ``'../../data/chromedriver.exe'`` with the configured options.
    """
    software_names = [SoftwareName.CHROME.value]
    operating_systems = [
        OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
    ]
    user_agent_rotator = UserAgent(software_names=software_names,
                                   operating_systems=operating_systems,
                                   limit=100)
    user_agent1 = user_agent_rotator.get_random_user_agent()

    options = Options()
    # These switches must start with two ASCII hyphens. The previous text
    # began with an em dash followed by a hyphen, which is not a valid
    # Chrome switch prefix, so none of the four flags took effect.
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-gpu')
    options.add_argument('--window-size=1420,1080')
    options.add_argument(f'user-agent={user_agent1}')
    driver = webdriver.Chrome('../../data/chromedriver.exe', options=options)
    return driver
예제 #6
0
print(f'start {datetime.now()}')

# In[71]:

# Shared rotator: at most 100 Chrome user agents for Windows and Linux.
# random_user_agent.params also offers SoftwareEngine, HardwareType,
# SoftwareType and Popularity for further filtering.
software_names = [SoftwareName.CHROME.value]
operating_systems = [OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value]
user_agent_rotator = UserAgent(
    limit=100,
    software_names=software_names,
    operating_systems=operating_systems,
)
# Whole pool, plus one initial random pick.
user_agents = user_agent_rotator.get_user_agents()
user_agent = user_agent_rotator.get_random_user_agent()


def new_agent():
    """Return another random user-agent string from the shared rotator."""
    return user_agent_rotator.get_random_user_agent()


# In[72]:

# Location of the chromedriver executable, also exported via the environment.
chromedriver = "/Applications/chromedriver"
os.environ["webdriver.chrome.driver"] = chromedriver
reviews = "https://apps.apple.com/us/app/eastmeeteast-1-asian-dating/id890664813#see-all/reviews"
# NOTE(review): the driver is started without options, so the user agent
# picked above is not applied to it in these lines.
driver = webdriver.Chrome(chromedriver)
예제 #7
0
        "download.default_directory": f"{path_zip}",
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing_for_trusted_sources_enabled": False,
        "safebrowsing.enabled": False
    })
# Rendering switches: no GPU and no software rasterizer.
for chrome_flag in ('--disable-gpu', '--disable-software-rasterizer'):
    options.add_argument(chrome_flag)

# Rotator limited to 100 Chrome user agents for Windows and Linux.
software_names = [SoftwareName.CHROME.value]
operating_systems = [OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value]
user_agent_rotator = UserAgent(
    limit=100,
    software_names=software_names,
    operating_systems=operating_systems,
)
user_agents = user_agent_rotator.get_user_agents()  # whole pool
# One random pick; the override below draws its own, second random agent.
user_agent = user_agent_rotator.get_random_user_agent()

driver = webdriver.Chrome(executable_path=r'chromedriver.exe',
                          chrome_options=options)

# Replace the driver's user agent through the DevTools protocol, then print
# what the page now reports as navigator.userAgent.
ua_override = {"userAgent": str(user_agent_rotator.get_random_user_agent())}
driver.execute_cdp_cmd('Network.setUserAgentOverride', ua_override)
print(driver.execute_script("return navigator.userAgent;"))

try:
    driver.get('https://github.com/' + repo + '/archive/master.zip')
    time_to_wait = 1200
    time_counter = 0
    while not os.path.exists(path_zip + repo.split('/')[1] + "-master.zip"):
예제 #8
0
def main(image_id, image, excel):
    """Copy the scans and/or record rows of one obd-memorial.ru document to Google Drive.

    A Drive folder named ``"<image_id> <timestamp>"`` is created under
    ``id_root_folder``. The site's list of scan ids is then fetched and, for
    each scan, the record rows are collected into a workbook (``excel``)
    and/or the JPEG is downloaded to a temporary directory and uploaded to
    that folder (``image``).

    Args:
        image_id: Document id on obd-memorial.ru, or ``None``.
        image: Truthy to download and upload every scan as ``<id>.jpg``.
        excel: Truthy to write the rows returned by ``get_info`` into a
            workbook and upload it, named after the last scan id.

    Returns:
        A 2-tuple of strings:

        * ``('Ссылка на каталог -', '')`` when ``image_id`` is ``None``;
        * ``('no folder', 'Запись сводного документа не найдена')`` when the
          site cookie is missing (the new Drive folder is deleted again);
        * ``(web_link, name_folder_save)`` when the final Drive listing finds
          entries whose name contains the folder name;
        * ``('no folder', 'records not found')`` otherwise.

        ``None`` is returned when the info request does not end with status
        307; the Drive folder created earlier is left in place in that case.
    """
    user_agent_rotator = UserAgent(
        software_names=[SoftwareName.CHROME.value],
        operating_systems=[
            OperatingSystem.WINDOWS.value, OperatingSystem.LINUX.value
        ],
        limit=100)

    if image_id is None:
        return 'Ссылка на каталог -', ''

    info_url = 'https://obd-memorial.ru/html/info.htm?id={}'.format(image_id)
    img_info = 'https://obd-memorial.ru/html/getimageinfo?id={}'.format(
        image_id)
    print(info_url)
    res1 = requests.get(info_url, allow_redirects=True)
    dirpath = tempfile.mkdtemp()
    print('dirpath = ' + dirpath)

    # Create the destination folder right away, exactly once.
    name_folder_save = (str(image_id) + ' ' +
                        datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    print('name_folder_save = ' + name_folder_save)
    file_metadata = {
        'name': name_folder_save,
        'mimeType': 'application/vnd.google-apps.folder',
        'parents': [id_root_folder]
    }
    result = service.files().create(body=file_metadata,
                                    fields='id,webViewLink').execute()
    id_folder_save = result['id']  # id of the folder to save into
    web_link = result['webViewLink']  # browser link to that folder

    if res1.status_code != 307:
        # Nothing is processed for any other status; callers receive None.
        return None

    print(res1.status_code)
    print('*****************')
    site_cookie = '3fbe47cd30daea60fc16041479413da2'
    if site_cookie not in res1.cookies:
        # The folder is not needed after all, so remove it.
        result = service.files().delete(fileId=id_folder_save).execute()
        print('*****************************************')
        print(' delete catalog = ' + name_folder_save)
        pp.pprint(result)
        return 'no folder', 'Запись сводного документа не найдена'

    cookies = {
        site_cookie: res1.cookies[site_cookie],
        'JSESSIONID': res1.cookies['JSESSIONID'],
    }

    # Load the list of scan ids belonging to this document.
    response = requests.get(img_info, cookies=cookies)
    response_dict = json.loads(response.text)
    print('response_dict = ' + str(len(response_dict)))

    if excel:
        row_num = 1
        workbook = Workbook()
        # Use the active worksheet/tab and write the header row.
        worksheet = workbook.active
        worksheet.title = 'Person'
        for col_num, column_title in enumerate(cols, 1):
            worksheet.cell(row=row_num, column=col_num).value = column_title

    # Remembered so the workbook can be named after the last scan; stays
    # None when the site returned no scans at all.
    last_item_id = None

    # Walk through the scan ids.
    for item in response_dict:
        last_item_id = item['id']
        if excel:
            for record_id in item['mapData']:
                row_num += 1
                row = get_info(item['id'], record_id, cookies)
                for col_num, cell_value in enumerate(row, 1):
                    worksheet.cell(row=row_num,
                                   column=col_num).value = cell_value
        if image:
            img_url = ("https://obd-memorial.ru/html/images3?id=" +
                       str(item['id']) + "&id1=" +
                       getStringHash(item['id']) + "&path=" + item['img'])
            headers_302 = parse_file(BASE_DIR + '/header_302.txt')
            headers_302['Cookie'] = make_str_cookie(cookies)
            headers_302['Referer'] = info_url
            req302 = requests.get(img_url,
                                  headers=headers_302,
                                  cookies=cookies,
                                  allow_redirects=False)
            if req302.status_code == 302:
                params = {
                    'id': str(item['id']),
                    'id1': getStringHash(item['id']),
                    'path': item['img'],
                }
                headers_img = parse_file(BASE_DIR + '/header_img.txt')
                # Was assigned to the undefined name ``header_img``, which
                # raised NameError on the first image request.
                headers_img['User-Agent'] = (
                    user_agent_rotator.get_random_user_agent())
                headers_img['Referer'] = info_url
                req_img = requests.get(
                    "https://cdn.obd-memorial.ru/html/images3",
                    headers=headers_img,
                    params=params,
                    cookies=cookies,
                    stream=True,
                    allow_redirects=False)
                if req_img.status_code == 200:
                    name = str(item['id']) + '.jpg'
                    jpg_path = dirpath + "/" + name
                    # Context manager closes the file even if the write fails.
                    with open(jpg_path, 'wb') as f:
                        f.write(req_img.content)

                    file_metadata = {
                        'name': name,
                        'parents': [id_folder_save]
                    }
                    try:
                        media = MediaFileUpload(jpg_path,
                                                resumable=True,
                                                chunksize=-1,
                                                mimetype='image/jpg')
                        service.files().create(body=file_metadata,
                                               media_body=media,
                                               fields='id').execute()
                    except HttpError as e:
                        # Upload errors are only reported; no retry is
                        # attempted and processing continues with the next
                        # scan.
                        print('ERROR *************************')
                        print(e)
                        if e.resp.status in [404]:
                            print("ERROR404 ********")
                        elif e.resp.status in [500, 502, 503, 504]:
                            print("ERROR 50* ********")
                        else:
                            print('OK')
                        print('ERROR *************************')

    # Upload the workbook, named after the last scan id. Skipped when there
    # were no scans, where the loop variable used to be unbound.
    if excel and last_item_id is not None:
        name = str(last_item_id) + '.xlsx'
        book_path = dirpath + "/" + str(last_item_id) + '_book.xlsx'
        file_metadata = {'name': name, 'parents': [id_folder_save]}
        workbook.save(filename=book_path)
        media = MediaFileUpload(
            book_path,
            resumable=True,
            chunksize=-1,
            mimetype=
            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        )
        service.files().create(body=file_metadata,
                               media_body=media,
                               fields='id').execute()

    # Images are uploaded one by one inside the loop above; here we only
    # check whether Drive lists anything whose name contains the folder name.
    result = service.files().list(
        pageSize=1000,
        fields="nextPageToken, files(id, name, mimeType,webViewLink)",
        q=Template("name contains '$name_folder_save'").safe_substitute(
            name_folder_save=name_folder_save)).execute()
    if result['files']:
        return web_link, name_folder_save
    return 'no folder', 'records not found'