def __init__(self, username):
    self.driver = config_driver()
    self.username = str(username)
    self.name = 'douban_' + self.username
    config_log(self.name)
    load_cookies(self.driver, self.name)
    self.posts = douban['posts'].get(self.username)
    self.up_posts()
def __init__(self, username):
    self.driver = config_driver()
    self.post_count = hupu['posts_count']
    self.user_id = hupu['user_id']
    self.commentaries = hupu['commentaries']
    self.comment_count = 0
    self.posts = Queue()
    self.max_error_num = 5
    self.username = str(username)
    self.name = 'hupu_' + self.username
    config_log(self.name)
    load_cookies(self.driver, self.name)
    self.comment_posts()
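# Both bot constructors above call load_cookies(driver, name). A minimal
# sketch of such a helper, assuming cookies are pickled per-account; the
# file layout and pickle format are assumptions, not the projects' actual
# implementations.
import os
import pickle

def load_cookies(driver, name):
    """Restore pickled cookies for `name` into a Selenium driver, if any."""
    path = '%s_cookies.pkl' % name
    if not os.path.exists(path):
        return
    with open(path, 'rb') as f:
        for cookie in pickle.load(f):
            # The driver must already be on the cookie's domain for this to stick.
            driver.add_cookie(cookie)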
def analysis_share_page(detail_url: str) -> str:
    rid = detail_url.split('/')[-1]
    logging.info("rid is %s", rid)
    res = s.post(SHARE_URL, data={"rid": rid}, cookies=load_cookies()).json()
    share_code = res['data'].split('/')[-1]
    logging.info("Share code is %s", share_code)
    share_url = SHARE_WEB.format(code=share_code)
    logging.info("Share url %s", share_url)
    return share_url
from typing import Tuple

def analysis_share_page(detail_url: str) -> Tuple[str, dict]:
    rid = detail_url.split('/')[-1]
    res = s.post(SHARE_URL, data={"rid": rid}, cookies=load_cookies()).json()
    share_code = res['data'].split('/')[-1]
    share_url = SHARE_WEB.format(code=share_code)
    logging.info("Share url is %s", share_url)
    # Get the share API response for the resolved code as well.
    api_response = s.get(SHARE_API.format(code=share_code)).json()
    return share_url, api_response
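# Usage sketch for the share-page helpers above. It assumes `s` is a shared
# requests.Session and SHARE_URL/SHARE_WEB/SHARE_API are module constants;
# the detail URL is a hypothetical placeholder.
if __name__ == '__main__':
    share_url, api_response = analysis_share_page('https://example.com/detail/12345')
    logging.info("Got %s with %d top-level keys", share_url, len(api_response))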
def get_search_html(kw: str) -> str:
    if not os.path.exists(cookie_file):
        logging.warning("Cookie file not found")
        login()
    if not is_cookie_valid():
        login()
    cookie = load_cookies()
    logging.info("searching for %s", kw)
    r = s.get(SEARCH_URL.format(kw=kw), cookies=cookie)
    r.close()
    return r.text
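# Usage sketch: the helper logs in (or re-logs-in) transparently, so callers
# only pass the keyword; 'python' here is a placeholder.
html = get_search_html('python')
logging.info("search page is %d bytes", len(html))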
def __init__(self, hunter):
    post_data.update({
        'fulltext': hunter['keyword'],
        'exparea': hunter['area'],
    })
    self.post_data = post_data
    self.case_id = hunter['case_id']
    self.username = hunter['username']
    filename = generate_filename_by_username(self.username)
    if not os.path.exists(filename):
        get_cookies()
    self.cookie = load_cookies(filename)
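# One plausible shape for the per-user cookie filename helper used above;
# the naming scheme is an assumption, not the project's actual one.
def generate_filename_by_username(username: str) -> str:
    return 'cookies_%s.json' % username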
def __tasks__():
    logging.info('Total tasks: %s' % len(local_args))
    success, failure = [], []
    for provider, arg in local_args:
        sources = download_sources(provider, arg)
        if arg['no_upload']:
            logger.warning('Not uploading - no_upload specified on this resource')
        else:
            result, dirty = upload_sources(
                sources, arg,
                report_progress if global_args['show_progress'] else lambda current, max: None)
            if not dirty:
                success.append((arg, result))
            else:
                failure.append((arg, None))
    if not failure:
        sys.exit(0)
    logging.warning('Dirty flag set, not all tasks are done properly')
    sys.exit(1)

if __name__ == "__main__":
    setup_logging()
    # Parsing args
    global_args, local_args = prase_args(sys.argv)
    # Saving / loading cookies
    save_cookies(global_args['cookies'])
    if not setup_session(load_cookies()):
        logging.fatal('Unable to set working directory, quitting')
        sys.exit(2)
    else:
        self_info = sess.Self
        if 'uname' not in self_info['data']:
            logger.error('Invalid cookies: %s' % self_info['message'])
            sys.exit(2)
        logger.warning('Bilibili-toolman - operating as %s' % self_info['data']['uname'])
        __tasks__()
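# A minimal progress callback matching the (current, max) signature that
# upload_sources is handed above; purely illustrative, not the project's own.
def report_progress(current, max):
    print('\rUploading %d / %d' % (current, max), end='', flush=True)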
def is_cookie_valid() -> bool:
    cookie = load_cookies()
    r = s.get(GET_USER, cookies=cookie)
    data = r.json()
    logging.info("cookie valid? %s", data)
    return data['status'] == 1
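# A plausible implementation of the zero-argument load_cookies() that the
# requests-based snippets above call; the JSON file name is an assumption.
import json
import os

COOKIE_FILE = 'cookies.json'

def load_cookies(path=COOKIE_FILE):
    """Return saved cookies as a dict, or an empty dict if none exist."""
    if not os.path.exists(path):
        return {}
    with open(path) as f:
        return json.load(f)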
def __init__(self, directory=None, profile=None, hashtag=None,
             add_metadata=False, get_videos=False, videos_only=False,
             jobs=16, template="{id}", url_generator=default,
             dump_json=False, dump_only=False, extended_dump=False):
    """Create a new looter instance.

    Keyword Arguments:
        directory (`str`): where downloaded medias will be stored
            **[default: None]**
        profile (`str`): a profile to download media from
            **[default: None]**
        hashtag (`str`): a hashtag to download media from
            **[default: None]**
        add_metadata (`bool`): add date and comment metadata to the
            downloaded pictures **[default: False]**
        get_videos (`bool`): also get the videos from the given target
            **[default: False]**
        videos_only (`bool`): only download videos
            (implies ``get_videos=True``) **[default: False]**
        jobs (`int`): the number of parallel threads to use to download
            media (12 or more is advised for a truly parallel download
            of media files) **[default: 16]**
        template (`str`): a filename format, in Python new-style
            formatting format; see the Template page of the
            documentation for available keys **[default: {id}]**
        url_generator (`function`): a callable that takes a media
            dictionary as argument and returns the URL it should
            download the picture from; the default tries to get the
            best available size **[default: `urlgen.default`]**
        dump_json (`bool`): save each resource's metadata to a JSON
            file next to the actual image/video **[default: False]**
        dump_only (`bool`): only save metadata and discard the actual
            resource **[default: False]**
        extended_dump (`bool`): attempt to fetch as much metadata as
            possible, at the cost of more time; set to `True` if, for
            instance, you always want the top comments to be downloaded
            in the dump **[default: False]**
    """
    if profile is not None and hashtag is not None:
        raise ValueError("Give only a profile or a hashtag, not both!")
    if profile is not None:
        self.target = profile
        self._page_name = 'ProfilePage'
        self._section_name = 'user'
        self._base_url = "https://www.instagram.com/{}/"
    elif hashtag is not None:
        self.target = hashtag
        self._page_name = 'TagPage'
        self._section_name = 'tag'
        self._base_url = "https://www.instagram.com/explore/tags/{}/"
    else:
        self.target = None

    # Create self.directory if it doesn't exist.
    if directory is not None and not os.path.exists(directory):
        os.makedirs(directory)

    self.template = template
    self._required_template_keys = self._RX_TEMPLATE.findall(template)
    self.url_generator = url_generator
    if not callable(url_generator):
        raise ValueError("url_generator must be a callable!")

    self.directory = directory
    self.add_metadata = add_metadata
    self.get_videos = get_videos or videos_only
    self.videos_only = videos_only
    self.dump_json = dump_json or dump_only
    self.dump_only = dump_only
    self.extended_dump = extended_dump
    self.jobs = jobs

    self.session = requests.Session()
    self.session.cookies = six.moves.http_cookiejar.LWPCookieJar(
        self.COOKIE_FILE)
    load_cookies(self.session)
    self.user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; WOW64; "  # seems legit
        "rv:50.0) Gecko/20100101 Firefox/50.0")
    self.dl_count = 0
    self.metadata = {}
    self._workers = []
    self.dl_count_lock = threading.Lock()
    self.session.headers.update({
        'User-Agent': self.user_agent,
        'Accept': 'text/html',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'Host': 'www.instagram.com',
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
    })
    atexit.register(self.__del__)
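# Usage sketch for the looter above, assuming the class is named InstaLooter
# and exposes a download() entry point; the profile name and directory are
# placeholders.
looter = InstaLooter(directory='pics/instagram', profile='instagram',
                     get_videos=True, jobs=8)
looter.download()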
def login_get_cookies():
    # Reuse cached cookies if we have them; otherwise drive a headless
    # Chrome through the douban login form and persist the session.
    cookies = utils.load_cookies()
    if cookies:
        session = requests.Session()
        session.headers['User-Agent'] = User_Agent
        session.cookies = requests.utils.cookiejar_from_dict(cookies)
        return session, cookies

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(executable_path='./chromedriver_win32/chromedriver.exe')
    url = 'https://accounts.douban.com/passport/login'
    while True:
        driver.get(url)
        link = driver.find_element_by_xpath("//ul[@class='tab-start']/li[@class='account-tab-account']")
        link.click()
        name_input = driver.find_element_by_xpath("//div[@class='account-form-field']/input[@id='username']")
        pass_input = driver.find_element_by_xpath("//div[@class='account-form-field']/input[@id='password']")
        remember_input = driver.find_element_by_xpath("//div[@class='account-form-ft']/p[@class='account-form-remember']/input[@id='account-form-remember']")
        login_button = driver.find_element_by_xpath("//div[@class='account-form-field-submit ']/a[@class='btn btn-account']")
        name_input.clear()
        name_input.send_keys(username)
        pass_input.clear()
        pass_input.send_keys(password)
        remember_input.click()
        login_button.click()
        start_ts = time.time()
        print("start..", start_ts)
        try:
            # Wait until the submit button disappears, i.e. the login finished.
            WebDriverWait(driver, 15).until_not(
                lambda x: x.find_element_by_xpath("//div[@class='account-form-field-submit ']/a[@class='btn btn-account btn-active']").is_displayed())
            WebDriverWait(driver, 15).until_not(
                lambda x: x.find_element_by_xpath("//div[@class='account-form-field-submit ']/a[@class='btn btn-account']").is_displayed())
        except Exception:
            import traceback
            print(traceback.format_exc())
            utils.save_html('exc_index.html', driver.page_source)
            import os
            os._exit(-1)
        print('end..', time.time() - start_ts)
        driver.save_screenshot('submit.png')
        utils.save_html('index.html', driver.page_source)
        if u'douban' in driver.page_source:
            selenium_cookies = driver.get_cookies()
            print("selenium_cookies:", selenium_cookies)
            driver.close()
            break
        else:
            # Login failed; keep the driver open and retry.
            continue

    # Copy the Selenium cookies into a requests session and persist them.
    session = requests.Session()
    session.headers['User-Agent'] = User_Agent
    for i in selenium_cookies:
        requests.utils.add_dict_to_cookiejar(session.cookies, {i['name']: i['value']})
    cookies = requests.utils.dict_from_cookiejar(session.cookies)
    utils.save_cookies(cookies)
    return session, cookies
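# Usage sketch: the function returns a cached session when cookies exist and
# otherwise drives the browser login above; `username` and `password` are
# assumed to be module-level credentials, as the function body expects.
session, cookies = login_get_cookies()
r = session.get('https://www.douban.com/')
print(r.status_code)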