def run():
    web_api = Client(auto_patch=True, drop_incompat_keys=False)
    startup = True
    user_dict = {
        "SpaceX": "20311520",
        "jclishman.testing": "7400533474"
    }

    while True:
        # Reset the feed each cycle so posts aren't reprocessed
        feed = []
        for id_str in list(user_dict.values()):
            try:
                feed.append(web_api.user_feed(id_str, count=1))
                time.sleep(5)
            except Exception as e:
                # logger.error(str(e))
                # logger.error("Error getting feed. Sleeping for 30s")
                time.sleep(30)

        for post in feed:
            post = post[0]["node"]
            user_id_str = post["owner"]["id"]
            shortcode = post["shortcode"]
            timestamp = post["created_time"]

            # Empty string if there isn't a caption
            try:
                caption = post["caption"]["text"]
            except (KeyError, TypeError):
                caption = ''

            # Match the ID number to a screen name
            for screen_name, id_str in user_dict.items():
                if user_id_str == id_str:
                    user_screen_name = screen_name

            stored_timestamp = db.get_instagram_timestamp(user_screen_name)

            if int(timestamp) > stored_timestamp:
                start_time = time.time()
                db.update_instagram_timestamp(user_screen_name, int(timestamp))

                logger.info(f"New Instagram post by @{user_screen_name}, id {user_id_str}")
                logger.info(f"Post shortcode: {shortcode}")
                logger.info(f"Post caption: {caption}")
                logger.info(f"Post timestamp: {timestamp}")

                url = f"https://instagram.com/p/{shortcode}"
                if not startup:
                    db.insert_message('Instagram', user_screen_name,
                                      caption.replace("\n", " "), url, start_time)

        time.sleep(10)
        startup = False
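# The polling loop above relies on a project-local `db` module that is not
# shown. A minimal sketch of what its timestamp helpers might look like,
# backed by sqlite3; the table name and schema here are assumptions, not the
# original project's code (insert_message would persist rows similarly).
import sqlite3

_conn = sqlite3.connect("posts.db")
_conn.execute("CREATE TABLE IF NOT EXISTS instagram_timestamps "
              "(screen_name TEXT PRIMARY KEY, ts INTEGER)")

def get_instagram_timestamp(screen_name):
    # Return the stored timestamp for a user, or 0 if never seen before.
    row = _conn.execute(
        "SELECT ts FROM instagram_timestamps WHERE screen_name = ?",
        (screen_name,)).fetchone()
    return row[0] if row else 0

def update_instagram_timestamp(screen_name, ts):
    # Upsert the latest-seen post timestamp for a user.
    _conn.execute(
        "INSERT OR REPLACE INTO instagram_timestamps (screen_name, ts) "
        "VALUES (?, ?)", (screen_name, ts))
    _conn.commit()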
def instagram_feed(user_handle="google", user_id=1067259270):
    feed_list = []
    # TODO: generate user_id given a user_handle
    web_api = Client(auto_patch=True, drop_incompat_keys=False)
    # Fetch the fifty most recent posts from the user's feed
    user_feed_info = web_api.user_feed(user_id, count=50)
    for feeds in user_feed_info:
        try:
            raw_item = feeds["node"]
            date = datetime.fromtimestamp(int(raw_item.get('taken_at_timestamp')))
            feed_info = {
                "provider": "instagram",
                "provider_handle": user_handle or '',
                "link": raw_item["link"] or '',
                "likes": raw_item["likes"]["count"] or 0,
                "media": [],
                "video_views": raw_item.get('video_view_count') or 0,
                "caption": raw_item["edge_media_to_caption"]["edges"][0]["node"]["text"] or '',
            }
            feed_info['pubDate'] = date.strftime('%a, %d %b %Y %H:%M:%S') + ' GMT'
            img_link = raw_item.get('display_src') or raw_item.get('thumbnail_src')
            if img_link:
                feed_info['media'].append(img_link)
            if raw_item["is_video"]:
                feed_info["videos"] = raw_item["display_url"]
                vid_link = feed_info["videos"]
                if vid_link:
                    feed_info['media'].append(vid_link)
            feed_list.append(feed_info)
        except Exception:
            raise RuntimeError("Could not get Instagram feed or feed does not exist")
    return app.response_class(BytesIO(json.dumps(feed_list).encode('utf-8')),
                              content_type='application/json')
def scrape(event, context, page_size=settings.PAGE_SIZE):
    # Instagram no longer allows scraping pages larger than 50. We want to
    # try pages of different sizes for trial purposes, but anything over 50
    # just gets the request blocked.
    assert page_size <= 50, "Page size must be <= 50"

    # Extract the location and cursor from the arguments. We use "event" and
    # "context" in accordance with the Lambda convention.
    location_id = event.get('location', settings.DEFAULT_LOCATION)
    end_cursor = event.get('cursor', None)

    # Instantiate the Instagram web client
    web_api = Client(auto_patch=True, drop_incompat_keys=False)

    # Query a page of this location's feed
    location_feed_info = web_api.location_feed(
        location_id,
        count=page_size,
        end_cursor=end_cursor,
        # proxy=proxy.get_random_http_proxy()
    )
    if location_feed_info['status'] != "ok":
        raise RuntimeError(
            "location_feed returned status %r" % location_feed_info['status'])

    # Massage the GraphQL response into a more usable form
    location = location_feed_info['data']['location']
    media = location['edge_location_to_media']
    # top_posts = location['edge_location_to_top_posts']
    del location['edge_location_to_media']
    del location['edge_location_to_top_posts']

    cursor = media['page_info']['end_cursor']
    # has_next_page = media['page_info']['has_next_page']
    posts = [node['node'] for node in media['edges']]

    # Return the response
    return {
        "cursor": cursor,
        "data": posts,
    }
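# A minimal sketch of driving the Lambda handler above to walk several pages
# of a location feed by passing each returned cursor back in. The helper name
# and the three-page cap are arbitrary choices for illustration.
def scrape_pages(location_id, max_pages=3):
    cursor = None
    all_posts = []
    for _ in range(max_pages):
        result = scrape({'location': location_id, 'cursor': cursor}, context=None)
        all_posts.extend(result['data'])
        cursor = result['cursor']
        if not cursor:
            # No further pages
            break
    return all_posts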
def auth(self):
    settings = self._settings_manager.get_settings()
    client_params = dict(
        auto_patch=True,
        authenticate=True,
        username=self.username,
        password=self._password,
    )
    if not settings:
        logging.debug(
            f"Unable to find file: {self._settings_manager.setting_file_name!s}")
        client_params.update(
            dict(on_login=lambda x: onlogin_callback(x, self.username)))
    else:
        logging.debug(
            f"Reusing settings: {self._settings_manager.setting_file_name!s}")
        client_params.update(dict(settings=settings))

    self._api = Client(**client_params)

    # Show when the login expires
    cookie_expiry = self._api.cookie_jar.auth_expires
    logging.debug("Cookie Expiry: {0!s}".format(
        datetime.datetime.fromtimestamp(cookie_expiry).strftime(
            "%Y-%m-%dT%H:%M:%SZ")))
def main():
    if ACCESS_TOKEN == DEFAULT_ACCESS_TOKEN:
        print("Please update your Dingtalk webhook access token")
        return

    path = INSTAGRAM_PATH_HOTTEST if INSTAGRAM_IS_FEED_HOTTEST else INSTAGRAM_PATH_LATEST
    web_api = Client(auto_patch=True, drop_incompat_keys=False)

    while True:
        # Load posts that have already been forwarded
        old_posts = []
        with open("old_posts.txt", "r") as posts_file:
            for line in posts_file:
                old_posts.append(line.rstrip())

        # Traverse to the post list in the response data
        feed = web_api.tag_feed(INSTAGRAM_TAG)["data"]["hashtag"]
        feed = feed[path]["edges"]

        for post in feed:
            post = post["node"]
            postId = post["id"]
            picUrl = post["display_url"]
            shortCode = post["shortcode"]
            isVideo = post["is_video"]

            if postId in old_posts:
                print("{} already exists".format(postId))
                continue

            with open("old_posts.txt", "a") as posts_file:
                posts_file.write(postId + "\n")
            old_posts.append(postId)
            if not isVideo:
                send2Ding(postId, picUrl, shortCode)

        time.sleep(REFRESH_DURATION)
def get_client(scraper_username, scraper_password, proxy):
    settings_path = settings_file_path.format(scraper_username)
    try:
        if not os.path.isfile(settings_path):
            # No cached session yet: log in and persist the settings
            # logger.info('[{}] Logging in'.format(scraper_username))
            return Client(username=scraper_username,
                          password=scraper_password,
                          authenticate=True,
                          proxy=proxy,
                          on_login=lambda x: on_login_callback(x, settings_path))
        else:
            with open(settings_path) as file_data:
                cached_settings = json.load(file_data, object_hook=from_json)
            # logger.info('[{}] Reusing settings: {}'.format(scraper_username, settings_path))
            # Reuse the cached auth settings
            return Client(username=scraper_username,
                          password=scraper_password,
                          settings=cached_settings,
                          proxy=proxy)
    except Exception as e:
        logger.error('Authentication failed')
        logger.error(e)
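# Several snippets here (get_client above, the Finder class further down)
# rely on session-caching helpers that are not shown: on_login_callback /
# onlogin_callback, from_json, and to_json. This is a minimal sketch modeled
# on the settings-persistence pattern from the library's own examples; treat
# the exact names as assumptions carried over from the snippets.
import codecs
import json

def to_json(python_object):
    # Cached settings contain raw cookie bytes, which json cannot encode,
    # so round-trip them through base64.
    if isinstance(python_object, bytes):
        return {'__class__': 'bytes',
                '__value__': codecs.encode(python_object, 'base64').decode()}
    raise TypeError(repr(python_object) + ' is not JSON serializable')

def from_json(json_object):
    # Inverse of to_json: restore base64-encoded byte strings.
    if json_object.get('__class__') == 'bytes':
        return codecs.decode(json_object['__value__'].encode(), 'base64')
    return json_object

def on_login_callback(api, settings_path):
    # Dump the authenticated client's settings so later runs can reuse them.
    with open(settings_path, 'w') as outfile:
        json.dump(api.settings, outfile, default=to_json)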
def rotate_proxies(event, context):
    proxies = get_proxies()
    # Pick a random proxy to start with.
    proxy_index = random.randint(0, len(proxies) - 1)
    proxy = proxies[proxy_index]
    final = ''
    # Replace this with Instagram/DynamoDB scraping.
    # Tested with the icanhazip.com API (returns the proxied IP address).
    for n in range(12):
        # Wait a random time before sending the request.
        time.sleep(random.uniform(0.1, 2.5))

        # Request icanhazip.com, which echoes back the IP address in use.
        req = Request('http://icanhazip.com')
        req.set_proxy(proxy['ip'] + ':' + proxy['port'], 'http')

        # Build the proxy URL and instantiate the client.
        spoof = 'http://' + proxy['ip'] + ':' + proxy['port']
        web_api = Client(
            auto_patch=True,
            drop_incompat_keys=False,
            proxy=spoof,
            timeout=30)
        # To test whether the client works...
        # token = web_api.csrftoken
        # print("Token", token)

        try:
            my_ip = urlopen(req).read().decode('utf8')
            final = final + '#' + str(n) + ':' + my_ip
        except Exception:
            # Drop the proxy that just failed and pick a replacement.
            del proxies[proxy_index]
            final = final + '# failed'
            proxy_index = random.randint(0, len(proxies) - 1)
            proxy = proxies[proxy_index]

        # Rotate the proxy ip/port combo every second request.
        if n % 2 == 0:
            proxy_index = random.randint(0, len(proxies) - 1)
            proxy = proxies[proxy_index]

    # Confirm that rotation works.
    print(final)
    response = {
        "statusCode": 200,
        "body": final
    }
    return response
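# get_proxies() is not shown above. A plausible stand-in that parses a local
# proxy list into the {'ip': ..., 'port': ...} dicts the rotator expects; the
# file name and the "ip:port" line format are assumptions.
def get_proxies(path='proxies.txt'):
    proxies = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            ip, port = line.split(':', 1)
            proxies.append({'ip': ip, 'port': port})
    return proxies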
class InstaFollow:
    def __init__(self, username, password, similar_users, API=None,
                 action_interval=8.0, rate=75, interval=5400):
        self.username = username
        self.password = password
        if isinstance(similar_users, str):
            self.similar_users = [x.strip() for x in similar_users.split(",")]
        else:
            self.similar_users = similar_users
        self.action_interval = action_interval
        self.rate = rate
        self.interval = interval
        self.logger = logging.LoggerAdapter(logger, {
            'user': self.username,
            'bot': 'instafollow'
        })
        self.API = Client(self.username, self.password) if API is None else API
        self.webAPI = WebClient()

    def _get_user_ids(self, save_to=None):
        self.logger.info('Collecting users to follow...')
        # Randomly select a root account to search for users
        account = self.similar_users[randint(0, len(self.similar_users) - 1)]
        username_info = self.API.username_info(account)
        # Get the root account's id
        root_account_id = username_info.get('user').get('pk')

        # Collect media ids from the root account's posts
        max_id = ''
        pages = 1
        media_ids = []
        for i in range(0, pages):
            user_feed = self.API.user_feed(root_account_id, max_id=max_id)
            for media in user_feed.get('items'):
                media_ids.append(media.get('id'))
            max_id = user_feed.get('next_max_id')

        # Collect the users who liked those posts
        user_ids = []
        for media_id in media_ids:
            try:
                media_likers = self.API.media_likers(media_id)
                users = media_likers.get('users')
            except ChunkedEncodingError as e:
                self.logger.error("Failed to retrieve user list: %s", e)
                users = []
            for user in users:
                user_ids.append(user.get('pk'))

        user_ids = list(set(user_ids))
        self.logger.info("Found {} new users...".format(len(user_ids)))
        return user_ids

    def _login(self):
        attempts = 0
        while attempts <= 10:
            try:
                if self.API.login():
                    return True
            except Exception:
                self.logger.exception("Failed to login...")
            sleep(6)
            attempts += 1
        return False

    def start(self):
        self.logger.info("Follow bot started...")
        users = []
        while len(users) < 7000:
            users += self._get_user_ids()

        progress = 0
        bad_requests = 0
        successful_requests = 0
        while users:
            progress += 1
            id = users.pop(0)
            res = self.API.friendships_create(id)
            if res.get("status") != "ok":
                users.append(id)
                bad_requests += 1
            else:
                successful_requests += 1

            if bad_requests == 10:
                self.logger.info("10 bad requests...sleeping for 3 mins 20 secs.")
                sleep(200)
                bad_requests = 0

            if not (progress % self.rate):
                progress = 0
                followings = self.webAPI.user_info2(self.username).get(
                    "follows", {}).get("count", 0)
                if followings > 7000:
                    break
                wait = uniform(self.interval * 0.9, self.interval * 1.1)
                self.logger.info(
                    "Cycle ended for user {} with {} successful requests and "
                    "{} followings...sleeping for {}mins".format(
                        self.username, successful_requests, followings, wait / 60))
                successful_requests = 0
                sleep(wait)

            # Sleep n seconds +/- 10% to induce randomness between each action
            sleep(uniform(self.action_interval * 0.9, self.action_interval * 1.1))
class InstagramCrawler:
    api = None
    user_id = '327416611'
    social_endpoint = None

    def __init__(self):
        self.api = Client(auto_patch=True, drop_incompat_keys=False)
        self.social_endpoint = os.getenv('SOCIAL_ENDPOINT', 'http://localhost:8080')

    def fetch(self, end_cursor=None):
        result = self.api.user_feed(self.user_id, count=50,
                                    extract=False, end_cursor=end_cursor)
        info = self.parse_http_result(result)
        for post in info['posts']:
            self.process_post(post)
        page_info = info['page_info']
        if page_info.get('has_next_page', False):
            time.sleep(2)
            self.fetch(page_info['end_cursor'])

    def parse_http_result(self, result):
        status = result.get('status', 'error')
        if status != 'ok':
            sys.exit('api response not ok')
        data = result['data']
        media = data['user']['edge_owner_to_timeline_media']
        return {
            'count': media['count'],
            'posts': [edge['node'] for edge in media['edges']],
            'page_info': media['page_info']
        }

    def process_post(self, post):
        payload = self.parse_post(post)
        r = requests.put('%s/instagram' % self.social_endpoint, json=payload)
        if r.status_code != 200:
            sys.exit(r.text)

    def parse_post(self, post):
        text_edges = post['edge_media_to_caption']['edges']
        if not text_edges:
            caption = ''
            tags = []
        else:
            text = text_edges[0]['node']['text']
            text = re.sub(r'\s+', ' ', text)
            text = re.sub(r'\.\s+', '', text)
            tags = list({
                tag.strip().lower()
                for tag in re.findall(r'(?<=#)[^# ]+(?=#|$| )', text)
            })
            caption = re.sub(r'(#[^# ]+ )*(#[^# ]+$)', '', text)
        return {
            'shortcode': post['shortcode'],
            'caption': caption,
            'tags': tags,
            'likes': post['likes']['count'],
            'comments': post['comments']['count'],
            'type': post['type'],
            'thumbnail': post['images']['thumbnail']['url'],
            'image': post['images']['standard_resolution']['url'],
            'timestamp': datetime.utcfromtimestamp(
                int(post['created_time'])).replace(tzinfo=timezone.utc).isoformat()
        }
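# A quick offline check of the hashtag-extraction regex used in parse_post
# above, on a made-up caption; no network access is needed. The sample text
# is an invention for illustration.
if __name__ == '__main__':
    sample = 'Sunset at the pier. Unreal colors #sunset #nofilter'
    text = re.sub(r'\s+', ' ', sample)
    tags = {tag.strip().lower()
            for tag in re.findall(r'(?<=#)[^# ]+(?=#|$| )', text)}
    print(tags)  # {'sunset', 'nofilter'} (set order may vary)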
def public_info(username):
    api = Client()
    return json.dumps(api.user_info2(username), indent=4)
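# Example call; any public username should work, and the request goes through
# the unauthenticated web API. 'instagram' is just a placeholder account.
if __name__ == '__main__':
    print(public_info('instagram'))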
print('Client version: %s' % __version__)

cached_auth = None
if args.settings_file_path and os.path.isfile(args.settings_file_path):
    with open(args.settings_file_path) as file_data:
        cached_auth = json.load(file_data)

api = None
if not cached_auth and args.username and args.password:
    # Start afresh without existing auth
    try:
        print('New login.')
        api = Client(auto_patch=True, drop_incompat_keys=False,
                     username=args.username, password=args.password,
                     authenticate=True)
    except ClientLoginError:
        print('Login Error. Please check your username and password.')
        sys.exit(99)

    cached_auth = api.settings
    if args.save:
        # This auth cache can be re-used for up to 90 days
        with open(args.settings_file_path, 'w') as outfile:
            json.dump(cached_auth, outfile)

elif cached_auth and args.username and args.password:
    try:
        print('Reuse login.')
class InstagramSession(session.StreamSession):

    BATCH_COUNT = 25

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.web_api = Client(
                proxy=self.proxies.get("https") if self.proxies else None,
                auto_patch=True, drop_incompat_keys=False)
        self.end_cursors = DefaultAttrDict(lambda: None)

    @memo(region="long")
    def user_name_to_id(self, user_name):
        try:
            user_id = self.web_api.user_info2(user_name)["id"]
        except Exception:
            raise SGException(f"user id for {user_name} not found")
        return user_id

    def get_feed_items(self, user_name, count=BATCH_COUNT):
        try:
            feed = self.web_api.user_feed(
                self.user_name_to_id(user_name), count=self.BATCH_COUNT,
                end_cursor=self.end_cursors[user_name])
        except ClientConnectionError as e:
            logger.warn(f"connection error: {e}")
            # Without a feed there is nothing to yield
            return

        for post in feed:
            try:
                cursor = (post["node"]["edge_media_to_comment"]
                          ["page_info"]["end_cursor"])
                if cursor:
                    self.end_cursors[user_name] = cursor
            except KeyError:
                pass

            post_type = None
            post_id = post["node"]["id"]
            try:
                title = post["node"]["caption"]["text"].replace("\n", "")
            except TypeError:
                title = "(no caption)"

            media_type = post["node"]["type"]
            if media_type == "video":
                post_type = "video"
                content = self.provider.new_media_source(
                    post["node"]["videos"]["standard_resolution"]["url"],
                    media_type="video")
            elif media_type == "image":
                if "carousel_media" in post["node"]:
                    post_type = "story"
                    content = [
                        self.provider.new_media_source(
                            m["images"]["standard_resolution"]["url"],
                            media_type="image")
                        if m["type"] == "image"
                        else self.provider.new_media_source(
                            m["video_url"], media_type="video")
                        if m["type"] == "video"
                        else None
                        for m in post["node"]["carousel_media"]
                    ]
                else:
                    post_type = "image"
                    content = self.provider.new_media_source(
                        post["node"]["images"]["standard_resolution"]["url"],
                        media_type="image")
            else:
                logger.warn(f"no content for post {post_id}")
                continue

            yield AttrDict(
                guid=post_id,
                title=title.strip(),
                post_type=post_type,
                created=datetime.fromtimestamp(int(post["node"]["created_time"])),
                content=content)
import json
import time

import twitter
from django.http import HttpResponse, JsonResponse
from dateutil.parser import parse
from instagram_web_api import Client, ClientCompatPatch, ClientError, ClientLoginError

from .models import user, selfPost, twitterPost, instaPost

TwitterApi = twitter.Api(
    consumer_key='tYbGmWUZkn4eNeJwG00t9N7Si',
    consumer_secret='OoKHVW52WU7p2hbYo0dHp1Nb2zwk1wgwRUjQ6YaKauAFVehdl2',
    access_token_key='3288956831-FpFDCVDeupp192qXpuJmOIC9VvT1gHXoHP0PQKc',
    access_token_secret='GcSIrBQy4c23dCvt5xPWSjg7ItT8tlyGrYctSzHjmBmd7')

InstaApi = Client(auto_patch=True, drop_incompat_keys=False)


def getUser(request, username):
    u = user.objects.get(username=username)
    result = {
        "username": u.username,
        "twitterID": u.twitterID,
        "InstagramID": u.instaID
    }
    return JsonResponse(result, safe=False, status=200)


def getPosts(request, username):
    results = []
    u = user.objects.get(username=username)
from instagram_web_api import Client, ClientCompatPatch, ClientError, ClientLoginError
from sightengine.client import SightengineClient

client = SightengineClient('630881392', 'St5TPUomwvLYq7eiXd4G')
web_api = Client(auto_patch=True, drop_incompat_keys=False)
user_feed_info = web_api.user_feed('232192182')


def checkDrugs(my_url):
    output = client.check('wad').set_url(my_url)
    return output['drugs']


def checkWeapons(my_url):
    output = client.check('wad').set_url(my_url)
    return output['weapon']


def checkAlcohol(my_url):
    output = client.check('wad').set_url(my_url)
    return output['alcohol']


def checkCaption(my_url):
    output = client.check('wad').set_url(my_url)
    print(output)
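# A sketch of wiring the feed above into the moderation helpers: run each
# post's image through the drugs check. The display_src/thumbnail_src fields
# follow the auto-patched user_feed layout used elsewhere in this file; this
# usage is an assumption, not part of the original script.
for item in user_feed_info:
    img = item['node'].get('display_src') or item['node'].get('thumbnail_src')
    if img:
        print(img, checkDrugs(img))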
def get_feed(twitter_api):
    last = src.last.get_last(src.last.PostType.MEDIA)
    highest = last

    web_api = Client(auto_patch=True, drop_incompat_keys=False)
    user_feed = web_api.user_feed(os.getenv('INSTAGRAM_USERID'), count=23)

    for post in reversed(user_feed):
        # The ID comes in the format 'POSTID_USERID'
        post_id = int(post['node']['id'].split('_')[0])

        # Skip posts that have already been processed
        if post_id <= last:
            continue

        # Hashtag
        tweet_metadata = ['#鈴木このみ', ' ']
        # Format the timestamp
        timestamp = datetime.datetime.fromtimestamp(
            post['node']['taken_at_timestamp'], pytz.timezone('Asia/Tokyo'))
        tweet_metadata += [timestamp.strftime('%Y-%m-%d %H:%M'), '\n']
        # Post URL
        tweet_metadata.append(post['node']['link'])
        # Caption
        caption = post['node']['caption']['text']
        tweet_content = ['\n\n', caption]

        media = []  # List of per-tweet batches of media URLs

        if post['node']['__typename'] == MediaType.GALLERY.value:
            list_type = None
            media_list = []
            for gallery_item in post['node']['edge_sidecar_to_children']['edges']:
                if gallery_item['node']['__typename'] == MediaType.VIDEO.value:
                    if list_type is None:
                        media.append([gallery_item['node']['video_url']])
                    elif list_type is MediaType.IMAGE:
                        # An image list is in progress: commit it, then
                        # create a new list holding just the video
                        media.append(media_list)
                        media.append([gallery_item['node']['video_url']])
                        list_type = None
                        media_list = []
                else:
                    if list_type is None:
                        # No list in progress
                        list_type = MediaType.IMAGE
                        media_list.append(gallery_item['node']['display_url'])
                    elif list_type is MediaType.IMAGE:
                        # An image list is in progress
                        if len(media_list) > 4:
                            # The list is somehow overfull. Tweets only allow
                            # 4 images, so the extra ones need to be split off
                            while len(media_list) >= 4:
                                media.append(media_list[:4])
                                media_list = media_list[4:]
                            media_list.append(gallery_item['node']['display_url'])
                        elif len(media_list) == 4:
                            # The list is full: commit it and start a new one
                            media.append(media_list)
                            media_list = [gallery_item['node']['display_url']]
                        else:
                            # The list is not full yet
                            media_list.append(gallery_item['node']['display_url'])
            # Commit the unfinished list, if any
            if list_type is MediaType.IMAGE and len(media_list) > 0:
                media.append(media_list)
        elif post['node']['__typename'] == MediaType.VIDEO.value:
            media.append([post['node']['video_url']])
        else:
            media.append([post['node']['display_url']])

        tweet_str = twutils.truncate_status(''.join(tweet_metadata + tweet_content))

        prev_status = 0
        for tweet_media in media:
            replyto = None
            if prev_status > 0:
                # Follow-up tweets thread onto the previous one
                tweet_str = twutils.truncate_status(''.join(tweet_metadata))
                replyto = prev_status
            if os.getenv('ENV', 'dev') == 'production':
                prev_status = twitter_api.PostUpdate(
                    tweet_str, tweet_media,
                    in_reply_to_status_id=replyto).id
            else:
                prev_status += 1
                twitter_api.write(tweet_str + '\n\n')
                twitter_api.write('\n'.join(tweet_media) + '\n\n')

        # Track the highest ID seen
        if post_id > highest:
            highest = post_id

    if highest > last:
        src.last.set_last(str(highest), src.last.PostType.MEDIA)
class WebAPIExportManager(object):
    def __init__(self, connection_url='localhost', port=27017):
        self.web_api = Client(auto_patch=True, drop_incompat_keys=False)
        self.mongo_client = MongoClient(connection_url, port=port)

    def write_to_mongo(self, collection, record):
        db = self.mongo_client.test_db
        db[collection].insert_one(record)

    # Usage:
    #   api = WebAPIExportManager()
    #   api.get_account('...')
    def get_account(self, account_name='nadine__is'):
        try:
            full_data = self.web_api.user_info2(account_name)
            clear_data = {
                'id': full_data.get('id'),
                'date': datetime.utcnow(),
                'name': full_data.get('full_name'),
                'username': full_data.get('username'),
                'followers': full_data.get('counts').get('followed_by'),
                'posts': full_data.get('edge_owner_to_timeline_media').get('count'),
            }

            likes = comments = count = 0
            for post in full_data.get('edge_owner_to_timeline_media').get('edges'):
                likes += post['node']['edge_liked_by']['count']
                comments += post['node']['edge_media_to_comment']['count']
                count += 1

            clear_data['likes'] = likes
            clear_data['comments'] = comments
            clear_data['engagement_rate'] = (likes + comments) * 100 / (
                clear_data['followers'] * count)

            self.write_to_mongo('accounts', clear_data)
            return 'Account successfully scraped'
        except Exception as ex:
            print(str(ex))
            return 'Something went wrong'

    # Usage:
    #   api.day_account_scrap('...')
    def day_account_scrap(self, account_name):
        while True:
            self.get_account(account_name=account_name)
            # Sleep until just past the next midnight. Using timedelta avoids
            # the month-end overflow that day=today.day + 1 would hit
            # (requires: from datetime import timedelta)
            today = datetime.today()
            tomorrow = (today + timedelta(days=1)).replace(
                hour=0, minute=0, second=0, microsecond=0)
            seconds = (tomorrow - today).seconds + 1
            print(f'Next scrape will be {tomorrow}')
            time.sleep(seconds)
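# A short usage sketch of the exporter above. The account name and the local
# MongoDB connection details are placeholders.
if __name__ == '__main__':
    manager = WebAPIExportManager(connection_url='localhost', port=27017)
    print(manager.get_account(account_name='instagram'))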
class InstaUnfollow:
    def __init__(self, username, password, API=None, action_interval=8.0,
                 rate=120, interval=5400, unfollow_all=True):
        self.username = username
        self.password = password
        self.action_interval = action_interval
        self.rate = rate
        self.interval = interval
        self.unfollow_all = unfollow_all
        try:
            from app import logger
            logger = get_logger()
        except ImportError:
            pass
        self.logger = logging.LoggerAdapter(logger, {
            'user': self.username,
            'bot': 'instaunfollow'
        })
        self.API = Client(self.username, self.password) if API is None else API
        self.webAPI = WebClient()

    def _get_user_ids(self):
        self.logger.info('Collecting users to unfollow...')
        # Get the accounts this user is following
        rank_token = self.API.generate_uuid()
        following = self.API.user_following(self.id, rank_token=rank_token)
        following_users = following.get("users")
        _ids = [user.get("pk", 0) for user in following_users]
        return _ids

    def _login(self):
        attempts = 0
        while attempts <= 10:
            try:
                if self.API.login():
                    return True
            except Exception as e:
                self.logger.error("Failed to login: %s", e)
            sleep(6)
            attempts += 1
        return False

    def start(self):
        self.logger.info("Unfollow bot started for user {}...".format(
            self.API.username))
        self.id = self.webAPI.user_info2(self.username).get("id")
        users = self._get_user_ids()

        progress = 0
        too_many_request_errors = 0
        while users:
            progress += 1
            id = users.pop(0)
            res = self.API.friendships_destroy(id)
            if res.get("status") != "ok":
                users.append(id)
                too_many_request_errors += 1
            if too_many_request_errors == 10:
                sleep(randint(60, 100))
                too_many_request_errors = 0
            if not (progress % self.rate):
                sleep(uniform(self.interval * 0.9, self.interval * 1.1))
            # Sleep n seconds +/- 10% to induce randomness between each action
            sleep(uniform(self.action_interval * 0.9,
                          self.action_interval * 1.1))
            # (fragment: tail of a caption-cleaning helper)
            new_str = new_str + word + " "
    else:
        new_str = new_str + doc
    ret = new_str.strip()
    # Strip newlines, commas, slashes and backslashes
    ret = re.sub(r"[\n,/\\]", "", ret)
    # ret = ret.replace("  ", " ")
    return ret


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage: python3 get_feed.py [query]')
        sys.exit(0)

    query = sys.argv[1]
    api = Client(auto_patch=True, drop_incompat_keys=False)
    user_feed = api.user_feed(query, count=10)

    feed_data = {
        "username": user_feed[0]["node"]["owner"]["username"],
        "user_id": user_feed[0]["node"]["owner"]["id"],
        "media": []
    }

    for photo in user_feed:
        media_shortcode = photo["node"]["shortcode"]
        data = photo["node"]
        media_data = {
class Finder(object):
    def __init__(self, id):
        # The constructor immediately acquires API access and stores the id
        # of the user being tracked
        self.__getMyWebAPI()
        self.webClient = WebClient(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                       "AppleWebKit/537.36 (KHTML, like Gecko) "
                       "Chrome/80.0.3987.149 Safari/537.36")
        time.sleep(0.2)
        self.userId = id

    def __getMyWebAPI(self):
        logging.basicConfig()
        logger = logging.getLogger('instagram_private_api')
        logger.setLevel(logging.WARNING)

        password = '******'
        login = '******'
        fil = 'loginset.json'

        print('Client version: {0!s}'.format(client_version))
        device_id = None
        try:
            settings_file = fil
            if not os.path.isfile(settings_file):
                # The settings file does not exist: do a fresh login
                print('Unable to find file: {0!s}'.format(settings_file))
                self.myWebAPI = Client(
                    login, password,
                    on_login=lambda x: onlogin_callback(x, fil))
            else:
                with open(settings_file) as file_data:
                    cached_settings = json.load(file_data, object_hook=from_json)
                print('Reusing settings: {0!s}'.format(settings_file))
                device_id = cached_settings.get('device_id')
                # Reuse the cached auth settings
                self.myWebAPI = Client(login, password, settings=cached_settings)
        except (ClientCookieExpiredError, ClientLoginRequiredError) as e:
            print('ClientCookieExpiredError/ClientLoginRequiredError: {0!s}'.format(e))
            # Login expired: log in again, but reuse the default ua, keys and such
            self.myWebAPI = Client(
                login, password, device_id=device_id,
                on_login=lambda x: onlogin_callback(x, fil))
        except ClientLoginError as e:
            print('ClientLoginError {0!s}'.format(e))
            exit(9)
        except ClientError as e:
            print('ClientError {0!s} (Code: {1:d}, Response: {2!s})'.format(
                e.msg, e.code, e.error_response))
            exit(9)
        except Exception as e:
            print('Unexpected Exception: {0!s}'.format(e))
            exit(99)

        # Show when the login expires
        cookie_expiry = self.myWebAPI.cookie_jar.auth_expires
        print('Cookie Expiry: {0!s}'.format(
            datetime.datetime.fromtimestamp(cookie_expiry).strftime(
                '%Y-%m-%dT%H:%M:%SZ')))

    def findFeed(self, next):
        if next != -1 and next != -2:
            feed = self.myWebAPI.user_feed(self.userId, max_id=next)
        else:
            feed = self.myWebAPI.user_feed(self.userId)
        time.sleep(0.2)
        return feed['items'], feed.get('next_max_id', -2)

    def findComments(self, media_id, next):
        if next != -1 and next != -2:
            comments = self.myWebAPI.media_comments(media_id, max_id=next)
        else:
            comments = self.myWebAPI.media_comments(media_id)
        time.sleep(0.2)
        return comments['comments'], comments.get('next_max_id', -2)

    def takeCommentsWithoutCircle(self, media_id, comment_count):
        comments = self.myWebAPI.media_n_comments(
            media_id, n=comment_count, reverse=True)
        time.sleep(0.2)
        return comments

    def findNewFeed(self):
        feed = self.myWebAPI.user_feed(self.userId)
        time.sleep(0.2)
        return feed['items']

    def findIGTV(self):
        igtv = self.webClient.user_info2('igor_artamonov48')
        time.sleep(0.2)
        return igtv['edge_felix_video_timeline']['edges']
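# A short sketch of paging through Finder.findFeed using its sentinel
# protocol: pass -1 on the first call, and stop when the returned cursor is
# -2 (no next_max_id left). The user id is a placeholder, and this assumes
# valid credentials inside __getMyWebAPI.
finder = Finder('327416611')
cursor = -1
while cursor != -2:
    items, cursor = finder.findFeed(cursor)
    for item in items:
        print(item.get('id'))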
from instagram_web_api import Client, ClientCompatPatch, ClientError, ClientLoginError
import json
import sys

# 1518284433 - Robert Downey Jr.
# 30588147

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage: python3 get_user.py [query]')
        sys.exit(0)

    query = sys.argv[1]
    api = Client(auto_patch=True, drop_incompat_keys=False)
    user = api.user_info2(query)
    retval = {
        "posts": user["counts"]["media"],
        "followers": user["counts"]["followed_by"],
        "following": user["counts"]["follows"],
        "is_verified": user["is_verified"],
        "fullname": user["full_name"],
        "profile_pic_url": user["profile_pic_url_hd"]
    }
    print(json.dumps(retval))
    sys.stdout.flush()