Example #1
0
 def __init__(self, *args, **kwargs):
     """Initialize the session and build an anonymous Instagram web client.

     All positional/keyword arguments are forwarded to the parent class.
     """
     super().__init__(*args, **kwargs)
     # The client library emits warnings on construction; silence them locally.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         self.web_api = Client(
             # Route through the configured https proxy when one exists.
             proxy=self.proxies.get("https") if self.proxies else None,
             auto_patch=True,
             drop_incompat_keys=False)
     # Per-user pagination cursors; default None means "start of feed".
     self.end_cursors = DefaultAttrDict(lambda: None)
Example #2
0
def run():
    """Poll the Instagram feeds of the configured accounts forever.

    Each cycle fetches the latest post for every account in ``user_dict``
    and, when a post is newer than the timestamp stored in the database,
    logs it and (after the first warm-up cycle) inserts it as a message.
    This function never returns.
    """
    web_api = Client(auto_patch=True, drop_incompat_keys=False)

    startup = True
    user_dict = {
        "SpaceX": "20311520",
        "jclishman.testing": "7400533474"
    }

    while True:
        # Rebuild the feed every cycle: accumulating it across cycles (the
        # old behavior) reprocessed every old post and grew without bound.
        feed = []
        for id_str in list(user_dict.values()):
            try:
                feed.append(web_api.user_feed(id_str, count=1))
                time.sleep(5)
            except Exception:
                # Best effort: back off, then continue with the next account.
                #logger.error("Error getting feed. Sleeping for 30s")
                time.sleep(30)

        for post in feed:
            post = post[0]["node"]
            user_id_str = post["owner"]["id"]
            shortcode = post["shortcode"]
            timestamp = post["created_time"]

            # Empty string if there isn't a caption (missing key or None).
            try:
                caption = post["caption"]["text"]
            except (KeyError, TypeError):
                caption = ''

            # Match ID number to screenname
            for screen_name, id_str in user_dict.items():
                if user_id_str == id_str:
                    user_screen_name = screen_name

            stored_timestamp = db.get_instagram_timestamp(user_screen_name)

            if int(timestamp) > stored_timestamp:
                start_time = time.time()
                db.update_instagram_timestamp(user_screen_name, int(timestamp))

                logger.info(f"New Instagram post by @{user_screen_name}, id {user_id_str}")
                logger.info(f"Post shortcode: {shortcode}")
                logger.info(f"Post caption: {caption}")
                logger.info(f"Post timestamp: {timestamp}")

                url = f"https://instagram.com/p/{shortcode}"
                # Skip inserts during the warm-up cycle so a restart does not
                # re-announce posts that were already handled.
                if not startup:
                    db.insert_message('Instagram', user_screen_name, caption.replace("\n", " "), url, start_time)

        time.sleep(10)
        startup = False
Example #3
0
def instagram_feed(user_handle="google", user_id=1067259270):
    """Return up to 50 recent posts of *user_id* as a JSON HTTP response.

    Each entry carries provider metadata, like/view counts, caption and
    media links. Raises RuntimeError when a post cannot be parsed.
    """
    #TODO: generate user_id from user_handle instead of requiring it
    feed_list = []
    web_api = Client(auto_patch=True, drop_incompat_keys=False)
    user_feed_info = web_api.user_feed(user_id, count=50)  # up to fifty posts

    for feeds in user_feed_info:
        try:
            raw_item = feeds["node"]
            date = datetime.fromtimestamp(
                int(raw_item.get('taken_at_timestamp')))
            # A caption-less post has an empty edges list; the original
            # indexed [0] unconditionally and crashed with IndexError.
            caption_edges = raw_item["edge_media_to_caption"]["edges"]
            caption = caption_edges[0]["node"]["text"] if caption_edges else ''
            feed_info = {
                "provider": "instagram",
                "provider_handle": user_handle or '',
                "link": raw_item["link"] or '',
                "likes": raw_item["likes"]["count"] or 0,
                "media": [],
                "video_views": raw_item.get('video_view_count') or 0,
                "caption": caption or '',
            }

            feed_info['pubDate'] = date.strftime(
                '%a, %d %b %Y %H:%M:%S') + ' GMT'
            img_link = raw_item.get('display_src') or raw_item.get(
                'thumbnail_src')
            if img_link:
                feed_info['media'].append(img_link)
            if raw_item["is_video"]:
                feed_info["videos"] = raw_item["display_url"]
                vid_link = feed_info["videos"]
                if vid_link:
                    feed_info['media'].append(vid_link)

            feed_list.append(feed_info)
        except Exception as exc:
            # The original did ``raise ("...")``, which itself fails with
            # TypeError (exceptions must derive from BaseException).
            raise RuntimeError(
                "Could not get instagram feed or Feed does not exist") from exc

    # json.dumps returns str; BytesIO requires bytes (the original passed
    # the str straight in, which is a TypeError at runtime).
    body = json.dumps(feed_list).encode('utf-8')
    return app.response_class(BytesIO(body),
                              content_type='application/json')
Example #4
0
def scrape(event, context, page_size=settings.PAGE_SIZE):
    """Scrape one page of an Instagram location feed.

    ``event``/``context`` follow the AWS Lambda convention; the event may
    carry ``location`` and ``cursor`` keys. Returns a dict with the next
    ``cursor`` and the list of post nodes in ``data``.
    """
    # Instagram no longer enables scraping of pages larger than 50. We want
    # to enable scraping pages of different sizes for trial purposes, but if
    # we try it with more than 50 Instagram will just block the request.
    # Raise explicitly: ``assert`` is stripped under ``python -O``.
    if page_size > 50:
        raise ValueError("Page size must be <= 50")

    # extract the location and cursor from the arguments.
    location_id = event.get('location', settings.DEFAULT_LOCATION)
    end_cursor = event.get('cursor', None)

    # instantiate the instagram web client
    web_api = Client(auto_patch=True, drop_incompat_keys=False)

    # query a page of this location's feed
    location_feed_info = web_api.location_feed(
        location_id,
        count=page_size,
        end_cursor=end_cursor,
        # proxy=proxy.get_random_http_proxy()
    )

    if location_feed_info['status'] != "ok":
        # The original bare ``raise`` had no active exception and itself
        # failed with RuntimeError; raise one with a useful message.
        raise RuntimeError("Instagram API returned status {!r}".format(
            location_feed_info['status']))

    # massage the GraphQL response into a more usable form
    location = location_feed_info['data']['location']
    media = location['edge_location_to_media']

    # drop the bulky media edges from the location metadata
    del location['edge_location_to_media']
    del location['edge_location_to_top_posts']

    cursor = media['page_info']['end_cursor']

    posts = [node['node'] for node in media['edges']]

    # return the response
    return {
        "cursor": cursor,
        "data": posts,
    }
    def auth(self):
        """Log in to Instagram, reusing cached client settings when available.

        On success ``self._api`` holds an authenticated Client; the cookie
        expiry time is logged for diagnostics.
        """
        settings = self._settings_manager.get_settings()
        client_params = dict(
            auto_patch=True,
            authenticate=True,
            username=self.username,
            password=self._password,
        )
        if not settings:
            # First login: register a callback that persists the new settings.
            logging.debug(
                f"Unable to find file: {self._settings_manager.setting_file_name!s}"
            )
            client_params.update(
                dict(on_login=lambda x: onlogin_callback(x, self.username)))
        else:
            # NOTE(review): this second get_settings() call looks redundant —
            # ``settings`` above should hold the same data; confirm and simplify.
            cached_settings = self._settings_manager.get_settings()
            logging.debug(
                f"Reusing settings: {self._settings_manager.setting_file_name!s}"
            )
            client_params.update(dict(settings=cached_settings))

        self._api = Client(**client_params)

        # Show when login expires
        cookie_expiry = self._api.cookie_jar.auth_expires
        logging.debug("Cookie Expiry: {0!s}".format(
            datetime.datetime.fromtimestamp(cookie_expiry).strftime(
                "%Y-%m-%dT%H:%M:%SZ")))
Example #6
0
 def __init__(self,
              username,
              password,
              API=None,
              action_interval=8.0,
              rate=120,
              interval=5400,
              unfollow_all=True):
     """Configure the unfollow bot.

     If *API* is None an authenticated Client is created from the given
     credentials; *rate*/*interval*/*action_interval* pace the requests.
     """
     self.username = username
     self.password = password
     self.action_interval = action_interval
     self.rate = rate
     self.interval = interval
     self.unfollow_all = unfollow_all
     try:
         # NOTE(review): the imported ``logger`` is immediately overwritten
         # by get_logger(); and if the import fails, the ``logger`` used
         # below must come from an enclosing scope or this raises
         # NameError — confirm the intent here.
         from app import logger
         logger = get_logger()
     except ImportError:
         pass
     self.logger = logging.LoggerAdapter(logger, {
         'user': self.username,
         'bot': 'instaunfollow'
     })
     self.API = Client(self.username, self.password) if API is None else API
     self.webAPI = WebClient()
Example #7
0
    def __init__(self,
                 username,
                 password,
                 similar_users,
                 API=None,
                 action_interval=8.0,
                 rate=75,
                 interval=5400):
        """Create a follow bot for *username*.

        *similar_users* may be a comma-separated string or a sequence of
        account names whose audiences will be targeted.
        """
        self.username = username
        self.password = password

        # Accept either "a, b, c" or an already-split sequence.
        if isinstance(similar_users, str):
            self.similar_users = [name.strip()
                                  for name in similar_users.split(",")]
        else:
            self.similar_users = similar_users

        self.action_interval = action_interval
        self.rate = rate
        self.interval = interval
        self.logger = logging.LoggerAdapter(
            logger, {'user': self.username, 'bot': 'instafollow'})

        if API is None:
            self.API = Client(self.username, self.password)
        else:
            self.API = API
        self.webAPI = WebClient()
Example #8
0
def main():
    """Poll an Instagram hashtag feed forever and forward new posts to Dingtalk.

    NOTE(review): Python 2 source (print statements) — keep running under a
    Python 2 interpreter.
    """
    # Refuse to run with the placeholder webhook token still in place.
    if ACCESS_TOKEN == DEFAULT_ACCESS_TOKEN:
        print "Please update your Dingtalk webhook access token"
        return

    # Choose between the "hottest" and "latest" sections of the tag feed.
    path = INSTAGRAM_PATH_HOTTEST if INSTAGRAM_IS_FEED_HOTTEST else INSTAGRAM_PATH_LATEST

    web_api = Client(auto_patch=True, drop_incompat_keys=False)
    while True:
        # Load up post have already been posted
        old_posts = []
        with open("old_posts.txt", "r") as posts_file:
            for line in posts_file:
                old_posts.append(line.rstrip())

        # Traverse to post list in response data
        feed = web_api.tag_feed(INSTAGRAM_TAG)["data"]["hashtag"]
        feed = feed[path]["edges"]

        for post in feed:
            post = post["node"]

            postId = post["id"]
            picUrl = post["display_url"]
            shortCode = post["shortcode"]
            isVideo = post["is_video"]

            # Skip posts that were forwarded on a previous cycle.
            if postId in old_posts:
                print "{} already exist".format(postId)
                continue
            else:
                # Persist the id so it is skipped even after a restart.
                with open("old_posts.txt", "a") as posts_file:
                    posts_file.write(postId+"\n")

                old_posts.append(postId)
                # Only images are forwarded; videos are skipped.
                if not isVideo:
                    send2Ding(postId, picUrl, shortCode)

        time.sleep(REFRESH_DURATION)
Example #9
0
def get_client(scraper_username, scraper_password, proxy):
    """Return an authenticated Client for *scraper_username*.

    When no settings file exists a fresh login is performed (persisting the
    new settings via ``on_login_callback``); otherwise the cached settings
    are reused. Returns None if authentication fails.
    """
    settings_path = settings_file_path.format(scraper_username)
    try:
        if not os.path.isfile(settings_path):
            # Fresh login; the callback persists settings for next time.
            return Client(username=scraper_username,
                          password=scraper_password,
                          authenticate=True,
                          proxy=proxy,
                          on_login=lambda x: on_login_callback(x, settings_path))

        with open(settings_path) as file_data:
            cached_settings = json.load(file_data, object_hook=from_json)

        # Reuse cached auth settings instead of logging in again.
        # (The unused ``device_id`` extraction was dropped.)
        return Client(
            username=scraper_username, password=scraper_password,
            settings=cached_settings,
            proxy=proxy)
    except Exception as e:
        # Best-effort: callers must cope with a None return.
        logger.error('Authentication failed')
        logger.error(e)
Example #10
0
def rotate_proxies(event, context, page_size=None):
    """Exercise proxy rotation by fetching icanhazip.com through random proxies.

    ``event``/``context`` follow the Lambda convention and are unused here.
    Returns a dict with ``statusCode`` and a ``body`` string accumulating
    one entry per attempt ("#n:ip" on success, "# failed" otherwise).
    """
    proxies = get_proxies()

    # Picking random proxy to use.
    proxy = random.choice(proxies)

    final = ''

    # Replace this with insta/dynamoDB scraping.
    # Tested with icanhazip.com API (returns proxied ip address).
    for n in range(0, 12):
        # Wait random time to send request.
        time.sleep(random.uniform(0.1, 2.5))

        # Request to icanhazip, which returns IP address that is used.
        req = Request('http://icanhazip.com')
        req.set_proxy(proxy['ip'] + ':' + proxy['port'], 'http')

        # Creating proxy url. Instantiating client.
        spoof = 'http://' + proxy['ip'] + ':' + proxy['port']
        web_api = Client(
            auto_patch=True, drop_incompat_keys=False, proxy=spoof, timeout=30)

        # Change the proxy ip/port combo every 2.
        if n % 2 == 0:
            proxy_index = random.randint(0, len(proxies) - 1)
            proxy = proxies[proxy_index]

        try:
            my_ip = urlopen(req).read().decode('utf8')
            final = final + '#' + str(n) + ':' + my_ip
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
            # are not swallowed. Drop the failing proxy and pick another.
            del proxies[proxy_index]
            final = final + '# ' + 'failed'
            if not proxies:
                # Every proxy failed; stop instead of crashing on randint(0, -1).
                break
            proxy_index = random.randint(0, len(proxies) - 1)
            proxy = proxies[proxy_index]

    # Test that rotation works.
    print(final)
    response = {
        "statusCode": 200,
        "body": final
    }
    return response
Example #11
0
 def __init__(self, connection_url='localhost', port=27017):
     """Open a MongoDB connection and an anonymous Instagram web client."""
     self.mongo_client = MongoClient(connection_url, port=port)
     self.web_api = Client(auto_patch=True, drop_incompat_keys=False)
Example #12
0
class InstaFollow:
    """Bot that follows users drawn from the audience of similar accounts."""

    def __init__(self,
                 username,
                 password,
                 similar_users,
                 API=None,
                 action_interval=8.0,
                 rate=75,
                 interval=5400):
        """Set up credentials, pacing parameters and API clients.

        *similar_users* may be a comma-separated string or a sequence of
        account names whose post likers are targeted.
        """
        self.username = username
        self.password = password

        if isinstance(similar_users, str):
            self.similar_users = [x.strip() for x in similar_users.split(",")]
        else:
            self.similar_users = similar_users

        self.action_interval = action_interval
        self.rate = rate
        self.interval = interval
        self.logger = logging.LoggerAdapter(logger, {
            'user': self.username,
            'bot': 'instafollow'
        })

        self.API = Client(self.username, self.password) if API is None else API
        self.webAPI = WebClient()

    def _get_user_ids(self, save_to=None):
        """Return deduplicated ids of users who liked recent posts of a
        randomly chosen similar account."""
        self.logger.info('Collecting users to follow...')

        # Randomly select root account to search for users
        account = self.similar_users[randint(0, len(self.similar_users) - 1)]
        username_info = self.API.username_info(account)

        # Get root account id
        root_account_id = username_info.get('user').get('pk')

        # Collect media ids from the root account's recent posts
        max_id = ''
        pages = 1
        media_ids = []

        for _ in range(pages):
            user_feed = self.API.user_feed(root_account_id, max_id=max_id)
            for media in user_feed.get('items'):
                media_ids.append(media.get('id'))
            max_id = user_feed.get('next_max_id')

        user_ids = []

        for media_id in media_ids:
            try:
                # The network call is what can raise ChunkedEncodingError,
                # so it belongs inside the try (the original only guarded
                # the dict lookup, letting the error escape).
                media_likers = self.API.media_likers(media_id)
                users = media_likers.get('users')
            except ChunkedEncodingError as e:
                # Lazy %-formatting: the original passed the exception as a
                # bare extra positional, which breaks log formatting.
                self.logger.error("Failed to retrieve user list: %s", e)
                users = []

            for liker in users:
                # 'pk' is the numeric user id (avoid shadowing builtin id).
                user_ids.append(liker.get('pk'))

        user_ids = list(set(user_ids))

        self.logger.info("Found {} new users...".format(len(user_ids)))

        return user_ids

    def _login(self):
        """Attempt to log in up to 11 times; return True on success."""
        attempts = 0
        while attempts <= 10:
            try:
                if self.API.login():
                    return True
            except Exception:
                self.logger.exception("Failed to login...")

            sleep(6)
            attempts += 1

        return False

    def start(self):
        """Follow collected users, pacing requests and sleeping per cycle."""
        self.logger.info("Follow bot started...")
        users = []
        while len(users) < 7000:
            users += self._get_user_ids()
        progress = 0
        bad_requests = 0
        successful_requests = 0
        while users:
            progress += 1

            user_id = users.pop(0)

            res = self.API.friendships_create(user_id)

            if res.get("status", False) != "ok":
                # Requeue the user for a later retry.
                users.append(user_id)
                bad_requests += 1
            else:
                successful_requests += 1

            if bad_requests == 10:
                self.logger.info(
                    "10 bad requests...sleeping for 3 mins 20 secs.")
                sleep(200)
                bad_requests = 0

            # Every `rate` actions, check the follow count and rest a while.
            if not (progress % self.rate):
                progress = 0
                followings = self.webAPI.user_info2(self.username).get(
                    "follows", {}).get("count", 0)
                if followings > 7000:
                    break

                wait = uniform(self.interval * 0.9, self.interval * 1.1)
                self.logger.info(
                    "Cycle ended for user {} with {} successful requests and {} followers...sleeping for {}mins"
                    .format(self.username, successful_requests, followings,
                            wait / 60))
                successful_requests = 0
                sleep(wait)

            # Sleep n seconds +/ 10% to induce randomness between each action
            sleep(
                uniform(self.action_interval * 0.9,
                        self.action_interval * 1.1))
Example #13
0
class InstagramCrawler:
    """Crawls a user's Instagram feed and pushes each post to a social API."""

    api = None

    # Instagram numeric id of the account to crawl.
    user_id = '327416611'

    social_endpoint = None

    def __init__(self):
        self.api = Client(auto_patch=True, drop_incompat_keys=False)
        self.social_endpoint = os.getenv('SOCIAL_ENDPOINT',
                                         'http://localhost:8080')

    def fetch(self, end_cursor=None):
        """Walk the whole feed page by page, processing every post.

        Iterative pagination: the original recursed once per page, which
        could exhaust the recursion limit on long feeds.
        """
        while True:
            result = self.api.user_feed(self.user_id,
                                        count=50,
                                        extract=False,
                                        end_cursor=end_cursor)

            info = self.parse_http_result(result)

            for post in info['posts']:
                self.process_post(post)

            page_info = info['page_info']
            if not page_info.get('has_next_page', False):
                break
            end_cursor = page_info['end_cursor']
            # Be polite to the API between pages.
            time.sleep(2)

    def parse_http_result(self, result):
        """Flatten the GraphQL response into count/posts/page_info."""
        status = result.get('status', 'error')

        if status != 'ok':
            sys.exit('api response not ok')

        data = result['data']
        media = data['user']['edge_owner_to_timeline_media']

        return {
            'count': media['count'],
            'posts': [edge['node'] for edge in media['edges']],
            'page_info': media['page_info']
        }

    def process_post(self, post):
        """PUT one parsed post to the social endpoint; exit on failure."""
        payload = self.parse_post(post)

        r = requests.put('%s/instagram' % self.social_endpoint, json=payload)

        if r.status_code != 200:
            sys.exit(r.text)

    def parse_post(self, post):
        """Extract caption, hashtags and media info from a raw post node."""
        text_edges = post['edge_media_to_caption']['edges']

        if not text_edges:
            caption = ''
            tags = []
        else:
            text = text_edges[0]['node']['text']

            # Raw strings: '\.' / '\s' in plain literals are invalid escape
            # sequences (DeprecationWarning, future SyntaxError).
            text = re.sub(r'\s+', ' ', text)
            text = re.sub(r'\.\s+', '', text)
            tags = list({
                tag.strip().lower()
                for tag in re.findall(r'(?<=#)[^# ]+(?=#|$| )', text)
            })
            # Strip the trailing hashtag block from the caption.
            caption = re.sub(r'(#[^# ]+ )*(#[^# ]+$)', '', text)

        return {
            'shortcode': post['shortcode'],
            'caption': caption,
            'tags': tags,
            'likes': post['likes']['count'],
            'comments': post['comments']['count'],
            'type': post['type'],
            'thumbnail': post['images']['thumbnail']['url'],
            'image': post['images']['standard_resolution']['url'],
            # Timezone-aware UTC; equivalent to the deprecated
            # utcfromtimestamp(...).replace(tzinfo=utc) the original used.
            'timestamp': datetime.fromtimestamp(
                int(post['created_time']), tz=timezone.utc).isoformat()
        }
Example #14
0
def public_info(username):
    """Return the public profile info for *username* as pretty-printed JSON."""
    anon_client = Client()
    info = anon_client.user_info2(username)
    return json.dumps(info, indent=4)
    print('Client version: %s' % __version__)

    cached_auth = None
    if args.settings_file_path and os.path.isfile(args.settings_file_path):
        with open(args.settings_file_path) as file_data:
            cached_auth = json.load(file_data)

    api = None
    if not cached_auth and args.username and args.password:
        # start afresh without existing auth
        try:
            print('New login.')
            api = Client(auto_patch=True,
                         drop_incompat_keys=False,
                         username=args.username,
                         password=args.password,
                         authenticate=True)
        except ClientLoginError:
            print('Login Error. Please check your username and password.')
            sys.exit(99)

        cached_auth = api.settings
        if args.save:
            # this auth cache can be re-used for up to 90 days
            with open(args.settings_file_path, 'w') as outfile:
                json.dump(cached_auth, outfile)

    elif cached_auth and args.username and args.password:
        try:
            print('Reuse login.')
Example #16
0
 def __init__(self):
     """Create an anonymous Instagram client and resolve the social endpoint.

     The endpoint defaults to http://localhost:8080 unless the
     SOCIAL_ENDPOINT environment variable is set.
     """
     self.api = Client(auto_patch=True, drop_incompat_keys=False)
     self.social_endpoint = os.getenv('SOCIAL_ENDPOINT',
                                      'http://localhost:8080')
Example #17
0
class InstagramSession(session.StreamSession):
    """Stream session that yields a user's Instagram posts via the web API."""

    # Number of posts requested per feed call.
    BATCH_COUNT = 25

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The client emits warnings on construction; silence them locally.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.web_api = Client(
                proxy=self.proxies.get("https") if self.proxies else None,
                auto_patch=True,
                drop_incompat_keys=False)
        # Per-user pagination cursor (None = start of feed).
        self.end_cursors = DefaultAttrDict(lambda: None)

    @memo(region="long")
    def user_name_to_id(self, user_name):
        """Resolve *user_name* to its numeric Instagram id (cached)."""
        try:
            user_id = self.web_api.user_info2(user_name)["id"]
        except Exception:
            # Narrowed from a bare except so SystemExit/KeyboardInterrupt
            # are not swallowed.
            raise SGException(f"user id for {user_name} not found")
        return user_id

    def get_feed_items(self, user_name, count=BATCH_COUNT):
        """Yield AttrDict posts (guid, title, post_type, created, content)
        for *user_name*, resuming from the stored cursor."""
        try:
            feed = self.web_api.user_feed(
                self.user_name_to_id(user_name),
                count=self.BATCH_COUNT,
                end_cursor=self.end_cursors[user_name])
        except ClientConnectionError as e:
            # Bail out: without this return the loop below would hit an
            # unbound ``feed`` and raise NameError.
            logger.warning(f"connection error: {e}")
            return

        for post in feed:
            # Remember the comment-pagination cursor when present.
            try:
                cursor = (post["node"]["edge_media_to_comment"]["page_info"]
                          ["end_cursor"])
                if cursor:
                    self.end_cursors[user_name] = cursor
            except KeyError:
                pass

            post_type = None
            post_id = post["node"]["id"]

            # Caption may be None; fall back to a placeholder title.
            try:
                title = post["node"]["caption"]["text"].replace("\n", "")
            except TypeError:
                title = "(no caption)"

            media_type = post["node"]["type"]
            if media_type == "video":
                post_type = "video"
                content = self.provider.new_media_source(
                    post["node"]["videos"]["standard_resolution"]["url"],
                    media_type="video")

            elif media_type == "image":
                if "carousel_media" in post["node"]:
                    # A carousel ("story") mixes images and videos.
                    post_type = "story"
                    content = [
                        self.provider.new_media_source(
                            m["images"]["standard_resolution"]["url"],
                            media_type="image") if m["type"] == "image" else
                        self.provider.new_media_source(m["video_url"],
                                                       media_type="video")
                        if m["type"] == "video" else None
                        for m in post["node"]["carousel_media"]
                    ]
                else:
                    post_type = "image"
                    content = self.provider.new_media_source(
                        post["node"]["images"]["standard_resolution"]["url"],
                        media_type="image")
            else:
                logger.warning(f"no content for post {post_id}")
                continue

            yield (AttrDict(guid=post_id,
                            title=title.strip(),
                            post_type=post_type,
                            created=datetime.fromtimestamp(
                                int(post["node"]["created_time"])),
                            content=content))
Example #18
0
import json
from django.http import HttpResponse, JsonResponse
from .models import user, selfPost, twitterPost, instaPost
from dateutil.parser import parse
import twitter
from instagram_web_api import Client, ClientCompatPatch, ClientError, ClientLoginError
import time

# NOTE(review): credentials are hard-coded in source; move them to
# environment variables or a config file and rotate the exposed secrets.
TwitterApi = twitter.Api(
    consumer_key='tYbGmWUZkn4eNeJwG00t9N7Si',
    consumer_secret='OoKHVW52WU7p2hbYo0dHp1Nb2zwk1wgwRUjQ6YaKauAFVehdl2',
    access_token_key='3288956831-FpFDCVDeupp192qXpuJmOIC9VvT1gHXoHP0PQKc',
    access_token_secret='GcSIrBQy4c23dCvt5xPWSjg7ItT8tlyGrYctSzHjmBmd7')

# Anonymous Instagram web client shared by the view functions below.
InstaApi = Client(auto_patch=True, drop_incompat_keys=False)


def getUser(request, username):
    """Return the stored social ids for *username* as a JSON response."""
    record = user.objects.get(username=username)
    payload = {
        "username": record.username,
        "twitterID": record.twitterID,
        "InstagramID": record.instaID,
    }
    return JsonResponse(payload, safe=False, status=200)


def getPosts(request, username):
    results = []
    u = user.objects.get(username=username)
Example #19
0
from instagram_web_api import Client, ClientCompatPatch, ClientError, ClientLoginError

from sightengine.client import SightengineClient
# NOTE(review): API credentials are hard-coded in source; move them to
# configuration and rotate the exposed keys.
client = SightengineClient('630881392', 'St5TPUomwvLYq7eiXd4G')
# Anonymous Instagram client; note the feed is fetched once at import time.
web_api = Client(auto_patch=True, drop_incompat_keys=False)
user_feed_info = web_api.user_feed('232192182')


def checkDrugs(my_url):
    """Return the SightEngine 'drugs' score for the image at *my_url*."""
    result = client.check('wad').set_url(my_url)
    return result['drugs']


def checkWeapons(my_url):
    """Return the SightEngine 'weapon' score for the image at *my_url*."""
    result = client.check('wad').set_url(my_url)
    return result['weapon']


def checkAlcohol(my_url):
    """Return the SightEngine 'alcohol' score for the image at *my_url*."""
    result = client.check('wad').set_url(my_url)
    return result['alcohol']


def checkCaption(my_url):
    """Run the 'wad' check for *my_url* and print the raw response."""
    print(client.check('wad').set_url(my_url))

Example #20
0
def get_feed(twitter_api):
    """Cross-post new Instagram posts to Twitter.

    Reads the id of the last processed post, walks the user's recent feed
    oldest-first, tweets each newer post (splitting galleries into chains
    of up to 4 images per tweet), then stores the highest processed id.
    """
    last = src.last.get_last(src.last.PostType.MEDIA)
    highest = last

    web_api = Client(auto_patch=True, drop_incompat_keys=False)
    user_feed = web_api.user_feed(os.getenv('INSTAGRAM_USERID'), count=23)

    for post in reversed(user_feed):
        # ID comes in the format 'POSTID_USERID'
        post_id = int(post['node']['id'].split('_')[0])

        # If has not been processed already
        if post_id > last:
            # Hashtag
            tweet_metadata = ['#鈴木このみ', ' ']

            # Format timestamp
            timestamp = datetime.datetime.fromtimestamp(
                post['node']['taken_at_timestamp'],
                pytz.timezone('Asia/Tokyo'))
            tweet_metadata += [timestamp.strftime('%Y-%m-%d %H:%M'), '\n']

            # Post URL
            tweet_metadata.append(post['node']['link'])

            # Caption
            caption = post['node']['caption']['text']
            tweet_content = ['\n\n', caption]

            media = []  # List of tuples of (type, url)

            # Gallery posts: batch consecutive images into groups of up to
            # four (Twitter's per-tweet limit); each video gets its own group.
            if post['node']['__typename'] == MediaType.GALLERY.value:
                list_idx = 0
                list_type = None
                media_list = []
                for gallery_item in post['node']['edge_sidecar_to_children'][
                        'edges']:
                    if gallery_item['node'][
                            '__typename'] == MediaType.VIDEO.value:
                        if list_type is None:
                            media.append([gallery_item['node']['video_url']])
                        elif list_type is MediaType.IMAGE:
                            # Image list in progress
                            # Commit current list and create new list with video
                            media.append(media_list)
                            media.append([gallery_item['node']['video_url']])
                            list_type = None
                            media_list = []
                    else:
                        if list_type is None:
                            # No list in progress
                            list_type = MediaType.IMAGE
                            media_list.append(
                                gallery_item['node']['display_url'])
                        elif list_type is MediaType.IMAGE:
                            # Image list in progress
                            if len(media_list) > 4:
                                # List is somehow overfull
                                # Tweets only allow 4 images, so extra ones need to be split
                                while len(media_list) >= 4:
                                    media.append(media_list[:4])
                                    media_list = media_list[4:]
                                media_list.append(
                                    gallery_item['node']['display_url'])
                            elif len(media_list) == 4:
                                # List full
                                # Commit current list and create new list
                                media.append(media_list)
                                media_list = [
                                    gallery_item['node']['display_url']
                                ]
                            else:
                                # List not full yet
                                media_list.append(
                                    gallery_item['node']['display_url'])
                # Commit unfinished list if exists
                if list_type is MediaType.IMAGE and len(media_list) > 0:
                    media.append(media_list)

            elif post['node']['__typename'] == MediaType.VIDEO.value:
                media.append([post['node']['video_url']])

            else:
                media.append([post['node']['display_url']])

            tweet_str = twutils.truncate_status(''.join(tweet_metadata +
                                                        tweet_content))

            # The first tweet carries the caption; follow-up tweets reply to
            # the previous one with only the metadata header.
            prev_status = 0
            for tweet_media in media:
                replyto = None
                if (prev_status > 0):
                    tweet_str = twutils.truncate_status(
                        ''.join(tweet_metadata))
                    replyto = prev_status

                if os.getenv('ENV', 'dev') == 'production':
                    prev_status = twitter_api.PostUpdate(
                        tweet_str,
                        tweet_media,
                        in_reply_to_status_id=prev_status).id
                else:
                    # Dev mode: twitter_api is used as a writable sink here.
                    prev_status += 1
                    twitter_api.write(tweet_str + '\n\n')
                    twitter_api.write('\n'.join(tweet_media) + '\n\n')

            # Update highest ID if higher
            if post_id > highest:
                highest = post_id

    if (highest > last):
        src.last.set_last(str(highest), src.last.PostType.MEDIA)
Example #21
0
class WebAPIExportManager(object):
    """Scrape Instagram account statistics via the web API and store them in MongoDB.

    Usage (from the original inline notes):
        from data_gathering import WebAPIExportManager
        api = WebAPIExportManager()
        api.get_account('...')
        api.day_account_scrap('...')
    """

    def __init__(self, connection_url='localhost', port=27017):
        # Unauthenticated web client: auto_patch normalizes responses to the
        # private-API shape; drop_incompat_keys=False keeps all raw fields.
        self.web_api = Client(auto_patch=True, drop_incompat_keys=False)
        self.mongo_client = MongoClient(connection_url, port=port)

    def write_to_mongo(self, collection, record):
        """Insert a single record into *collection* of the ``test_db`` database.

        NOTE(review): ``Collection.insert`` is deprecated in PyMongo 3 and
        removed in PyMongo 4; migrate to ``insert_one`` when upgrading.
        """
        db = self.mongo_client.test_db
        db[collection].insert(record)

    def get_account(self, account_name='nadine__is'):
        """Fetch profile stats for *account_name*, compute engagement, persist.

        Returns a short status string; failures are reported via the return
        value (and printed) rather than raised.
        """
        try:
            full_data = self.web_api.user_info2(account_name)
            timeline = full_data.get('edge_owner_to_timeline_media')
            clear_data = {
                'id': full_data.get('id'),
                'date': datetime.utcnow(),
                'name': full_data.get('full_name'),
                'username': full_data.get('username'),
                'followers': full_data.get('counts').get('followed_by'),
                'posts': timeline.get('count'),
            }
            likes = comments = count = 0
            for post in timeline.get('edges'):
                likes += post['node']['edge_liked_by']['count']
                comments += post['node']['edge_media_to_comment']['count']
                count += 1
            clear_data['likes'] = likes
            clear_data['comments'] = comments
            # Engagement over the first page of posts. Guard the denominator:
            # zero posts or zero followers previously raised ZeroDivisionError
            # and fell into the broad except below.
            denominator = clear_data['followers'] * count
            clear_data['engagement_rate'] = (
                (likes + comments) * 100 / denominator if denominator else 0)
            self.write_to_mongo('accounts', clear_data)
            return 'Account successfully scraped'  # fixed typo "seccesfuly"
        except Exception as ex:
            print(str(ex))
            return 'Something went wrong'

    def day_account_scrap(self, account_name):
        """Scrape *account_name* once per day, sleeping until just past midnight.

        Runs forever; intended to be launched in its own thread/process.
        """
        # Local import: the file-level import block is not visible from here.
        from datetime import timedelta
        while True:
            self.get_account(account_name=account_name)
            today = datetime.today()
            # BUG FIX: today.replace(day=today.day + 1) raised ValueError on
            # the last day of any month; timedelta handles month/year rollover.
            tomorrow = (today + timedelta(days=1)).replace(
                hour=0, minute=0, second=0, microsecond=0)
            seconds = (tomorrow - today).total_seconds() + 1
            print(f'Next scrap will be {tomorrow}')
            time.sleep(seconds)
Example #22
0
class InstaUnfollow:
    """Bot that unfollows accounts from a user's "following" list.

    Args:
        username, password: Instagram credentials.
        API: optional pre-built private-API client; created lazily if None.
        action_interval: base seconds slept between unfollow actions.
        rate: number of actions between long pauses.
        interval: base seconds of the long pause taken every ``rate`` actions.
        unfollow_all: stored flag; not consulted in the code visible here.
    """

    def __init__(self,
                 username,
                 password,
                 API=None,
                 action_interval=8.0,
                 rate=120,
                 interval=5400,
                 unfollow_all=True):
        self.username = username
        self.password = password
        self.action_interval = action_interval
        self.rate = rate
        self.interval = interval
        self.unfollow_all = unfollow_all
        # BUG FIX: the original imported `logger` from app but then called an
        # un-imported `get_logger()`, and left `logger` undefined (NameError)
        # when the import failed. Use the app logger when available and fall
        # back to a module logger so `logger` is always bound.
        try:
            from app import get_logger
            logger = get_logger()
        except (ImportError, NameError):
            logger = logging.getLogger(__name__)
        self.logger = logging.LoggerAdapter(logger, {
            'user': self.username,
            'bot': 'instaunfollow'
        })
        self.API = Client(self.username, self.password) if API is None else API
        self.webAPI = WebClient()

    def _get_user_ids(self):
        """Return the pk of every account ``self.id`` is following."""
        self.logger.info('Collecting users to unfollow...')
        rank_token = self.API.generate_uuid()
        following = self.API.user_following(self.id, rank_token=rank_token)
        following_users = following.get("users")
        return [user.get("pk", 0) for user in following_users]

    def _login(self):
        """Attempt login up to 11 times, 6s apart; return True on success."""
        attempts = 0
        while attempts <= 10:
            try:
                if self.API.login():
                    return True
            except Exception as e:
                # BUG FIX: the exception was passed as a %-format argument
                # with no placeholder, so logging dropped it with an error.
                self.logger.error("Failed to login: %s", e)
            sleep(6)
            attempts += 1
        return False

    def start(self):
        """Run the unfollow loop until the following list is drained."""
        # if not self.API.is_logged_in:
        #     if not self._login():
        #         return False
        self.logger.info("Unfollow bot started for user {}...".format(
            self.API.username))
        self.id = self.webAPI.user_info2(self.username).get("id")
        users = self._get_user_ids()

        progress = 0
        too_many_request_errors = 0
        while users:
            progress += 1

            user_id = users.pop(0)  # renamed from `id` (shadowed builtin)

            res = self.API.friendships_destroy(user_id)

            # Re-queue failed unfollows; back off after 10 accumulated errors.
            if res.get("status", False) != "ok":
                users.append(user_id)
                too_many_request_errors += 1

            if too_many_request_errors == 10:
                sleep(randint(60, 100))
                too_many_request_errors = 0

            # Long pause every `rate` actions to look less bot-like.
            if not (progress % self.rate):
                sleep(uniform(self.interval * 0.9, self.interval * 1.1))

            # Sleep action_interval seconds +/- 10% to induce randomness
            # between each action.
            sleep(
                uniform(self.action_interval * 0.9,
                        self.action_interval * 1.1))
Example #23
0
                new_str = new_str + word + " "
    else:
        new_str = new_str + doc
    ret = new_str.strip()
    ret = re.sub("[\n,/,\\\]", "", ret)
    # ret = ret.replace("  ", " ")
    return ret

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage: python3 get_feed.py [query] ')
        sys.exit(0)

    query = sys.argv[1]

    api = Client(auto_patch=True, drop_incompat_keys=False)

    user_feed = api.user_feed(query, count=10)

    feed_data = {
        "username": user_feed[0]["node"]["owner"]["username"],
        "user_id": user_feed[0]["node"]["owner"]["id"],
        "media": []
    }

    for photo in user_feed:

        media_shortcode = photo["node"]["shortcode"]
        data = photo["node"]

        media_data = {
Example #24
0
class Finder(object):
    """Wrapper around the private and web Instagram clients for one tracked user.

    Logs in (or reuses cached settings from ``loginset.json``) at construction
    time and exposes paginated feed/comment helpers. Pagination cursors use
    -1 for "first page" and -2 for "no more pages".
    """
    def __init__(self,id):
        # Construct-time login: get API access and the tracked user's id
        # right away (translated from the Russian original).
        self.__getMyWebAPI()
        self.webClient=WebClient(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36")
        time.sleep(0.2)
        self.userId=id
    def __getMyWebAPI(self):
        """Create ``self.myWebAPI``, reusing cached login settings when present.

        Falls back to a fresh login when the settings file is missing or the
        cached cookies have expired; exits the process on login errors.
        """
        logging.basicConfig()
        logger = logging.getLogger('instagram_private_api')
        logger.setLevel(logging.WARNING)
        # NOTE(review): credentials are placeholder-scrubbed; real values
        # must be supplied for this to work.
        password='******'
        login='******'
        fil='loginset.json'
        print('Client version: {0!s}'.format(client_version))

        device_id = None
        try:

            settings_file = fil
            if not os.path.isfile(settings_file):
                # settings file does not exist
                print('Unable to find file: {0!s}'.format(settings_file))

                # login new
                self.myWebAPI = Client(
                login, password,
                on_login=lambda x: onlogin_callback(x, fil))
            else:
                with open(settings_file) as file_data:
                    cached_settings = json.load(file_data, object_hook=from_json)
                print('Reusing settings: {0!s}'.format(settings_file))

                device_id = cached_settings.get('device_id')
                # reuse auth settings
                self.myWebAPI = Client(
                login, password,
                settings=cached_settings)

        except (ClientCookieExpiredError, ClientLoginRequiredError) as e:
            print('ClientCookieExpiredError/ClientLoginRequiredError: {0!s}'.format(e))

        # Login expired
        # Do relogin but use default ua, keys and such
            self.myWebAPI = Client(
                login, password,
                device_id=device_id,
                on_login=lambda x: onlogin_callback(x, fil))

        except ClientLoginError as e:
            print('ClientLoginError {0!s}'.format(e))
            exit(9)
        except ClientError as e:
            print('ClientError {0!s} (Code: {1:d}, Response: {2!s})'.format(e.msg, e.code, e.error_response))
            exit(9)
        except Exception as e:
            print('Unexpected Exception: {0!s}'.format(e))
            exit(99)

         # Show when login expires
        cookie_expiry = self.myWebAPI.cookie_jar.auth_expires
        print('Cookie Expiry: {0!s}'.format(datetime.datetime.fromtimestamp(cookie_expiry).strftime('%Y-%m-%dT%H:%M:%SZ')))
        ############
    def findFeed(self,next):
        """Return ``(items, next_max_id)`` for one page of the tracked user's feed.

        ``next`` is the pagination cursor; -1/-2 both mean "fetch first page".
        When no further page exists, -2 is returned as the cursor.
        """
        if next!=-1 and next!=-2: 
            feed = self.myWebAPI.user_feed(self.userId,max_id=next)
            time.sleep(0.2)
        else:
            feed = self.myWebAPI.user_feed(self.userId)
            time.sleep(0.2)
        return feed['items'],feed.get('next_max_id',-2)
    def findComments(self,media_id,next):
        """Return ``(comments, next_max_id)`` for one page of a media's comments."""
        if next!=-1 and next!=-2: 
            comments = self.myWebAPI.media_comments(media_id,max_id=next)
            time.sleep(0.2)
        else:
            comments = self.myWebAPI.media_comments(media_id)
            time.sleep(0.2)
        return comments['comments'],comments.get('next_max_id',-2)
    def takeCommentsWithoutCircle(self,media_id,comment_count):
        """Fetch up to ``comment_count`` comments in one call (no paging loop)."""
        comments = self.myWebAPI.media_n_comments(media_id,n=comment_count,reverse=True)
        time.sleep(0.2)
        return comments
    def findNewFeed(self):
        """Return the first page of the tracked user's feed items."""
        feed = self.myWebAPI.user_feed(self.userId)
        time.sleep(0.2)
        return feed['items']
    def findIGTV(self):
        """Return the IGTV timeline edges of a hard-coded account via the web API."""
        igtv=self.webClient.user_info2('igor_artamonov48')
        time.sleep(0.2)
        return igtv['edge_felix_video_timeline']['edges']
Example #25
0
from instagram_web_api import Client, ClientCompatPatch, ClientError, ClientLoginError
import json
import sys
import re

#1518284433 -  rober downey jr
# 30588147,
if __name__ == '__main__':
    # BUG FIX: len(sys.argv) is always >= 1 (argv[0] is the script name), so
    # the original `< 1` check never fired and a missing query crashed on
    # sys.argv[1]. Require an actual query argument, matching get_feed.py.
    if len(sys.argv) < 2:
        print('Usage: python3 get_user.py [query] ')
        sys.exit(0)

    query = sys.argv[1]

    # Unauthenticated web client; auto_patch normalizes the response shape.
    api = Client(auto_patch=True, drop_incompat_keys=False)

    user = api.user_info2(query)

    # Flatten the profile fields consumed downstream (output is JSON on
    # stdout — presumably read by another process; flush to be safe).
    retval = {
        "posts": user["counts"]["media"],
        "followers": user["counts"]["followed_by"],
        "following": user["counts"]["follows"],
        "is_verified": user["is_verified"],
        "fullname": user["full_name"],
        "profile_pic_url": user["profile_pic_url_hd"]
    }

    print(json.dumps(retval))
    sys.stdout.flush()
Example #26
0
 def __init__(self,id):
     """Log in immediately and remember the id of the tracked account."""
     # Construct-time login: get API access and the tracked user's id right
     # away (translated from the Russian original).
     self.__getMyWebAPI()
     self.webClient=WebClient(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36")
     time.sleep(0.2)
     self.userId=id