def run(self):
        """Fetch all Instagram posts of the configured page and write them to CSV.

        Requires the FB_ACCESS_TOKEN environment variable to be set and reads
        the page id from the upstream facts JSON. Follows Graph API cursor
        pagination; in minimal mode, stops after the second page.
        """
        access_token = os.getenv('FB_ACCESS_TOKEN')
        if not access_token:
            raise EnvironmentError("FB Access token is not set")

        with self.input().open('r') as facts_file:
            facts = json.load(facts_file)
        page_id = facts['ids']['instagram']['pageId']

        all_media = []

        fields = ','.join(self.columns.keys())
        # use limit=100 to keep amount of requests small
        # 100 is the maximum value the Graph API will accept
        limit = 100

        media_url = (f'{API_BASE}/{page_id}/media'
                     f'?fields={fields}&limit={limit}')

        response = try_request_multiple_times(media_url)
        response_json = response.json()

        all_media.extend(response_json['data'])
        current_count = len(all_media)

        logger.info("Fetching Instagram posts ...")
        # Follow cursor pagination until no 'next' link remains. Use .get so
        # a response without a 'paging' key (possible on the last/only page)
        # does not raise a KeyError.
        while 'next' in response_json.get('paging', {}):
            next_url = response_json['paging']['next']
            response = try_request_multiple_times(next_url)
            response_json = response.json()

            page_data = response_json['data']
            current_count += len(page_data)
            if sys.stdout.isatty():
                print(
                    f"\rFetched {current_count} Instagram posts",
                    end='',
                    flush=True)
            all_media.extend(page_data)

            if self.minimal_mode:
                # Stop after this page; 'break' replaces the previous hack of
                # popping the 'next' key out of the paging dict.
                logger.info("Running in minimal mode, stopping now")
                break

        if sys.stdout.isatty():
            print()  # have to manually print newline
        logger.info("Fetching of Instagram posts complete")

        # Convert each media item using the per-column adapter callables.
        df = pd.DataFrame([
            {
                column: adapter(media[column])
                for (column, adapter)
                in self.columns.items()
            }
            for media
            in all_media
        ])
        with self.output().open('w') as output_file:
            df.to_csv(output_file, index=False, header=True)
# Ejemplo n.º 2
# (separator between scraped examples; original marker: "0")
    def run(self):
        """Fetch daily account insight metrics and write them as a one-row CSV.

        Reads the page id from the upstream facts JSON and queries the Graph
        API insights edge with period=day.
        """
        with self.input().open('r') as facts_file:
            facts = json.load(facts_file)
        page_id = facts['ids']['instagram']['pageId']

        df = pd.DataFrame(columns=self.columns)
        metric_names = [
            'impressions', 'reach', 'profile_views', 'follower_count',
            'website_clicks'
        ]
        metrics = ','.join(metric_names)
        period = 'day'
        url = f'{API_BASE}/{page_id}/insights?metric={metrics}&period={period}'
        response = try_request_multiple_times(url)
        response_data = response.json()['data']

        # Key results by metric name instead of relying on the API returning
        # metrics in request order — positional indexing is fragile.
        values_by_name = {
            entry['name']: entry['values'][0]['value']
            for entry in response_data
        }
        timestamp = response_data[0]['values'][0]['end_time']

        df.loc[0] = [timestamp] + [
            values_by_name[name] for name in metric_names
        ]

        with self.output().open('w') as output_file:
            df.to_csv(output_file, index=False, header=True)
    def run(self):
        """Fetch the page's current follower and media counts into a one-row CSV."""
        with self.input().open('r') as facts_file:
            facts = json.load(facts_file)
        page_id = facts['ids']['instagram']['pageId']

        df = pd.DataFrame(columns=self.columns)
        fields = ','.join(['followers_count', 'media_count'])
        url = f'{API_BASE}/{page_id}?fields={fields}'
        response_data = try_request_multiple_times(url).json()

        # Single row: local fetch time plus the two profile counters
        # (.get returns None if the API omits a field).
        df.loc[0] = [
            dt.datetime.now(),
            response_data.get('followers_count'),
            response_data.get('media_count'),
        ]

        with self.output().open('w') as output_file:
            df.to_csv(output_file, index=False, header=True)
    def run(self):
        """Fetch daily account insights and write them as a one-row CSV.

        Per-metric extraction is delegated to ``self.extract_metrics``; this
        method only builds the request URL and assembles the output frame.
        """
        with self.input().open('r') as facts_file:
            facts = json.load(facts_file)
        page_id = facts['ids']['instagram']['pageId']

        # Separate name from the extracted-metrics dict below; the original
        # reused 'metrics' for both, which was confusing.
        metric_query = ','.join([
            'impressions',
            'reach',
            'profile_views',
            'follower_count',
            'website_clicks'
        ])
        period = 'day'
        url = f'{API_BASE}/{page_id}/insights?metric={metric_query}&period={period}'
        response = try_request_multiple_times(url)
        response_data = response.json()['data']

        timestamp = response_data[0]['values'][0]['end_time']
        metrics = self.extract_metrics(response_data)

        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        # build the single-row frame directly instead. Passing
        # columns=self.columns preserves the intended column order.
        df = pd.DataFrame(
            [{'timestamp': timestamp, **metrics}],
            columns=self.columns
        )

        with self.output().open('w') as output_file:
            df.to_csv(output_file, index=False, header=True)
# Ejemplo n.º 5
# (separator between scraped examples; original marker: "0")
    def run(self):
        """Fetch per-post insight metrics for recent Instagram posts.

        Reads the previously fetched post list (CSV), queries the Graph API
        insights edge for each post that is newer than ``self.timespan``, and
        writes the condensed performance values to the output CSV.
        """
        with self.input().open('r') as input_file:
            post_df = pd.read_csv(input_file)

        if self.minimal_mode:
            # Minimal mode: limit to the first five posts to keep runs short.
            post_df = post_df.head(5)

        # Metrics requested for every media type; video_views is appended
        # below only for videos because it errors for non-video posts.
        generic_metrics = ['impressions', 'reach', 'engagement', 'saved']

        # Collect only the non-delta columns here; the delta_* columns are
        # derived later by condense_performance_values.
        performance_df = pd.DataFrame(columns=[
            column for column in self.columns
            if not column.startswith('delta_')
        ])

        fetch_time = dt.datetime.now()
        for i, row in self.tqdm(post_df.iterrows(),
                                desc="Fetching insights for instagram posts",
                                total=len(post_df)):
            # Fetch only insights for less than 2 months old posts
            # (self.timespan is presumably a timedelta — TODO confirm)
            post_time = dtparser.parse(row['timestamp'])
            if post_time.date() < \
               fetch_time.date() - self.timespan:
                continue

            metrics = ','.join(generic_metrics)
            if row['media_type'] == 'VIDEO':
                metrics += ',video_views'  # causes error if used on non-video

            url = f'{API_BASE}/{row["id"]}/insights?metric={metrics}'

            response = try_request_multiple_times(url)
            response_data = response.json()['data']

            # NOTE(review): values are read positionally, assuming the API
            # returns metrics in the same order they were requested — verify.
            impressions = response_data[0]['values'][0]['value']
            reach = response_data[1]['values'][0]['value']
            engagement = response_data[2]['values'][0]['value']
            saved = response_data[3]['values'][0]['value']

            # Index 4 only exists when video_views was requested (VIDEO posts).
            video_views = response_data[4]['values'][0]['value']\
                if row['media_type'] == 'VIDEO'\
                else 0  # for non-video posts

            performance_df.loc[i] = [
                str(row['id']),  # The type was lost during CSV conversion
                fetch_time,
                impressions,
                reach,
                engagement,
                saved,
                video_views
            ]

        # Drop rows referencing unknown posts, then compute delta_* columns.
        performance_df = self.filter_fkey_violations(performance_df)
        performance_df = self.condense_performance_values(
            performance_df,
            delta_function=PerformanceValueCondenser.linear_delta)

        with self.output().open('w') as output_file:
            performance_df.to_csv(output_file, index=False, header=True)
# Ejemplo n.º 6
# (separator between scraped examples; original marker: "0")
def _get_single_metric(page_id, metric, period='lifetime'):
    """Fetch one insight metric for *page_id* and return its first value."""
    insights_url = (
        f'{API_BASE}/{page_id}/insights'
        f'?metric={metric}&period={period}'
    )
    response = try_request_multiple_times(insights_url)
    data = response.json()['data']
    return data[0]['values'][0]['value']