Example #1
0
    def get_export(self, start_date, end_date, event=None, where=None, bucket_id=None, response_format=FORMAT_JSON, result_key=""):
        """
        Get a "raw dump" of tracked events over a time period.

        Yields events as they are returned (matching the format shown below).

        Args:
            `start_date`: [str] The date in yyyy-mm-dd format from which to begin querying for the
                                event from. This date is inclusive.
                          [sample]: "2014-04-01"
            `end_date`: [str] The date in yyyy-mm-dd format from which to stop querying for the
                              event from. This date is inclusive.
                        [sample]: "2014-04-01"
            `event`: [array (optional)]: The event or events that you wish to get data for.
                               [sample]: ["play song", "log in", "add playlist"]
            `where`: [str] An expression to filter events by.
            `bucket_id`: [str] The specific data bucket you would like to query.
            'result_key': [str] The field in the event result that will act as key to compose the dict with the query results

        Event format:
            {"event":"Viewed report","properties":{"distinct_id":"foo","time":1329263748,"origin":"invite",
            "origin_referrer":"http://mixpanel.com/projects/","$initial_referring_domain":"mixpanel.com",
            "$referrer":"https://mixpanel.com/report/3/stream/","$initial_referrer":"http://mixpanel.com/",
            "$referring_domain":"mixpanel.com","$os":"Linux","origin_domain":"mixpanel.com","tab":"stream",
            "$browser":"Chrome","Project ID":"3","mp_country_code":"US"}}
        """
        start_date_obj = self._validate_date(start_date)
        end_date_obj = self._validate_date(end_date)

        # Check the actual dates
        if start_date_obj > end_date_obj:
            raise exceptions.InvalidDateException('The `start_date` specified after the `end_date`; you will not receive any events.')

        if isinstance(event, str):
            event = [event]

        response = self.connection.raw_request(
            Connection.DATA_ENDPOINT,
            'export',
            {
                'from_date': start_date,
                'to_date': end_date,
                'event': event,
                'where': where,
                'bucket': bucket_id,
            },
            response_format
        )
        result = {}
        for line in response:
            # print line
            # result.append(json.loads(_totext(line)))
            # NOTA: Este es un pequeno cambio enfocado al uso que se le da este modulo para la metrica de latencia
            event = json.loads(_totext(line))
            key = event['properties'][result_key]
            # print key
            result[key] = event['properties'] 
            # print result[key]
        return result
Example #2
0
 def _validate_date(self, date):
     " Utility method used to validate a `response_format` param. "
     try:
         return datetime.datetime.strptime(date, '%Y-%m-%d')
     except ValueError:
         try:
             return datetime.datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
         except ValueError:
             raise exceptions.InvalidDateException('The `date` specified is invalid. Must be in `YYYY-MM-DD` format.')
Example #3
0
    def get_segmentation_average(self,
                                 event_name,
                                 start_date,
                                 end_date,
                                 unit=UNIT_DAY,
                                 on=None,
                                 where=None,
                                 response_format=FORMAT_JSON):
        """
        Averages an expression for events per unit time.

        # Example 1
        Instead of finding out the total profit he is making per day by selling things
        from his website, Kevin can also find out the average price of an item being
        sold with the following query:

        > user_client.get_segmentation_average(
            'item sold',
            '2011-08-06',
            '2011-08-16',
            on='properties["price"] - properties["overhead"]',
        )
        {
            'results': {
                '2011-08-06': 8.64705882352939,
                '2011-08-07': 4.640625,
                '2011-08-08': 3.6230899830221,
                '2011-08-09': 7.3353658536585
            },
            'status': 'ok'
        }
        """
        self._validate_response_format(response_format)
        #self._validate_expression(on, where)
        start_date_obj = self._validate_date(start_date)
        end_date_obj = self._validate_date(end_date)

        # Check the actual dates
        if start_date_obj > end_date_obj:
            raise exceptions.InvalidDateException(
                'The `start_date` specified after the `end_date`; you will not receive any annotations.'
            )

        return self.connection.request('segmentation/average', {
            'event': event_name,
            'from_date': start_date,
            'to_date': end_date,
            'unit': unit,
            'on': on,
            'where': where,
            'unit': unit,
        },
                                       response_format=response_format)
Example #4
0
    def get_segmentation_sum(self,
                             event_name,
                             start_date,
                             end_date,
                             on,
                             unit=UNIT_DAY,
                             where=None,
                             response_format=FORMAT_JSON):
        """
        Sums an expression for events per unit time.

        # Example 1
        Kevin also sells things from example.com. He has an event named item sold
        that tracks each item that gets sold from his website. It has a number property
        named price that records the value of the item being sold. He has another number
        property named overhead that represents the overhead cost of the item. Kevin
        can find out how much profit he is making each day with the following query:

        > user_client.get_segmentation_sum(
            'item sold',
            '2011-08-06',
            '2011-08-16',
            on='properties["price"] - properties["overhead"]',
        )
        {   'results': {
                '2011-08-06': 376.0,
                '2011-08-07': 634.0,
                '2011-08-08': 474.0,
                '2011-08-09': 483.0
        },
            'status': 'ok'
        }
        """
        self._validate_response_format(response_format)
        #self._validate_expression(on, where)
        start_date_obj = self._validate_date(start_date)
        end_date_obj = self._validate_date(end_date)

        # Check the actual dates
        if start_date_obj > end_date_obj:
            raise exceptions.InvalidDateException(
                'The `start_date` specified after the `end_date`; you will not receive any annotations.'
            )

        return self.connection.request('segmentation/sum', {
            'event': event_name,
            'from_date': start_date,
            'to_date': end_date,
            'unit': unit,
            'on': on,
            'where': where
        },
                                       response_format=response_format)
Example #5
0
    def annotations_list(self,
                         start_date,
                         end_date,
                         response_format=FORMAT_JSON):
        """
        List the annotations for the given date range.

        Args:
            `start_date`: [str] The beginning of the date range to get annotations for in yyyy-mm-dd
                                format. This date is inclusive.
                          [sample]: "2014-04-01"
            `end_date`: [str] The end of the date range to get annotations for in yyyy-mm-dd format.
                              This date is inclusive.
                        [sample]: "2014-04-01"
            `response_format`: [string (optional)]: The data return format.
                               [sample]: "json" or "csv"

        Response format:
        {
            'annotations': [
                {'date': '2014-05-23 00:00:00', 'project_id': 23880, 'id': 148, 'description': 'Launched v2.0 of product'},
                {'date': '2014-05-29 00:00:00', 'project_id': 23880, 'id': 150, 'description': 'Streamlined registration process'}
            ],
            'error': false
        }
        """
        start_date_obj = self._validate_date(start_date)
        end_date_obj = self._validate_date(end_date)

        # Check the actual dates
        if start_date_obj > end_date_obj:
            raise exceptions.InvalidDateException(
                'The `start_date` specified after the `end_date`; you will not receive any annotations.'
            )

        return self.connection.request('annotations', {
            'from_date': start_date,
            'to_date': end_date,
        },
                                       response_format=response_format)
Example #6
0
    def get_export(self,
                   start_date,
                   end_date,
                   event=None,
                   where=None,
                   bucket_id=None,
                   response_format=FORMAT_JSON):
        """
        Get a "raw dump" of tracked events over a time period.

        Yields events as they are returned (matching the format shown below).

        Args:
            `start_date`: [str] The date in yyyy-mm-dd format from which to begin querying for the
                                event from. This date is inclusive.
                          [sample]: "2014-04-01"
            `end_date`: [str] The date in yyyy-mm-dd format from which to stop querying for the
                              event from. This date is inclusive.
                        [sample]: "2014-04-01"
            `event`: [array (optional)]: The event or events that you wish to get data for.
                               [sample]: ["play song", "log in", "add playlist"]
            `where`: [str] An expression to filter events by.
            `bucket_id`: [str] The specific data bucket you would like to query.

        Event format:
            {"event":"Viewed report","properties":{"distinct_id":"foo","time":1329263748,"origin":"invite",
            "origin_referrer":"http://mixpanel.com/projects/","$initial_referring_domain":"mixpanel.com",
            "$referrer":"https://mixpanel.com/report/3/stream/","$initial_referrer":"http://mixpanel.com/",
            "$referring_domain":"mixpanel.com","$os":"Linux","origin_domain":"mixpanel.com","tab":"stream",
            "$browser":"Chrome","Project ID":"3","mp_country_code":"US"}}
        """
        start_date_obj = self._validate_date(start_date)
        end_date_obj = self._validate_date(end_date)

        # Check the actual dates
        if start_date_obj > end_date_obj:
            raise exceptions.InvalidDateException(
                'The `start_date` specified after the `end_date`; you will not receive any events.'
            )

        # the provided event should be an array even when a singleton
        # if a singleton string/unicode is provided, put it into an array
        if isinstance(event, six.string_types):
            event = [event]

        response = self.connection.raw_request(
            Connection.DATA_ENDPOINT, 'export', {
                'from_date': start_date,
                'to_date': end_date,
                'event': event,
                'where': where,
                'bucket': bucket_id,
            }, response_format)
        # per mixpanel documentation it is necessary to load
        # the response in it's entirety before processing:
        #     > This endpoint uses gzip to compress the transfer;
        #     > as a result, raw exports should not be processed until
        #     > the file is received in its entirety.
        # https://mixpanel.com/docs/api-documentation/exporting-raw-data-you-inserted-into-mixpanel
        response_data = response.read()
        lines = _totext(response_data).split('\n')

        for line in lines:
            if line:
                yield json.loads(line)
Example #7
0
    def get_segmentation_multiseg(self,
                                  event_name,
                                  start_date,
                                  end_date,
                                  unit=UNIT_DAY,
                                  inner=None,
                                  outer=None,
                                  data_type=DATA_TYPE_GENERAL,
                                  where=None,
                                  limit=None,
                                  response_format=FORMAT_JSON):
        """
        WARNING THIS IS AN UNDOCUMENTED API ENDPOINT
        USE AT YOUR OWN RISK, MIXPANEL MAY CHANGE THIS

        This allows a user to segment on two properties rather
        just one. Example:

        > user_client.get_segmentation_multiseg(
            'item sold',
            '2011-08-06',
            '2011-08-16',
            inner='properties["$city"]',
            outer='properties["$region"]',
        )
        {   
        u'data': {   
            u'series': [u'2015-06-01', u'2015-06-02', u'2015-06-03'],
            u'values': {   
                u'North Carolina': {   
                    u'Charlotte': {   
                        u'2015-06-01': 300,
                        u'2015-06-02': 111,
                        u'2015-06-03': 171
                    },
                    u'Austin': {   
                        u'2015-06-01': 0,
                        u'2015-06-02': 0,
                        u'2015-06-03': 0
                    }
                },
                u'Texas': {   
                    u'Charlotte': {   
                        u'2015-06-01': 0,
                        u'2015-06-02': 0,
                        u'2015-06-03': 0},
                    u'Austin': {   
                        u'2015-06-01': 3181,
                        u'2015-06-02': 3219,
                        u'2015-06-03': 3484
                    }
                }
            }
        },
        u'legend_size': 4
        }


        Note the way this works. Limit defines the range for both the inner
        outer properties. The top <limit> outer segments are matched against
        the top <limit> inner segments. In the case of region ond city
        this results in many nested segments with zero results. At this time
        you do not get the top inner segments per outer segment.
        """
        self._validate_response_format(response_format)
        #self._validate_expression(inner, outer, where)
        start_date_obj = self._validate_date(start_date)
        end_date_obj = self._validate_date(end_date)

        # Check the actual dates
        if start_date_obj > end_date_obj:
            raise exceptions.InvalidDateException(
                'The `start_date` specified after the `end_date`; you will not receive any annotations.'
            )

        return self.connection.request('segmentation/multiseg', {
            'event': event_name,
            'from_date': start_date,
            'to_date': end_date,
            'unit': unit,
            'inner': inner,
            'outer': outer,
            'where': where,
            'type': data_type,
            'limit': limit,
        },
                                       response_format=response_format)
Example #8
0
    def get_segmentation_numeric(self,
                                 event_name,
                                 start_date,
                                 end_date,
                                 on,
                                 unit=UNIT_DAY,
                                 where=None,
                                 buckets=None,
                                 data_type=DATA_TYPE_UNIQUE,
                                 response_format=FORMAT_JSON):
        """
        Get data for an event, segmented and filtered by properties, with values placed into numeric buckets.

        # Example 1
        Kevin also has an event named page loaded. It has a property named time
        that represents the time in milliseconds it took to load the page. Kevin
        wants to see the distribution of page load times that are greater than
        2000 milliseconds. But suppose Harry accidentally was sending the time
        property to Mixpanel as a string, so its the incorrect type. He can make
        the following query involving an explicit typecast to number to get the
        results he wants:

        > user_client.get_segmentation_numeric(
            'page loaded',
            '2011-08-06',
            '2011-08-16',
            on='number(properties["time"])',
            where='number(properties["time"]) >= 2000',
            buckets=5
        )
        {
            'data': {
                'series': ['2011-08-08', '2011-08-09', '2011-08-06', '2011-08-07'],
                'values': {
                    '2,000 - 2,100': {
                        '2011-08-06': 1,
                        '2011-08-07': 5,
                        '2011-08-08': 4,
                        '2011-08-09': 15
                    },
                    '2,100 - 2,200': {
                        '2011-08-07': 2,
                        '2011-08-08': 7,
                        '2011-08-09': 15
                    },
                    '2,200 - 2,300': {
                        '2011-08-06': 1,
                        '2011-08-08': 6,
                        '2011-08-09': 5
                    },
                    '2,300 - 2,400': {
                        '2011-08-06': 4,
                        '2011-08-08': 1,
                        '2011-08-09': 12
                    },
                    '2,400 - 2,500': {
                        '2011-08-08': 2,
                        '2011-08-09': 5
                    }
                }
            },
            'legend_size': 5
        }
        """
        self._validate_response_format(response_format)
        #self._validate_expression(on, where)
        start_date_obj = self._validate_date(start_date)
        end_date_obj = self._validate_date(end_date)

        # Check the actual dates
        if start_date_obj > end_date_obj:
            raise exceptions.InvalidDateException(
                'The `start_date` specified after the `end_date`; you will not receive any annotations.'
            )

        return self.connection.request('segmentation/numeric', {
            'event': event_name,
            'from_date': start_date,
            'to_date': end_date,
            'unit': unit,
            'on': on,
            'where': where,
            'buckets': buckets,
            'type': data_type,
        },
                                       response_format=response_format)
Example #9
0
    def get_segmentation(self,
                         event_name,
                         start_date,
                         end_date,
                         unit=UNIT_DAY,
                         on=None,
                         where=None,
                         limit=None,
                         data_type=DATA_TYPE_UNIQUE,
                         response_format=FORMAT_JSON):
        """
        Get data for an event, segmented and filtered by properties.

        # Example 1
        Suppose Kevin Wood has a website named guidebook.com. He has an event named
        signed up, sent whenever a user signs up to example.com. It has a string
        property named `mp_country_code` that stores the country code of the
        user signing up.

        > user_client.get_segmentation('signed up', '2011-08-06', ' 2011-08-16')
        {
            'data': {
                'series': ['2011-08-08', '2011-08-09', '2011-08-06', '2011-08-07'],
                'values': {
                    'signed up': {
                        '2011-08-06': 147,
                        '2011-08-07': 146,
                        '2011-08-08': 776,
                        '2011-08-09': 1376
                    }
                }
            },
            'legend_size': 1
        }

        # Example 2
        Suppose Kevin is impressed with the number of signups on 2011-08-09, and now
        wants to know the top five countries his signups came from on that day. He
        can make the following query:

        > user_client.get_segmentation('signed up', '2011-08-09', ' 2011-08-09', limit=5, on='properties["mp_country_code"]')
        {
            'data': {
                'series': ['2011-08-09'],
                'values': {
                    'CA': {'2011-08-09': 277},
                    'FR': {'2011-08-09': 8},
                    'GB': {'2011-08-09': 19},
                    'IN': {'2011-08-09': 19},
                    'US': {'2011-08-09': 1036}
                }
            },
            'legend_size': 5
        }

        # Example 3
        Kevin now wants to zero in on the US and Canada. He is tracking a property
        named mp_keyword, which tells him the search keyword that users used to
        get to example.com. Now he wants to determine how many signups from the US
        and Canada came about as a result of a search that contained the word 'guidebook.'
        He can do that with the following query:

        > user_client.get_segmentation(
            'signed up',
            '2011-08-09',
            '2011-08-09',
            on='properties["mp_country_code"]',
            where='"guidebook" in properties["mp_keyword"] and ("CA" == properties["mp_country_code"] or "US" == properties["mp_country_code"])'
        )
        {
            'data': {
                'series': ['2011-08-09'],
                'values': {
                    'CA': {'2011-08-09': 31},
                    'US': {'2011-08-09': 312}
                }
            },
            'legend_size': 2
        }
        """
        self._validate_response_format(response_format)
        #self._validate_expression(on, where)
        start_date_obj = self._validate_date(start_date)
        end_date_obj = self._validate_date(end_date)

        # Check the actual dates
        if start_date_obj > end_date_obj:
            raise exceptions.InvalidDateException(
                'The `start_date` specified after the `end_date`; you will not receive any annotations.'
            )

        return self.connection.request('segmentation', {
            'event': event_name,
            'from_date': start_date,
            'to_date': end_date,
            'unit': unit,
            'on': on,
            'where': where,
            'limit': limit,
            'type': data_type,
        },
                                       response_format=response_format)
Example #10
0
    def get_funnel_detail(self,
                          funnel_id,
                          start_date=None,
                          end_date=None,
                          length=14,
                          interval=1,
                          unit=UNIT_DAY,
                          on=None,
                          where=None,
                          limit=None,
                          response_format=FORMAT_JSON):
        """
        Get data for a specified funnel.

        Response format:
        {
            'Signup flow': {
                'data': {
                    '2010-05-24': {
                        'analysis': {
                            'completion': 0.064679359580052493,
                            'starting_amount': 762,
                            'steps': 3,
                            'worst': 2
                        },
                        'steps': [
                            {
                                'count': 762,
                                'goal': 'pages',
                                'overall_conv_ratio': 1.0,
                                'step_conv_ratio': 1.0
                            },
                            {
                                'count': 69,
                                'goal': 'View signup',
                                'overall_conv_ratio': 0.09055118110236221,
                                'step_conv_ratio': 0.09055118110236221
                            },
                            {
                                'count': 10,
                                'goal': 'View docs',
                                'overall_conv_ratio': 0.064679359580052493,
                                'step_conv_ratio': 0.7142857142857143
                            }
                        ]
                    },
                    '2010-05-31': {
                        'analysis': {
                            'completion': 0.12362030905077263,
                            'starting_amount': 906,
                            'steps': 2,
                            'worst': 2
                        },
                        'steps': [
                            {
                                'count': 906,
                                'goal': 'homepage',
                                'overall_conv_ratio': 1.0,
                                'step_conv_ratio': 1.0
                            },
                            {
                                'count': 112,
                                'goal': 'View signup',
                                'overall_conv_ratio': 0.12362030905077263,
                                'step_conv_ratio': 0.12362030905077263
                            }
                        ]
                    }
                },
            'meta': {'dates': ['2010-05-24', '2010-05-31']}
            }
        }
        """
        self._validate_response_format(response_format)
        start_date_obj = self._validate_date(start_date)
        end_date_obj = self._validate_date(end_date)
        # self._validate_expression(on, where)

        # Check the actual dates
        if start_date_obj > end_date_obj:
            raise exceptions.InvalidDateException(
                'The `start_date` specified after the `end_date`; you will not receive any annotations.'
            )

        return self.connection.request('funnels', {
            'funnel_id': funnel_id,
            'from_date': start_date,
            'to_date': end_date,
            'length': length,
            'interval': interval,
            'unit': unit,
            'on': on,
            'where': where,
        },
                                       response_format=response_format)