Beispiel #1
0
 def test_token(self):
     """Run a non-streaming JSON query authenticating with a token only."""
     json_config = ClientConfig(stream=False, response="json")
     client = Client(auth={"token": self.token},
                     address=self.uri,
                     config=json_config)
     response = client.query(query=self.query)
     self.assertIsNotNone(response)
     # The decoded payload must carry at least one entry under 'object'.
     rows = json.loads(response)['object']
     self.assertTrue(len(rows) > 0)
Beispiel #2
0
 def test_query_id(self):
     """Fetch results by query id as compact JSON and check the payload."""
     credentials = {'key': self.key, 'secret': self.secret, 'url': self.uri}
     client = Client(**credentials)
     response = client.query(query_id=self.query_id,
                             stream=False,
                             response="json/compact")
     self.assertIsNotNone(response)
     self.assertNotEqual(response, {})
     # Only checks that 'object' exists and has a length.
     row_count = len(json.loads(response)['object'])
     self.assertEqual(type(row_count), int)
Beispiel #3
0
 def test_query_from_seven_days(self):
     """Query a window from seven days ago until now; expect one row."""
     last_week = {'from': 'now()-7*day()', 'to': 'now()'}
     client = Client(key=self.key, secret=self.secret, url=self.uri)
     response = client.query(query=self.query,
                             dates=last_week,
                             stream=False,
                             response="json")
     self.assertIsNotNone(response)
     rows = json.loads(response)['object']
     self.assertEqual(len(rows), 1)
Beispiel #4
0
 def _query_stream(self, response_type, result,
                   processor):
     """Run a streaming query on a client whose ``_make_request`` is stubbed.

     The stub returns ``result``; the value of ``client.query()`` is
     handed back to the caller.
     """
     stream_config = dict(address="URI",
                          stream=True,
                          response=response_type,
                          processor=processor)
     stubbed_client = Client(retries=0, config=stream_config)
     stubbed_client._make_request = MagicMock(return_value=result)
     return stubbed_client.query()
Beispiel #5
0
 def _query_stream(self, response_type, result,
                   keepAliveToken=DEFAULT_KEEPALIVE_TOKEN):
     """Run a streaming query on a client whose ``_make_request`` is stubbed.

     The stub returns ``result``; ``keepAliveToken`` is passed through to
     the client configuration unchanged.
     """
     stream_config = dict(address="URI",
                          stream=True,
                          response=response_type,
                          keepAliveToken=keepAliveToken)
     stubbed_client = Client(retries=0, config=stream_config)
     stubbed_client._make_request = MagicMock(return_value=result)
     return stubbed_client.query()
Beispiel #6
0
 def test_query_id(self):
     """Fetch results by query id with key/secret auth and check the payload."""
     client = Client(auth=dict(key=self.key, secret=self.secret),
                     address=self.uri,
                     config=ClientConfig(stream=False, response="json"))
     response = client.query(query_id=self.query_id)
     self.assertIsNotNone(response)
     self.assertNotEqual(response, {})
     # Only checks that 'object' exists and has a length.
     row_count = len(json.loads(response)['object'])
     self.assertEqual(type(row_count), int)
Beispiel #7
0
 def test_query_from_seven_days(self):
     """Query a window from seven days ago until now; expect one row."""
     last_week = {'from': 'now()-7*day()', 'to': 'now()'}
     client = Client(auth=dict(key=self.key, secret=self.secret),
                     address=self.uri,
                     config=ClientConfig(stream=False, response="json"))
     response = client.query(query=self.query, dates=last_week)
     self.assertIsNotNone(response)
     rows = json.loads(response)['object']
     self.assertEqual(len(rows), 1)
Beispiel #8
0
 def test_stream_query(self):
     """With the default stream setting the query must return a generator."""
     client = Client(auth=dict(key=self.key, secret=self.secret),
                     address=self.uri,
                     config=ClientConfig(response="json/simple"))
     stream = client.query(query=self.query)
     self.assertTrue(isinstance(stream, types.GeneratorType))
     # Drain the generator to count the rows it produced.
     rows = list(stream)
     self.assertEqual(len(rows), 1)
Beispiel #9
0
 def test_query_from_fixed_dates(self):
     """Query from today's UTC date up to the current UTC timestamp."""
     date_from = strftime("%Y-%m-%d", gmtime())
     date_to = strftime("%Y-%m-%d %H:%M:%S", gmtime())
     client = Client(self.key, self.secret, self.uri)
     response = client.query(query=self.query,
                             dates={'from': date_from, 'to': date_to},
                             stream=False,
                             response="json")
     self.assertIsNotNone(response)
     rows = json.loads(response)['object']
     self.assertEqual(len(rows), 1)
Beispiel #10
0
 def test_pragmas_not_comment_free(self):
     """Check a query when no comment (comment.free pragma) is supplied."""
     settings = {
         'key': self.key,
         'secret': self.secret,
         'url': self.uri,
         'user': self.user,
         'app_name': self.app_name,
         'stream': False,
     }
     client = Client(**settings)
     response = client.query(query=self.query, response="json")
     self.assertIsNotNone(response)
     rows = json.loads(response)['object']
     self.assertEqual(len(rows), 1)
    def __init__(self,
                 profile='default',
                 api_key=None,
                 api_secret=None,
                 end_point=None,
                 oauth_token=None,
                 jwt=None,
                 credential_path=None,
                 timeout=None,
                 retries=1,
                 verify=True,
                 user=None,
                 app_name=None,
                 **kwargs):
        """Resolve credentials and build the underlying API client.

        An end point plus one of OAuth token, JWT, or key/secret pair is
        required. When the arguments do not provide that, ``_read_profile``
        is called once before giving up.

        :param profile: profile name stored on the instance for
            ``_read_profile``
        :param credential_path: credentials file; defaults to
            ``~/.devo_credentials`` in the user's home directory
        :param timeout: assigned to ``client.timeout``
        :param retries: assigned to ``client.retries``
        :param verify: assigned to ``client.verify``
        :param user: if truthy, passed to ``client.config.set_user``
        :param app_name: if truthy, passed to ``client.config.set_app_name``
        :param kwargs: extra entries merged into the client configuration
        :raises Exception: when end point and credentials are still missing
        """

        def credentials_complete():
            # End point AND (token OR jwt OR key+secret).
            return bool(self.end_point
                        and (self.oauth_token or self.jwt
                             or (self.api_key and self.api_secret)))

        self.profile = profile
        self.api_key, self.api_secret = api_key, api_secret
        self.end_point = end_point
        self.oauth_token, self.jwt = oauth_token, jwt

        self.credential_path = (
            Path.home() / '.devo_credentials'
            if credential_path is None
            else Path(credential_path).expanduser().resolve()
        )

        if not credentials_complete():
            self._read_profile()

        if not credentials_complete():
            raise Exception(
                'End point and either API keys or OAuth Token must be specified or in ~/.devo_credentials'
            )

        # Caller-supplied kwargs form the base config; auth and address
        # always override whatever was passed under those keys.
        client_config = kwargs
        client_config['auth'] = {
            'key': self.api_key,
            'secret': self.api_secret,
            'token': self.oauth_token,
            'jwt': self.jwt
        }
        client_config['address'] = self.end_point
        self.client = Client(config=client_config)

        self.client.timeout = timeout
        self.client.retries = retries
        self.client.verify = verify

        if user:
            self.client.config.set_user(user)
        if app_name:
            self.client.config.set_app_name(app_name)
Beispiel #12
0
    def test_query(self):
        """Run a non-streaming JSON query with key/secret authentication."""
        client = Client(auth=dict(key=self.key, secret=self.secret),
                        address=self.uri,
                        config=ClientConfig(stream=False, response="json"))

        response = client.query(query=self.query)
        self.assertIsNotNone(response)
        # The decoded payload must carry at least one entry under 'object'.
        rows = json.loads(response)['object']
        self.assertTrue(len(rows) > 0)
Beispiel #13
0
 def test_query_from_fixed_dates(self):
     """Query from today's UTC date up to the current UTC timestamp."""
     client = Client(auth=dict(key=self.key, secret=self.secret),
                     address=self.uri,
                     config=ClientConfig(stream=False, response="json"))
     date_from = strftime("%Y-%m-%d", gmtime())
     date_to = strftime("%Y-%m-%d %H:%M:%S", gmtime())
     response = client.query(query=self.query,
                             dates={'from': date_from, 'to': date_to})
     self.assertIsNotNone(response)
     rows = json.loads(response)['object']
     self.assertEqual(len(rows), 1)
Beispiel #14
0
 def test_query_yesterday_to_today(self):
     """Query the window between yesterday() and today(); expect one row."""
     window = dict([('from', 'yesterday()'), ('to', 'today()')])
     client = Client(self.key, self.secret, self.uri)
     response = client.query(query=self.query,
                             dates=window,
                             stream=False,
                             response="json")
     self.assertIsNotNone(response)
     rows = json.loads(response)['object']
     self.assertEqual(len(rows), 1)
Beispiel #15
0
 def test_pragmas(self):
     """Check a query that supplies a comment (comment.free pragma)."""
     json_config = ClientConfig(response="json", stream=False)
     client = Client(auth=dict(key=self.key, secret=self.secret),
                     address=self.uri,
                     config=json_config)
     # User and application name are set on the client configuration
     # after construction.
     client.config.set_user(user=self.user)
     client.config.set_app_name(app_name=self.app_name)
     response = client.query(query=self.query, comment=self.comment)
     self.assertIsNotNone(response)
     rows = json.loads(response)['object']
     self.assertEqual(len(rows), 1)
Beispiel #16
0
 def _query_no_stream(self, response_type, result,
                      keepAliveToken=DEFAULT_KEEPALIVE_TOKEN):
     """Run a non-streaming query with ``Client._make_request`` patched.

     ``result`` is exposed on the patched request's return value as
     ``text`` when it is a ``str`` and as ``content`` otherwise.
     """
     plain_config = dict(address="URI",
                         stream=False,
                         response=response_type,
                         keepAliveToken=keepAliveToken)
     client = Client(retries=0, config=plain_config)
     with mock.patch('devo.api.Client._make_request') as request_stub:
         attribute = 'text' if isinstance(result, str) else 'content'
         setattr(request_stub.return_value, attribute, result)
         return client.query()
Beispiel #17
0
class TestApi(unittest.TestCase):
    """Job-management test case for the API client (currently skipped)."""

    def setUp(self):
        """Build a non-streaming client whose queries create a 'donothing' job."""
        destination = {
            "type": "donothing",
            "params": {"friendlyName": "devo-sdk-api-test"},
        }
        client_config = {
            'key': os.getenv('DEVO_API_KEY', None),
            'secret': os.getenv('DEVO_API_SECRET', None),
            'address': os.getenv('DEVO_API_ADDRESS',
                                 'https://apiv2-us.devo.com/'),
            "stream": False,
            "destination": destination,
        }
        self.client = Client(config=client_config)

    @unittest.skip("temporarily disabled due to Query API bug")
    def test_jobs_cycle(self):
        """Launch a query, then list, stop, start and remove the resulting job."""
        client = self.client
        client.query(query="from demo.ecommerce.data select *",
                     dates={'from': '2018-01-01 00:00:00'})

        # Listing without filters must return something.
        all_jobs = client.get_jobs()
        self.assertTrue(all_jobs['object'])

        # Filter by friendly name.
        named_jobs = client.get_jobs(name="devo-sdk-api-test")
        self.assertTrue(named_jobs['object'])

        # Filter by job type.
        typed_jobs = client.get_jobs(job_type="donothing")
        self.assertTrue(typed_jobs['object'])

        # Filter by both; the first match supplies the id used below.
        matching_jobs = client.get_jobs(name="devo-sdk-api-test",
                                        job_type="donothing")
        self.assertTrue(matching_jobs['object'])
        job_id = matching_jobs['object'][0]['id']

        # Walk the job through stop -> start -> remove.
        stopped = client.stop_job(job_id)
        self.assertEqual(stopped['object']['status'], "STOPPED")

        started = client.start_job(job_id)
        self.assertEqual(started['object']['status'], "RUNNING")

        removed = client.remove_job(job_id)
        self.assertEqual(removed['object']['status'], "REMOVED")
Beispiel #18
0
def configure(args):
    """
    Load CLI configuration

    Sources are applied in this order: an explicit JSON config file (unless
    it is the default path), the CLI ``args`` themselves, the DEVO_API_*
    environment variables (only when no credentials were given in ``args``)
    and finally the default ``~/.devo.json`` file when it exists and no
    credentials have been found so far.

    :param args: args from files, launch vars, etc
    :return: Client API Object and Config values in array
    """
    config = Configuration()
    if args.get('config') != "~/.devo.json":
        config.load_json(args.get('config'), 'api')

    config.mix(dict(args))

    if "key" not in args.keys() and "api" not in args.keys() \
            and "token" not in args.keys():
        config.set("key", os.environ.get('DEVO_API_KEY', None))
        config.set("secret", os.environ.get('DEVO_API_SECRET', None))
        if "url" not in args.keys():
            config.set("url", os.environ.get('DEVO_API_URL', None))

    # os.path.exists() does not expand "~", so the literal string
    # "~/.devo.json" would (almost) never exist and the default file would
    # never be loaded; expand it to the user's home directory first.
    default_config_path = os.path.expanduser("~/.devo.json")
    if not config.keys("key") and not config.keys("api") \
            and not config.keys("token") \
            and os.path.exists(default_config_path):
        config.load_default_json('api')

    # NOTE(review): results are discarded; kept because Configuration.keys
    # may have side effects that are not visible from here -- TODO confirm.
    config.keys('from')
    config.keys('to')

    # Try to compose the api
    api = None
    try:
        api = Client.from_config(config.get())
    except DevoClientException as error:
        print_error(str(error), show_help=True)
    return api, config.get()
Beispiel #19
0
    def test_from_config(self):
        """``Client.from_config`` must build a ``Client`` from a plain dict."""
        settings = dict(key=self.key,
                        secret=self.secret,
                        url=self.uri,
                        user=self.user,
                        app_name=self.app_name)
        client = Client.from_config(settings)

        self.assertTrue(isinstance(client, Client))
Beispiel #20
0
def configure(args):
    """
    Load CLI configuration

    Optional sources (config file, environment variables, default config)
    are applied first; the CLI ``args`` are always mixed in last, even if
    loading one of the optional sources raised.

    :param args: args from files, launch vars, etc
    :return: Client API Object and Config values in array
    """
    # (config option, environment variable) pairs, applied in this order.
    env_options = (
        ("key", 'DEVO_API_KEY'),
        ("secret", 'DEVO_API_SECRET'),
        ("url", 'DEVO_API_URL'),
        ("user", 'DEVO_API_USER'),
        ("comment", 'DEVO_API_COMMENT'),
    )

    config = Configuration()
    try:
        config_path = args.get('config')
        if config_path:
            config.load_config(config_path, 'api')

        if args.get('env'):
            for option, variable in env_options:
                config.set(option, os.environ.get(variable, None))

        if args.get('default'):
            config.load_default_config(section="api")
    finally:
        config.mix(dict(args))
        conf = config.get()

    # Try to compose the api; on failure the error is reported and the
    # returned client stays None.
    try:
        api = Client.from_config(conf)
    except DevoClientException as error:
        api = None
        print_error(str(error), show_help=True)
    return api, conf
Beispiel #21
0
 def test_stream_query_no_results_bounded_dates(self):
     """A streamed query with both dates set and no matches yields nothing."""
     window = {'from': '1h', 'to': 'now()'}
     client = Client(auth=dict(key=self.key, secret=self.secret),
                     address=self.uri,
                     config=ClientConfig(response="json/simple"),
                     retries=3)
     stream = client.query(query=self.query_no_results, dates=window)
     self.assertTrue(isinstance(stream, types.GeneratorType))
     # Draining the generator must produce no rows.
     rows = list(stream)
     self.assertEqual(len(rows), 0)
Beispiel #22
0
    def test_from_dict(self):
        """A ``Client`` can be built straight from a configuration dict."""
        settings = dict(key=self.key,
                        secret=self.secret,
                        address=self.uri,
                        user=self.user,
                        app_name=self.app_name)
        client = Client(config=settings)

        self.assertTrue(isinstance(client, Client))
Beispiel #23
0
 def setUp(self):
     """Build a non-streaming client whose queries create a 'donothing' job."""
     destination = {
         "type": "donothing",
         "params": {"friendlyName": "devo-sdk-api-test"},
     }
     client_config = {
         'key': os.getenv('DEVO_API_KEY', None),
         'secret': os.getenv('DEVO_API_SECRET', None),
         'address': os.getenv('DEVO_API_ADDRESS',
                              'https://apiv2-us.devo.com/'),
         "stream": False,
         "destination": destination,
     }
     self.client = Client(config=client_config)
Beispiel #24
0
 def test_stream_query_no_results_unbounded_dates(self):
     """A streamed query without dates and without matches must time out.

     No ``dates`` are passed, so draining the generator is wrapped in a
     ``stopit.ThreadingTimeout(3)`` guard.
     """
     client = Client(auth=dict(key=self.key, secret=self.secret),
                     address=self.uri,
                     config=ClientConfig(response="json/simple"),
                     retries=3)
     stream = client.query(query=self.query_no_results)
     self.assertTrue(isinstance(stream, types.GeneratorType))
     try:
         with stopit.ThreadingTimeout(3) as timeout_guard:
             drained = list(stream)
     except DevoClientException:
         # devo.api.client.Client._make_request catches the
         # stopit.TimeoutException and raises its own exception without
         # wrapping the original, so the timeout can only be confirmed
         # through the state of the context manager.
         self.assertEqual(timeout_guard.state, timeout_guard.TIMED_OUT)
Beispiel #25
0
 def setUp(self):
     """Store the API URL and build a client from environment settings."""
     fallback_url = 'https://api-us.logtrust.com/'
     self.uri = os.getenv('DEVO_API_URL', fallback_url)
     settings = dict(key=os.getenv('DEVO_API_KEY', None),
                     secret=os.getenv('DEVO_API_SECRET', None),
                     url=os.getenv('DEVO_API_URL', fallback_url))
     self.client = Client.from_config(settings)
Beispiel #26
0
    def test_unsecure_http_query(self):
        """
        Check that a query can be sent over an unsecure (plain HTTP) endpoint.

        Devo will NEVER provide an unsecure HTTP endpoint for its REST API
        services, so you should not need to use or test this functionality.
        Unsecure HTTP is enabled by setting the UNSECURE_HTTP environment
        variable to TRUE; this test sets it for its own duration and restores
        the previous value afterwards so other tests are not affected.
        The endpoint is served by https://httpbin.org/. You can run it locally
        with `docker run -p 80:80 kennethreitz/httpbin`, which exposes an HTTP
        service on port 80. The URL `http://localhost:80/anything` answers
        with the content of the request.
        """
        previous_value = os.environ.get("UNSECURE_HTTP")
        os.environ["UNSECURE_HTTP"] = "TRUE"
        try:
            config = ClientConfig(stream=False, response="json")

            api = Client(auth={
                "key": self.key,
                "secret": self.secret
            },
                         address="localhost:80/anything",
                         config=config,
                         retries=3)

            result = api.query(query=self.query)
            self.assertIsNotNone(result)
            # The echo service returns the request body under 'json'.
            echoed = json.loads(result)
            self.assertIn('json', echoed)
            self.assertIn('query', echoed['json'])
        finally:
            # Do not leak the flag into the rest of the test run.
            if previous_value is None:
                os.environ.pop("UNSECURE_HTTP", None)
            else:
                os.environ["UNSECURE_HTTP"] = previous_value
Beispiel #27
0
    def __init__(self,
                 profile='default',
                 api_key=None,
                 api_secret=None,
                 end_point=None,
                 oauth_token=None,
                 jwt=None,
                 credential_path=None):
        """Resolve credentials and build the underlying API client.

        An end point plus one of OAuth token, JWT, or key/secret pair is
        required. When the arguments do not provide that, ``_read_profile``
        is called once before giving up.

        :param profile: profile name stored on the instance for
            ``_read_profile``
        :param api_key: API key, used together with ``api_secret``
        :param api_secret: API secret, used together with ``api_key``
        :param end_point: address handed to the client
        :param oauth_token: token used as an alternative to key/secret
        :param jwt: JWT used as an alternative to key/secret
        :param credential_path: credentials file; defaults to
            ``~/.devo_credentials`` in the user's home directory
        :raises Exception: when end point and credentials are still missing
        """

        self.profile = profile
        self.api_key = api_key
        self.api_secret = api_secret
        self.end_point = end_point
        self.oauth_token = oauth_token
        self.jwt = jwt

        if credential_path is None:
            self.credential_path = Path.home() / '.devo_credentials'
        else:
            # expanduser() has to run before resolve(): resolve() makes the
            # path absolute, after which a leading "~" is no longer at the
            # start of the path and would never be expanded.
            self.credential_path = Path(credential_path).expanduser().resolve()

        if not (self.end_point and (self.oauth_token or self.jwt or
                                    (self.api_key and self.api_secret))):
            self._read_profile()

        if not (self.end_point and (self.oauth_token or self.jwt or
                                    (self.api_key and self.api_secret))):
            raise Exception(
                'End point and either API keys or OAuth Token must be specified or in ~/.devo_credentials'
            )

        self.client = Client(auth=dict(key=self.api_key,
                                       secret=self.api_secret,
                                       token=self.oauth_token,
                                       jwt=self.jwt),
                             address=self.end_point)
Beispiel #28
0
def process_response(response, config):
    """
    process responses from Client API

    Output goes to the file named by ``config['output']`` when that key is
    present; otherwise ``line_printer`` is given ``None``. If the response
    type cannot be streamed, ``config['stream']`` is forced to ``False``.

    :param response: data received from Devo API
    :param config: array with launch options
    :return: None
    """
    # Default to None so the name is always bound, even without 'output'.
    file_printer = None
    if 'output' in config.keys():
        try:
            file_printer = open(config['output'], 'w')
        except (OSError, IOError) as error:
            print_error("Error: (%s)" % error)
            # Nothing sensible can be written if the requested output file
            # could not be opened.
            return

    try:
        if not Client.stream_available(config['response']):
            config['stream'] = False

        printer = line_printer(file_printer)
        if config['stream']:
            for item in response:
                printer(item)
        else:
            printer(response)
    finally:
        # Flush and release the output file once everything was printed.
        if file_printer is not None:
            file_printer.close()
Beispiel #29
0
import os
from devo.api import Client, ClientConfig, JSON

# Credentials come from the environment; both are None when unset.
key = os.getenv('DEVO_API_KEY')
secret = os.getenv('DEVO_API_SECRET')

# Client for the EU query endpoint, answering in JSON handled by the
# JSON processor.
api = Client(auth=dict(key=key, secret=secret),
             address="https://apiv2-eu.devo.com/search/query",
             config=ClientConfig(response="json", processor=JSON))

# Query the window between yesterday and today, limited to 20 rows.
response = api.query(
    query="from demo.ecommerce.data select * limit 20",
    dates=dict([('from', "today()-1*day()"), ('to', "today()")]))

print(response)
class Reader(object):
    """Helper around the Devo API ``Client`` for running LINQ queries.

    Credentials are taken from the constructor arguments and, when those are
    incomplete, from a profile in a credentials file. Query results can be
    produced as lists, dicts, namedtuples or a pandas DataFrame.
    """

    def __init__(self,
                 profile='default',
                 api_key=None,
                 api_secret=None,
                 end_point=None,
                 oauth_token=None,
                 jwt=None,
                 credential_path=None,
                 timeout=None,
                 retries=1,
                 verify=True,
                 user=None,
                 app_name=None,
                 **kwargs):
        """Resolve credentials and build the underlying API client.

        An end point plus one of OAuth token, JWT, or key/secret pair is
        required. When the arguments do not provide that, the credentials
        file is consulted once before giving up.

        :param profile: section of the credentials file to read
        :param api_key: API key, used together with ``api_secret``
        :param api_secret: API secret, used together with ``api_key``
        :param end_point: address handed to the client
        :param oauth_token: token used as an alternative to key/secret
        :param jwt: JWT used as an alternative to key/secret
        :param credential_path: credentials file; defaults to
            ``~/.devo_credentials`` in the user's home directory
        :param timeout: assigned to ``client.timeout``
        :param retries: assigned to ``client.retries``
        :param verify: assigned to ``client.verify``
        :param user: if truthy, passed to ``client.config.set_user``
        :param app_name: if truthy, passed to ``client.config.set_app_name``
        :param kwargs: extra entries merged into the client configuration
        :raises Exception: when end point and credentials are still missing
        """

        self.profile = profile
        self.api_key = api_key
        self.api_secret = api_secret
        self.end_point = end_point
        self.oauth_token = oauth_token
        self.jwt = jwt

        if credential_path is None:
            self.credential_path = Path.home() / '.devo_credentials'
        else:
            self.credential_path = Path(credential_path).expanduser().resolve()

        if not (self.end_point and (self.oauth_token or self.jwt or
                                    (self.api_key and self.api_secret))):
            self._read_profile()

        if not (self.end_point and (self.oauth_token or self.jwt or
                                    (self.api_key and self.api_secret))):
            raise Exception(
                'End point and either API keys or OAuth Token must be specified or in ~/.devo_credentials'
            )

        # Caller-supplied kwargs form the base config; auth and address
        # always override whatever was passed under those keys.
        config = kwargs
        config.update({
            'auth': {
                'key': self.api_key,
                'secret': self.api_secret,
                'token': self.oauth_token,
                'jwt': self.jwt
            },
            'address': self.end_point
        })
        self.client = Client(config=config)

        self.client.timeout = timeout
        self.client.retries = retries
        self.client.verify = verify

        if user:
            self.client.config.set_user(user)
        if app_name:
            self.client.config.set_app_name(app_name)

    def _read_profile(self):
        """
        Read Devo API keys from a credentials file located
        at ~/.devo_credentials if credentials are not provided

        Use profile to specify which set of credentials to use

        The end point aliases 'USA' and 'EU' are replaced by the
        corresponding query URLs.
        """
        # NOTE(review): every value found here replaces the one passed to
        # the constructor (missing options become None) and 'jwt' is not
        # read from the file -- confirm this is intended.

        config = configparser.ConfigParser()
        config.read(self.credential_path)

        if self.profile in config:
            profile_config = config[self.profile]

            self.api_key = profile_config.get('api_key')
            self.api_secret = profile_config.get('api_secret')
            self.end_point = profile_config.get('end_point')
            self.oauth_token = profile_config.get('oauth_token')

            if self.end_point == 'USA':
                self.end_point = 'https://apiv2-us.devo.com/search/query'
            elif self.end_point == 'EU':
                self.end_point = 'https://apiv2-eu.devo.com/search/query'

    def query(self,
              linq_query,
              start,
              stop=None,
              output='dict',
              ts_format='datetime',
              comment=None):
        """Run a LINQ query and return typed rows.

        Column types are looked up first with a one-row query, then the
        real query is streamed as CSV and every value is converted.

        :param linq_query: query text
        :param start: start of the window (see ``_to_unix`` for formats)
        :param stop: end of the window, or None for a continuous query
        :param output: one of 'dict', 'list', 'namedtuple', 'dataframe'
        :param ts_format: 'datetime', 'iso' or 'timestamp'
        :param comment: forwarded to the client query
        :return: a DataFrame for ``output='dataframe'``, otherwise a
            ``DSResults`` wrapping the response and the row generator
        :raises Exception: for an unknown ``output`` or a DataFrame request
            without ``stop``
        """

        valid_outputs = ('dict', 'list', 'namedtuple', 'dataframe')
        if output not in valid_outputs:
            raise Exception(f"Output must be one of {valid_outputs}")

        if output == 'dataframe' and stop is None:
            raise Exception("DataFrame can't be build from continuous query")

        type_dict = self._get_types(linq_query, start, ts_format)
        res = self._query(linq_query,
                          start,
                          stop,
                          mode='csv',
                          stream=True,
                          comment=comment)
        results = self._stream(res, type_dict)

        # The first item produced by _stream is the header row.
        cols = next(results)
        results = getattr(self, f'_to_{output}')(results, cols)

        if output == 'dataframe':
            return results
        return DSResults(res, results)

    def _stream(self, res, type_dict):
        """Yield the header row, then every CSV row converted by column type.

        :param res: iterable of raw response lines; ``res.close()`` is
            called if anything goes wrong
        :param type_dict: mapping of column name to converter callable
        :raises RuntimeError: if the response contains no header row
        :raises Exception: if the header and ``type_dict`` differ in length
        """

        result = self._decode_results(res)
        result = csv.reader(result)
        try:
            # An exhausted reader must not leak StopIteration out of this
            # generator (it would surface as an opaque RuntimeError).
            cols = next(result, None)
            if cols is None:
                raise RuntimeError('Empty response: no header row received')
            type_list = [type_dict[c] for c in cols]

            if len(cols) != len(type_dict):
                raise Exception(
                    "Duplicate column names encountered, custom columns must be named"
                )

            yield cols

            for row in result:
                yield [t(v) for t, v in zip(type_list, row)]
        except Exception:
            res.close()
            # Bare raise keeps the original traceback intact.
            raise

    def _query(self,
               linq_query,
               start,
               stop=None,
               mode='csv',
               stream=False,
               limit=None,
               comment=None):
        """Send a query through the client and return its raw response.

        ``start`` and ``stop`` are converted with ``_to_unix``; a warning is
        issued when either has a ``tzinfo`` attribute that is None. The
        client's response mode and stream flag are overwritten on every
        call.
        """
        if (getattr(start, 'tzinfo', 1) is None) or (getattr(
                stop, 'tzinfo', 1) is None):
            warnings.warn('Naive date interpreted as UTC')

        start = self._to_unix(start)
        stop = self._to_unix(stop)

        dates = {'from': start, 'to': stop}
        self.client.config.response = mode
        self.client.config.stream = stream

        response = self.client.query(query=linq_query,
                                     dates=dates,
                                     limit=limit,
                                     comment=comment)

        return response

    @staticmethod
    def _null_decorator(f):
        """Wrap ``f`` so that an empty string maps to None instead of ``f('')``."""
        def null_f(v):
            if v == '':
                return None
            return f(v)

        return null_f

    @staticmethod
    def make_ts_func(ts_format):
        """Return a parser for ``'%Y-%m-%d %H:%M:%S.%f'`` timestamps.

        The parsed value is tagged as UTC and returned as a datetime, an
        ISO string or a float epoch depending on ``ts_format``.

        :raises Exception: if ``ts_format`` is not one of the three options
        """
        if ts_format not in ('datetime', 'iso', 'timestamp'):
            raise Exception(
                'ts_format must be one of: datetime, iso, or timestamp ')

        def ts_func(t):
            dt = datetime.datetime.strptime(t.strip(), '%Y-%m-%d %H:%M:%S.%f')
            dt = dt.replace(tzinfo=timezone.utc)

            if ts_format == 'datetime':
                return dt
            elif ts_format == 'iso':
                return dt.isoformat()
            elif ts_format == 'timestamp':
                return dt.timestamp()

        return ts_func

    def _make_type_map(self, ts_format):
        """Map API type names to converters; unknown types fall back to ``str``.

        Every converter is wrapped so that an empty string becomes None.
        """

        funcs = {
            'timestamp': self.make_ts_func(ts_format),
            'str': str,
            'int8': int,
            'int4': int,
            'float8': float,
            'float4': float,
            'bool': lambda b: b == 'true'
        }

        decorated_funcs = {
            t: self._null_decorator(f)
            for t, f in funcs.items()
        }
        decorated_str = self._null_decorator(str)

        return defaultdict(lambda: decorated_str, decorated_funcs)

    def _get_types(self, linq_query, start, ts_format):
        """
        Gets types of each column of submitted query

        A one-row 'json/compact' query is issued over the second before
        ``start`` and the column metadata of its response is used.

        :return: dict of column name to converter callable
        :raises Exception: if the response is not valid JSON
        """
        type_map = self._make_type_map(ts_format)

        # so we don't have  stop ts in future as required by API V2
        stop = self._to_unix(start)
        start = stop - 1

        response = self._query(linq_query,
                               start=start,
                               stop=stop,
                               mode='json/compact',
                               limit=1)

        try:
            data = json.loads(response)
            check_status(data)
        except ValueError as err:
            # Keep the decoding error as the cause for easier debugging.
            raise Exception('API V2 response error') from err

        col_data = data['object']['m']
        type_dict = {c: type_map[v['type']] for c, v in col_data.items()}

        return type_dict

    @staticmethod
    def _to_unix(date, milliseconds=False):
        """
        Convert date to a unix timestamp

        date: A unix timestamp in second, a datetime object,
        pandas.Timestamp object, or string to be parsed
        by pandas.to_datetime

        None is passed through unchanged; the string 'now' means the
        current time. Naive datetimes are interpreted as UTC.

        :param milliseconds: return milliseconds instead of seconds
        :return: integer timestamp, or None
        :raises Exception: for unsupported input types
        """

        if date is None:
            return None

        elif date == 'now':
            epoch = datetime.datetime.now().timestamp()
        elif isinstance(date, str):
            epoch = pd.to_datetime(date).timestamp()
        elif isinstance(date, (pd.Timestamp, datetime.datetime)):
            if date.tzinfo is None:
                epoch = date.replace(tzinfo=timezone.utc).timestamp()
            else:
                epoch = date.timestamp()
        elif isinstance(date, (int, float)):
            epoch = date
        else:
            raise Exception('Invalid Date')

        if milliseconds:
            epoch *= 1000

        return int(epoch)

    @staticmethod
    def _decode_results(r):
        """Decode response lines from bytes to text.

        The first line is additionally parsed as JSON so that an error
        payload can be reported through ``check_status``; a line that is
        not JSON is treated as regular data. An empty response yields
        nothing.
        """
        r = iter(r)

        # next() without a default would raise StopIteration inside this
        # generator, which Python turns into a RuntimeError.
        first = next(r, None)
        if first is None:
            return

        # catch error not reported for json/compact
        try:
            data = json.loads(first)
            check_status(data)
        except ValueError:
            pass

        yield first.decode(
            'utf-8').strip()  # APIV2 adding space to first line of aggregates
        for l in r:
            yield l.decode('utf-8')

    @staticmethod
    def _to_list(results, cols):
        """Yield every row unchanged (``cols`` is unused)."""
        yield from results

    @staticmethod
    def _to_dict(results, cols):
        """Yield every row as a dict keyed by column name."""
        for row in results:
            yield {c: v for c, v in zip(cols, row)}

    @staticmethod
    def _to_namedtuple(results, cols):
        """Yield every row as a ``Row`` namedtuple with ``cols`` as fields."""
        Row = namedtuple('Row', cols)
        for row in results:
            yield Row(*row)

    @staticmethod
    def _to_dataframe(results, cols):
        """Collect all rows into a DataFrame, with missing values as NaN."""
        return pd.DataFrame(results, columns=cols).fillna(np.nan)

    def randomSample(self, linq_query, start, stop, sample_size):
        """Return a DataFrame with ``sample_size`` randomly chosen rows.

        The table size is counted first. If the sample is at least as large
        as the table, the full table is returned with a warning. Otherwise
        rows are filtered server-side with a random threshold and the query
        is repeated until enough rows come back.

        :raises Exception: if ``sample_size`` is not a positive int
        """

        # Check the type first so that a non-numeric value produces the
        # intended message instead of a TypeError from the comparison.
        if (not isinstance(sample_size, int)) or (sample_size < 1):
            raise Exception('Sample size must be a positive int')

        size_query = f'{linq_query} group select count() as count'

        r = self.query(size_query, start, stop, output='list')
        table_size = next(r)[0]

        if sample_size >= table_size:
            warning_msg = 'Sample size greater than or equal to total table size. Returning full table'
            warnings.warn(warning_msg)
            return self.query(linq_query, start, stop, output='dataframe')

        p = self._find_optimal_p(n=table_size, k=sample_size, threshold=0.99)

        sample_query = f'{linq_query} where simplify(float8(rand())) < {p}'

        while True:
            df = self.query(sample_query, start, stop, output='dataframe')

            # Retry when the random filter returned too few rows.
            if df.shape[0] >= sample_size:
                return df.sample(sample_size).sort_index().reset_index(
                    drop=True)

    @staticmethod
    def _loc_scale(n, p):
        """
        Takes parameters of a binomial
        distribution and finds the mean
        and std for a normal approximation

        :param n: number of trials
        :param p: probability of success
        :return: mean, std
        """
        loc = n * p
        scale = np.sqrt(n * p * (1 - p))

        return loc, scale

    def _find_optimal_p(self, n, k, threshold):
        """
        Use a normal approximation to the
        binomial distribution.  Starts with
        p such that mean of B(n,p) = k
        and iterates.

        :param n: number of trials
        :param k: desired number of successes
        :param threshold: desired probability to achieve k successes

        :return: probability that a single trial that will yield
                 at least k success with n trials with probability of threshold

        """
        # A probability cannot exceed 1 (relevant only when k >= n).
        p = min(k / n, 1)
        while True:
            loc, scale = self._loc_scale(n, p)
            # sf = 1 - cdf, but can be more accurate according to scipy docs
            if norm.sf(x=k - 0.5, loc=loc, scale=scale) > threshold:
                break
            if p >= 1:
                # At p == 1 the scale is 0 and the comparison above can
                # never succeed, so stop instead of looping forever.
                break
            p = min(1.001 * p, 1)

        return p

    def population_sample(self, query, start, stop, column, sample_size):
        """Return all rows whose ``column`` value is in a random sample.

        Distinct values of ``column`` are sampled with ``randomSample`` and
        the original query is then filtered to those values.
        """

        population_query = f'{query} group by {column}'

        df = self.randomSample(population_query, start, stop, sample_size)
        population = df[column]
        sample_set = ','.join(f'"{x}"' for x in population)

        sample_query = f'{query} where str({column}) in {{{sample_set}}}'

        return self.query(sample_query, start, stop, output='dataframe')