# Sync voterbase_id matches from a Redshift source table into ActionKit user
# records, and log every successfully-updated user id to a Redshift log table.
#
# NOTE(review): `logger`, `rs` (Redshift connector), `ak` (ActionKit
# connector), `Table`, and `timestamp` are assumed to be defined earlier in
# this script — confirm against the full file.

# Header row for the log table rows accumulated below.
loaded = [['id', 'voterbase_id', 'date_updated']]
source_table = 'schema.table'  # the table with the information I'm pushing to ActionKit
log_table = 'schema.table'  # this is where we will log every user id that gets marked with a voterbase_id

logger.info("Running query to get matches...")
# BUG FIX: the query was a plain (non-f) string, so the literal text
# "{source_table}" / "{log_table}" was sent to the database instead of the
# table names. An f-string interpolates the two variables defined above.
query = f'''
    select distinct id, voterbase_id
    from {source_table}
    left join {log_table} using (id, voterbase_id)
    where voterbase_id is not null
    and date_updated is null
    '''

source_data = rs.query(query)

if source_data.num_rows > 0:
    logger.info(f"Will be updating voterbase_id for {source_data.num_rows}...")
    for row in source_data:
        # Write the voterbase_id into the user's ActionKit custom fields.
        # (The original code also fetched the user *before* updating and
        # discarded the result — that redundant API call is removed.)
        user_dict = {"fields": {"vb_voterbase_id": row['voterbase_id']}}
        ak.update_user(user_id=row['id'], **user_dict)
        # Re-fetch the user to confirm the write stuck before logging it.
        user = ak.get_user(user_id=row['id'])
        if user['fields']['vb_voterbase_id'] == row['voterbase_id']:
            loaded.append([row['id'], row['voterbase_id'], timestamp])
    logger.info("Done with loop! Loading into log table...")
    Table(loaded).to_redshift(log_table, if_exists='append')
else:
    logger.info("No one to update today...")
def get_records(self, fields=None, max_records=None, view=None, formula=None, sort=None):
    """
    Fetch records from the Airtable and return them as a Parsons table.

    `Args:`
        fields: str or lst
            Restrict the output to a single column or a list of columns.
            Column names are case sensitive.
        max_records: int
            Cap on the total number of records that will be returned.
        view: str
            When provided, only the records in that view will be returned,
            sorted according to the order of the view.
        formula: str
            An Airtable formula evaluated for each record; a record is
            included when the result is not 0, false, "", NaN, [], or
            #Error!. Combined with ``view``, only records in that view
            which satisfy the formula will be returned. For example, to
            only include records where ``COLUMN_A`` isn't empty, pass in:
            ``"NOT({COLUMN_A}='')"``. For more information see
            `Airtable Docs on formulas. <https://airtable.com/api>`_

            Usage - Text Column is not empty:

            ``airtable.get_all(formula="NOT({COLUMN_A}='')")``

            Usage - Text Column contains:

            ``airtable.get_all(formula="FIND('SomeSubText', {COLUMN_STR})=1")``

        sort: str or lst
            How the returned records are ordered (applied within ``view``
            when one is set). Pass a list to sort by multiple columns.
            Direction is ascending by default; prefix a column name with a
            minus sign ``-`` to reverse it.

            Example usage:
            ``airtable.get(sort=['ColumnA', '-ColumnB'])``

    `Returns:`
        Parsons Table
            See :ref:`parsons-table` for output options.
    """
    request_kwargs = {
        'fields': fields,
        'max_records': max_records,
        'view': view,
        'formula': formula,
    }
    # The underlying client raises an error when sort is None, so the key
    # is only included when a sort was actually requested.
    if sort:
        request_kwargs['sort'] = sort

    raw_records = Table(self.at.get_all(**request_kwargs))
    return raw_records.unpack_dict(column='fields', prepend=False)
id_type="vanid", expand_fields=["reported_demographics", "custom_fields"]) transformed_person = transform_person_for_redshift(person) extra_fields.append(transformed_person) except HTTPError as e: print(e) error = {"vanid": contact_vanid, "error": str(e)[:999]} errors.append(error) logger.info( f'Found {len(extra_fields)} new contacts to add to contacts_extra_fields' ) logger.info(f'Identified {len(errors)} errors. Appending to errors table.') # convert to Parsons table tbl = Table(extra_fields) errors_tbl = Table(errors) tbl.to_csv('extra_fields_test.csv') errors_tbl.to_csv('extra_fields_errors.csv') # copy Table into Redshift, append new rows rs.copy(tbl, 'sunrise.contacts_extra_fields', if_exists='append', distkey='vanid', sortkey=None, alter_table=True) rs.copy(errors_tbl, 'sunrise.get_extra_fields_errors', if_exists='append',
target_list.append(target) counter = collections.Counter(target_list) calls_counter = dict(counter) calls_per_office = [{"name" : key, "num_calls": value} for key, value in calls_counter.items()] return Table(calls_per_office) if __name__ == "__main__": # Get all outreaches for given tool id outreaches = newmode.get_outreaches(TOOL_ID) # Tranform raw outreach data for spreadsheet transformed_outreaches = transform_outreaches(outreaches) # Set up tables for Google Sheets calls_per_day = Table( petl.aggregate( transformed_outreaches.table, key="created_date", aggregation=len ) ) leaderboard = petl.aggregate( transformed_outreaches.table, key="name", aggregation=len ) calls_per_office = get_calls_per_office(transformed_outreaches) # rename columns for spreadsheet calls_per_day.rename_column('value', 'num_calls') calls_per_day=calls_per_day.rename_column('created_date', 'day') calls_per_office=calls_per_office.rename_column('name', 'office') # Sort leaderboard by num calls per person leaderboard_ranked = Table(petl.sort(leaderboard, 'value', reverse=True))
def simple_table():
    """Return a minimal one-row fixture table.

    Note - If you modify this table, you must also update the related
    "simple" files. Fortunately Parsons should make that easy to do :)
    """
    row = {'first': 'Bob', 'last': 'Smith'}
    return Table([row])
def test_get_agents(self, m):
    # Mock the group's agents endpoint, then verify the client returns a
    # table matching the raw fixture items.
    group_id = 'Qqp6o90SiE'
    m.get(HUSTLE_URI + f'groups/{group_id}/agents', json=expected_json.agents)
    result = self.hustle.get_agents(group_id=group_id)
    assert_matching_tables(result, Table(expected_json.agents['items']))
def test_get_groups(self, m):
    # Mock the organization's groups endpoint and compare the client's
    # output against the fixture items.
    org_id = 'LePEoKzD3'
    m.get(HUSTLE_URI + f'organizations/{org_id}/groups', json=expected_json.groups)
    result = self.hustle.get_groups(org_id)
    assert_matching_tables(result, Table(expected_json.groups['items']))
def test_paginate_request(self, m):
    # Verify that paginate_request() reassembles paginated POST results:
    # the mocked search endpoint (via self.paginate_callback) serves one
    # record per page (page_size=1), and the stitched-together result must
    # match the full blob.
    # Anonymized real output with nested columns
    self.blob = [
        {
            'id': 78757050,
            'name': 'Person One',
            'prefix': None,
            'first_name': 'Person',
            'middle_name': None,
            'last_name': 'One',
            'suffix': None,
            'address': {
                'street': None,
                'city': 'CityA',
                'state': 'StateI',
                'postal_code': '12345',
                'country': None},
            'assignee_id': None,
            'company_id': 12030795,
            'company_name': 'Indivisible CityA',
            'contact_type_id': 501950,
            'details': None,
            'emails': [{'email': '*****@*****.**', 'category': 'work'}],
            'phone_numbers': [
                {'number': '(541) 555-9585', 'category': 'work'},
                {'number': '555-555-9585', 'category': 'work'}],
            'socials': [{'url': 'https://gravatar.com/gravatar', 'category': 'gravatar'}],
            'tags': [],
            'title': None,
            'websites': [{'url': 'http://www.IndivisibleCityA.org', 'category': None}],
            'custom_fields': [
                {'custom_field_definition_id': 125880, 'value': None},
                {'custom_field_definition_id': 107297, 'value': None},
                {'custom_field_definition_id': 102127, 'value': None},
                {'custom_field_definition_id': 135034, 'value': None},
                {'custom_field_definition_id': 107298, 'value': None},
                {'custom_field_definition_id': 108972, 'value': None},
                {'custom_field_definition_id': 125881, 'value': None}],
            'date_created': 1558169903,
            'date_modified': 1558169910,
            'date_last_contacted': 1558169891,
            'interaction_count': 1,
            'leads_converted_from': [],
            'date_lead_created': None
        },
        {
            'id': 78477076,
            'name': 'Person Two',
            'prefix': None,
            'first_name': 'Person',
            'middle_name': None,
            'last_name': 'Two',
            'suffix': None,
            'address': {
                'street': None,
                'city': None,
                'state': None,
                'postal_code': None,
                'country': None},
            'assignee_id': 289533,
            'company_id': 12096071,
            'company_name': 'Indivisible StateII',
            'contact_type_id': 501950,
            'details': None,
            'emails': [{'email': '*****@*****.**', 'category': 'work'}],
            'phone_numbers': [{'number': '(908) 555-2941', 'category': 'work'}],
            'socials': [],
            'tags': ['treasurer'],
            'title': 'Treasurer',
            'websites': [],
            'custom_fields': [
                {'custom_field_definition_id': 125880, 'value': None},
                {'custom_field_definition_id': 107297, 'value': None},
                {'custom_field_definition_id': 102127, 'value': None},
                {'custom_field_definition_id': 135034, 'value': None},
                {'custom_field_definition_id': 107298, 'value': None},
                {'custom_field_definition_id': 108972, 'value': None},
                {'custom_field_definition_id': 125881, 'value': None}],
            'date_created': 1557761054,
            'date_modified': 1558218799,
            'date_last_contacted': 1558196341,
            'interaction_count': 14,
            'leads_converted_from': [],
            'date_lead_created': None
        },
        {
            'id': 78839154,
            'name': 'Person Three',
            'prefix': None,
            'first_name': 'Person',
            'middle_name': None,
            'last_name': 'Three',
            'suffix': None,
            'address': {
                'street': None,
                'city': 'CityC',
                'state': 'StateIII',
                'postal_code': '54321',
                'country': None},
            'assignee_id': None,
            'company_id': 34966944,
            'company_name': 'Flip StateIII',
            'contact_type_id': 501950,
            'details': None,
            'emails': [{'email': '*****@*****.**', 'category': 'work'}],
            'phone_numbers': [{'number': '(619) 555-7883', 'category': 'work'}],
            'socials': [
                {'url': 'https://twitter.com/ThreePerson', 'category': 'twitter'},
                {'url': 'https://www.facebook.com/Person.n.Three', 'category': 'facebook'},
                {'url': 'https://gravatar.com/PersonThree', 'category': 'gravatar'}],
            'tags': [],
            'title': None,
            'websites': [],
            'custom_fields': [
                {'custom_field_definition_id': 125880, 'value': None},
                {'custom_field_definition_id': 107297, 'value': None},
                {'custom_field_definition_id': 102127, 'value': None},
                {'custom_field_definition_id': 135034, 'value': None},
                {'custom_field_definition_id': 107298, 'value': None},
                {'custom_field_definition_id': 108972, 'value': None},
                {'custom_field_definition_id': 125881, 'value': None}],
            'date_created': 1558223367,
            'date_modified': 1558223494,
            'date_last_contacted': 1558223356,
            'interaction_count': 2,
            'leads_converted_from': [],
            'date_lead_created': None
        }
    ]

    # Mock endpoints: the callback inspects the request to serve the
    # appropriate page of results.
    m.post(
        self.cp.uri + '/people/search',
        json=self.paginate_callback,
        headers={"filename": "people_search.txt"})

    # The reassembled pages must match the full original blob.
    assert_matching_tables(
        Table(self.blob),
        Table(
            self.cp.paginate_request(
                '/people/search',
                page_size=1,
                req_type='POST'
            )
        )
    )
def setUp(self):
    # Build the Copper client plus the fixture tables shared across tests.
    self.cp = Copper('*****@*****.**', 'key')

    # Using people as the most complicated object for test_get_standard_object()
    # Defined at self scope for use in test_get_people()
    # These rows are the expected *processed* output: nested 'address'
    # dicts flattened to address_* columns, and list columns removed.
    self.processed_people = Table([
        {
            'id': 78757050,
            'name': 'Person One',
            'prefix': None,
            'first_name': 'Person',
            'middle_name': None,
            'last_name': 'One',
            'suffix': None,
            'assignee_id': None,
            'company_id': 12030795,
            'company_name': 'Indivisible CityA',
            'contact_type_id': 501950,
            'details': None,
            'tags': [],
            'title': None,
            'date_created': 1558169903,
            'date_modified': 1558169910,
            'date_last_contacted': 1558169891,
            'interaction_count': 1,
            'leads_converted_from': [],
            'date_lead_created': None,
            'address_city': 'CityA',
            'address_country': None,
            'address_postal_code': '12345',
            'address_state': 'StateI',
            'address_street': None,
        },
        {
            'id': 78477076,
            'name': 'Person Two',
            'prefix': None,
            'first_name': 'Person',
            'middle_name': None,
            'last_name': 'Two',
            'suffix': None,
            'assignee_id': 289533,
            'company_id': 12096071,
            'company_name': 'Indivisible StateII',
            'contact_type_id': 501950,
            'details': None,
            'tags': ['treasurer'],
            'title': 'Treasurer',
            'date_created': 1557761054,
            'date_modified': 1558218799,
            'date_last_contacted': 1558196341,
            'interaction_count': 14,
            'leads_converted_from': [],
            'date_lead_created': None,
            'address_city': None,
            'address_country': None,
            'address_postal_code': None,
            'address_state': None,
            'address_street': None
        },
        {
            'id': 78839154,
            'name': 'Person Three',
            'prefix': None,
            'first_name': 'Person',
            'middle_name': None,
            'last_name': 'Three',
            'suffix': None,
            'assignee_id': None,
            'company_id': 34966944,
            'company_name': 'Flip StateIII',
            'contact_type_id': 501950,
            'details': None,
            'tags': [],
            'title': None,
            'date_created': 1558223367,
            'date_modified': 1558223494,
            'date_last_contacted': 1558223356,
            'interaction_count': 2,
            'leads_converted_from': [],
            'date_lead_created': None,
            'address_city': 'CityC',
            'address_country': None,
            'address_postal_code': '54321',
            'address_state': 'StateIII',
            'address_street': None
        }
    ])

    # Tables and table names for test_get_custom_fields() and test_process_custom_fields()
    self.custom_field_tables = {}
    self.custom_field_table_names = [
        'custom_fields', 'custom_fields_available', 'custom_fields_options']
    self.custom_field_tables['custom_fields'] = Table([
        {'id': 101674, 'name': 'Event Date', 'data_type': 'Date'},
        {'id': 102127, 'name': 'Date Added', 'data_type': 'Date'},
        {'id': 109116, 'name': 'Local Group Subtype', 'data_type': 'Dropdown'}
    ])
    # One row per (field, object-type) pairing the field is available on.
    self.custom_field_tables['custom_fields_available'] = Table([
        {'id': 101674, 'available_on': 'opportunity'},
        {'id': 102127, 'available_on': 'company'},
        {'id': 102127, 'available_on': 'person'},
        {'id': 109116, 'available_on': 'company'}
    ])
    # Dropdown option rows for the 'Local Group Subtype' field.
    self.custom_field_tables['custom_fields_options'] = Table([
        {"id": 109116, "name": "Local Group Subtype", "options_id": 140251,
         "options_name": "Public (displayed in map)", "options_rank": 0},
        {"id": 109116, "name": "Local Group Subtype", "options_id": 140250,
         "options_name": "New (Needs Processing)", "options_rank": 4},
        {"id": 109116, "name": "Local Group Subtype", "options_id": 140252,
         "options_name": "Private (not on map)", "options_rank": 1},
        {"id": 109116, "name": "Local Group Subtype", "options_id": 140254,
         "options_name": "National", "options_rank": 5},
        {"id": 109116, "name": "Local Group Subtype", "options_id": 140766,
         "options_name": "Not following principles", "options_rank": 3},
        {"id": 109116, "name": "Local Group Subtype", "options_id": 140764,
         "options_name": "International", "options_rank": 6},
        {"id": 109116, "name": "Local Group Subtype", "options_id": 141434,
         "options_name": "Inactive", "options_rank": 2}
    ])
def test_distribute_task(self):
    # End-to-end checks of distribute_task() with local storage: a 10-row
    # table split into groups of 5 runs the worker once per chunk, and the
    # module-level `count` / `tableargs` globals (set by the fake workers)
    # record how many chunks ran and what the last chunk received.
    global count
    global tableargs
    datatable = [
        ('x', 'y'),
        (1, 2),
        (3, 4),
        (5, 6),
        (7, 8),
        (9, 0),
        (11, 12),
        (13, 14),
        (15, 16),
        (17, 18),
        (19, 10),
    ]
    # 1. Plain function worker with func_kwargs: 10 rows / 5 = 2 chunks.
    count = 0
    tableargs = None
    distribute_task(
        Table(datatable),
        fake_table_process,
        'foo',  # bucket
        group_count=5,
        storage='local',
        func_kwargs={
            'x': 1,
            'y': [2, 3]
        })
    self.assertEqual(count, 2)
    # The last chunk processed should be the second half of the table.
    assert_matching_tables(
        tableargs[0],
        Table([('x', 'y'), (11, 12), (13, 14), (15, 16), (17, 18), (19, 10)]))

    # 2. Class-based worker: 11 rows -> 3 chunks; the extra (0, 0) row is
    #    the lone member of the final chunk.
    count = 0
    tableargs = None
    distribute_task(
        Table(datatable + [(0, 0)]),
        FakeRunner.foobar,
        'foo',  # bucket
        group_count=5,
        storage='local',
        func_class=FakeRunner,
        func_class_kwargs={'init1': 'initx'},
        func_kwargs={
            'a': 1,
            'x': 2,
            'y': 3
        })
    self.assertEqual(count, 3)
    self.assertEqual(tableargs[1:], ('initx', 1, 2, 3))
    # NOTE(review): the line below duplicates the assertion above — almost
    # certainly a copy-paste artifact; harmless, could be removed.
    self.assertEqual(tableargs[1:], ('initx', 1, 2, 3))
    assert_matching_tables(tableargs[0], Table([('x', 'y'), (0, 0)]))

    # 3. catch=True (with throwing)
    count = 0
    tableargs = None
    distribute_task(
        Table(datatable[:6]),
        FakeRunner.foobar,
        'foo',  # bucket
        group_count=5,
        storage='local',
        func_class=FakeRunner,
        func_class_kwargs={'init1': 'initx'},
        catch=True,
        func_kwargs={
            'a': 'raise',
            'x': 2,
            'y': 3
        })
class TestShopify(unittest.TestCase):
    # Tests for the Shopify connector, exercising order counts, order
    # listing (with date / since_id / financial_status filters), query-URL
    # construction, and the GraphQL endpoint, all against requests_mock.

    # Expected API payloads and the Parsons tables they should produce.
    mock_count_all = {'count': 2}
    mock_count_date = mock_count_since = {'count': 1}
    mock_graphql = {'data': {'orders': {'edges': [{'node': {'id': 1}}]}}}
    mock_orders_all = {
        'orders': [{
            'created_at': '2020-10-19T12:00:00-04:00',
            'financial_status': 'paid',
            'id': 1
        }, {
            'created_at': '2020-10-20T12:00:00-04:00',
            'financial_status': 'refunded',
            'id': 2
        }]
    }
    mock_orders_completed = {
        'orders': [{
            'created_at': '2020-10-19T12:00:00-04:00',
            'financial_status': 'paid',
            'id': 1
        }]
    }
    mock_orders_date = mock_orders_since = {
        'orders': [{
            'created_at': '2020-10-20T12:00:00-04:00',
            'financial_status': 'refunded',
            'id': 2
        }]
    }
    mock_result_all = Table([('created_at', 'financial_status', 'id'),
                             ('2020-10-19T12:00:00-04:00', 'paid', 1),
                             ('2020-10-20T12:00:00-04:00', 'refunded', 2)])
    mock_result_completed = Table([('created_at', 'financial_status', 'id'),
                                   ('2020-10-19T12:00:00-04:00', 'paid', 1)])
    mock_result_date = mock_result_since = Table([
        ('created_at', 'financial_status', 'id'),
        ('2020-10-20T12:00:00-04:00', 'refunded', 2)
    ])

    def setUp(self):
        # Fresh client per test; credentials come from module constants.
        self.shopify = Shopify(SUBDOMAIN, PASSWORD, API_KEY, API_VERSION)

    @requests_mock.Mocker()
    def test_get_count(self, m):
        # Mock the three count URLs (all / by date / by since_id) and
        # check get_count() returns the expected totals.
        m.get(self.shopify.get_query_url(None, None, "orders", True),
              json=self.mock_count_all)
        m.get(self.shopify.get_query_url('2020-10-20', None, "orders", True),
              json=self.mock_count_date)
        m.get(self.shopify.get_query_url(None, 2, "orders", True),
              json=self.mock_count_since)
        self.assertEqual(self.shopify.get_count(None, None, "orders"), 2)
        self.assertEqual(self.shopify.get_count('2020-10-20', None, "orders"), 1)
        self.assertEqual(self.shopify.get_count(None, 2, "orders"), 1)

    @requests_mock.Mocker()
    def test_get_orders(self, m):
        # Mock the list URLs (all / by date / by since_id / completed-only)
        # and compare the parsed tables against the expected fixtures.
        m.get(self.shopify.get_query_url(None, None, 'orders', False),
              json=self.mock_orders_all)
        m.get(self.shopify.get_query_url('2020-10-20', None, 'orders', False),
              json=self.mock_orders_date)
        m.get(self.shopify.get_query_url(None, 2, 'orders', False),
              json=self.mock_orders_since)
        m.get(self.shopify.get_query_url(None, None, 'orders', False) +
              '&financial_status=paid', json=self.mock_orders_completed)
        assert_matching_tables(self.shopify.get_orders(None, None, False),
                               self.mock_result_all)
        assert_matching_tables(
            self.shopify.get_orders('2020-10-20', None, False),
            self.mock_result_date)
        assert_matching_tables(self.shopify.get_orders(None, 2, False),
                               self.mock_result_since)
        assert_matching_tables(self.shopify.get_orders(None, None, True),
                               self.mock_result_completed)

    @requests_mock.Mocker()
    def test_get_query_url(self, m):
        # Pin the exact URL strings produced for count and list queries.
        self.assertEqual(
            self.shopify.get_query_url(None, None, "orders", True),
            f'https://{SUBDOMAIN}.myshopify.com/admin/api/{API_VERSION}/orders/' +
            'count.json?limit=250&status=any')
        self.assertEqual(
            self.shopify.get_query_url('2020-10-20', None, "orders", True),
            f'https://{SUBDOMAIN}.myshopify.com/admin/api/{API_VERSION}/orders/' +
            'count.json?limit=250&status=any&created_at_min=2020-10-20T00:00:00&' +
            'created_at_max=2020-10-21T00:00:00')
        self.assertEqual(
            self.shopify.get_query_url(None, 2, "orders", True),
            f'https://{SUBDOMAIN}.myshopify.com/admin/api/{API_VERSION}/orders/' +
            'count.json?limit=250&status=any&since_id=2')
        self.assertEqual(
            self.shopify.get_query_url(None, None, "orders", False),
            f'https://{SUBDOMAIN}.myshopify.com/admin/api/{API_VERSION}/orders.json?' +
            'limit=250&status=any')

    @requests_mock.Mocker()
    def test_graphql(self, m):
        # graphql() should POST the query and return the 'data' payload.
        # NOTE(review): the doubled braces suggest the query string goes
        # through a .format() step inside graphql() — confirm against the
        # connector implementation.
        m.post('https://{0}.myshopify.com/admin/api/{1}/graphql.json'.format(
            SUBDOMAIN, API_VERSION), json=self.mock_graphql)
        self.assertEqual(
            self.shopify.graphql("""
            {{
                orders(query: "financial_status:=paid", first: 100) {{
                    edges {{
                        node {{
                            id
                        }}
                    }}
                }}
            }}
            """), self.mock_graphql['data'])
class TestRedash(unittest.TestCase):
    # Tests for the Redash connector: cached results, fresh-query refresh
    # (immediate and polled), timeout handling, and the load_to_table
    # entry point (explicit args and environment variables).

    # CSV payload served by the mocked endpoints and its parsed table.
    mock_data = 'foo,bar\n1,2\n3,4'
    mock_result = Table([('foo', 'bar'), ('1', '2'), ('3', '4')])

    def setUp(self):
        self.redash = Redash(BASE_URL, API_KEY)

    @requests_mock.Mocker()
    def test_cached_query(self, m):
        # A client without a user API key must pass the key as a query
        # param; a client constructed with one sends it via headers (so
        # the request query string stays empty).
        redash = Redash(BASE_URL)  # no user_api_key
        m.get(f'{BASE_URL}/api/queries/5/results.csv', text=self.mock_data)
        assert_matching_tables(redash.get_cached_query_results(5, API_KEY),
                               self.mock_result)
        self.assertEqual(m._adapter.last_request.path,
                         '/api/queries/5/results.csv')
        self.assertEqual(m._adapter.last_request.query, 'api_key=abc123')

        assert_matching_tables(self.redash.get_cached_query_results(5),
                               self.mock_result)
        self.assertEqual(m._adapter.last_request.query, '')

    @requests_mock.Mocker()
    def test_refresh_query(self, m):
        # Refresh job completes immediately (status 3), so the result CSV
        # is fetched in one step.
        m.post(f'{BASE_URL}/api/queries/5/refresh', json={'job': {
            'status': 3,
            'query_result_id': 21
        }})
        m.get(f'{BASE_URL}/api/queries/5/results/21.csv', text=self.mock_data)

        assert_matching_tables(
            self.redash.get_fresh_query_results(5, {'yyy': 'xxx'}),
            self.mock_result)

    @requests_mock.Mocker()
    def test_refresh_query_poll(self, m):
        # Refresh job starts pending (status 1); the connector must poll
        # the jobs endpoint until it completes (status 3).
        m.post(f'{BASE_URL}/api/queries/5/refresh', json={'job': {
            'id': 66,
            'status': 1
        }})
        m.get(f'{BASE_URL}/api/jobs/66', json={'job': {
            'id': 66,
            'status': 3,
            'query_result_id': 21
        }})
        m.get(f'{BASE_URL}/api/queries/5/results/21.csv', text=self.mock_data)

        self.redash.pause = 0.01  # shorten pause time

        assert_matching_tables(
            self.redash.get_fresh_query_results(5, {'yyy': 'xxx'}),
            self.mock_result)

    @requests_mock.Mocker()
    def test_refresh_query_poll_timeout(self, m):
        # The job never leaves status 1, so with a tiny timeout the
        # connector must raise RedashTimeout.
        m.post(f'{BASE_URL}/api/queries/5/refresh', json={'job': {
            'id': 66,
            'status': 1
        }})
        m.get(f'{BASE_URL}/api/jobs/66', json={'job': {'id': 66, 'status': 1}})
        m.get(f'{BASE_URL}/api/queries/5/results/21.csv', text=self.mock_data)

        self.redash.pause = 0.01  # shorten pause time
        self.redash.timeout = 0.01  # timeout

        raised = False
        try:
            self.redash.get_fresh_query_results(5, {'yyy': 'xxx'})
        except RedashTimeout:
            raised = True
        self.assertTrue(raised)

    @requests_mock.Mocker()
    def test_to_table(self, m):
        # load_to_table with explicit arguments.
        m.post(f'{BASE_URL}/api/queries/5/refresh', json={'job': {
            'status': 3,
            'query_result_id': 21
        }})
        m.get(f'{BASE_URL}/api/queries/5/results/21.csv', text=self.mock_data)

        self.redash.pause = 0.01  # shorten pause time

        table_data = Redash.load_to_table(base_url=BASE_URL,
                                          user_api_key=API_KEY,
                                          query_id=5,
                                          params={'x': 'y'},
                                          verify=False)
        assert_matching_tables(table_data, self.mock_result)

    @requests_mock.Mocker()
    def test_to_table_env_vars(self, m):
        # load_to_table with configuration supplied via environment
        # variables; the original environment is restored afterwards.
        try:
            _environ = dict(os.environ)
            os.environ.update({
                'REDASH_BASE_URL': BASE_URL,
                'REDASH_USER_API_KEY': API_KEY,
                'REDASH_QUERY_ID': "5",
                'REDASH_QUERY_PARAMS': "p_x=y"
            })
            m.post(f'{BASE_URL}/api/queries/5/refresh', json={'job': {
                'status': 3,
                'query_result_id': 21
            }})
            m.get(f'{BASE_URL}/api/queries/5/results/21.csv',
                  text=self.mock_data)

            self.redash.pause = 0.01  # shorten pause time

            assert_matching_tables(Redash.load_to_table(), self.mock_result)
        finally:
            os.environ.clear()
            os.environ.update(_environ)
def process_json(self, json_blob, obj_type, tidy=False):
    """Convert a JSON API response into a list of normalized Parsons tables.

    Internal method for converting most types of json responses into a
    list of Parsons tables.

    `Args:`
        json_blob: list or dict
            Decoded JSON from an API response.
        obj_type: str
            Name prefix for the emitted tables (e.g. ``'people'``).
        tidy: bool or int
            When ``False``, nested list/dict columns are unpacked in place
            (wide format); otherwise nested columns are exploded into
            separate long tables, with ``tidy`` passed through as
            ``expand_original``.

    `Returns:`
        list of dicts ``{'name': <table name>, 'tbl': <Parsons Table>}``
    """
    # Output goes here
    table_list = []

    # Original table & columns
    obj_table = Table(json_blob)
    cols = obj_table.get_columns_type_stats()
    list_cols = [x['name'] for x in cols if 'list' in x['type']]
    dict_cols = [x['name'] for x in cols if 'dict' in x['type']]

    # Unpack all list columns
    for col in list_cols:
        # Check for nested data: list entries that themselves contain dicts
        list_rows = obj_table.select_rows(lambda row: isinstance(
            row[col], list) and any(isinstance(x, dict) for x in row[col]))

        if list_rows.num_rows > 0:
            # BUG FIX: logger.debug(col, '...') passed the message as a
            # %-format string with a stray positional arg, which breaks
            # formatting inside the logging framework. Use lazy %-args.
            logger.debug('%s is a nested column', col)
            if len([x for x in cols if x['name'] == col]) == 1:
                # Nested data gets its own long table, keyed on 'id'
                table_list.append({
                    'name': f'{obj_type}_{col}',
                    'tbl': obj_table.long_table(['id'], col)
                })
            else:
                # Ignore if column doesn't exist (or has multiples)
                continue
        elif tidy is False:
            logger.debug('%s is a normal list column', col)
            obj_table.unpack_list(col)

    # Unpack all dict columns (wide mode only)
    if tidy is False:
        for col in dict_cols:
            logger.debug('%s is a dict column', col)
            obj_table.unpack_dict(col)

    if tidy is not False:
        packed_cols = list_cols + dict_cols
        for col in packed_cols:
            if col in obj_table.columns:
                logger.debug('%s needs to be unpacked into rows', col)

                # Determine whether or not to expand based on tidy
                unpacked_tidy = obj_table.unpack_nested_columns_as_rows(
                    col, expand_original=tidy)
                # Check if column was removed as sign it was unpacked into
                # a separate table
                if col not in obj_table.columns:
                    table_list.append({
                        'name': f'{obj_type}_{col}',
                        'tbl': unpacked_tidy
                    })
                else:
                    obj_table = unpacked_tidy

    # Original table will have had all nested columns removed
    if len(obj_table.columns) > 1:
        table_list.append({'name': obj_type, 'tbl': obj_table})

    return table_list
def _request(self, url, req_type='GET', post_data=None, args=None, limit=None):
    """Make an authenticated request to the PDI API, paginating as needed.

    `Args:`
        url: str
            Full endpoint URL.
        req_type: str
            One of 'GET', 'POST', 'PUT', 'DELETE'.
        post_data: dict
            JSON body for POST/PUT requests.
        args: dict
            Query-string parameters.
        limit: int
            Maximum number of records to fetch. When None, every page is
            fetched.

    `Returns:`
        Parsons Table of the response's "data" payload; the raw decoded
        JSON when the response has no "data" key (or fails to decode);
        or None for an empty response body.
    """
    # Make sure to have a current token before we make another request
    now = datetime.now(timezone.utc)
    if now > self.session_exp:
        self._get_session_token()

    # Based on PDI docs
    # https://api.bluevote.com/docs/index
    LIMIT_MAX = 2000

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {self.session_token}",
    }
    request_fn = {
        "GET": requests.get,
        "POST": requests.post,
        "PUT": requests.put,
        "DELETE": requests.delete,
    }

    if limit and limit <= LIMIT_MAX:
        args = args or {}
        args["limit"] = limit

    args = self._clean_dict(args) if args else args
    post_data = self._clean_dict(post_data) if post_data else post_data
    res = request_fn[req_type](url, headers=headers, json=post_data, params=args)
    logger.debug(f"{res.url} - {res.status_code}")
    logger.debug(res.request.body)

    res.raise_for_status()

    if not res.text:
        return None

    logger.debug(res.text)

    try:
        res_json = res.json()
    except JSONDecodeError:
        res_json = None

    # BUG FIX: the original evaluated `"data" not in res_json` even when
    # the body failed to decode (res_json is None), raising TypeError.
    # Short-circuit the None case first.
    if res_json is None or "data" not in res_json:
        return res_json

    total_count = res_json.get("totalCount", 0)
    data = res_json["data"]

    if not limit:
        # We don't have a limit, so let's get everything
        # Start at page 2 since we already got page 1
        cursor = 2
        while len(data) < total_count:
            args = args or {}
            args["cursor"] = cursor
            args["limit"] = LIMIT_MAX
            res = request_fn[req_type](url, headers=headers, json=post_data,
                                       params=args)
            data.extend(res.json()["data"])
            cursor += 1
        return Table(data)
    else:
        total_need = min(limit, total_count)
        cursor = 2
        while len(data) < total_need:
            args = args or {}
            args["cursor"] = cursor
            # Never request more rows than we still need (or the API max).
            args["limit"] = min(LIMIT_MAX, total_need - len(data))
            res = request_fn[req_type](url, headers=headers, json=post_data,
                                       params=args)
            data.extend(res.json()["data"])
            cursor += 1
        return Table(data)
class TestPostgresDB(unittest.TestCase):
    # Live-database tests for the Postgres connector: each test runs
    # against a scratch schema that is created in setUp and dropped in
    # tearDown.

    def setUp(self):
        self.temp_schema = TEMP_SCHEMA
        self.pg = Postgres()

        # Small fixture table copied into Postgres by several tests.
        self.tbl = Table([['ID', 'Name'],
                          [1, 'Jim'],
                          [2, 'John'],
                          [3, 'Sarah']])

        # Create a schema, create a table, create a view
        setup_sql = f"""
                    drop schema if exists {self.temp_schema} cascade;
                    create schema {self.temp_schema};
                    """

        other_sql = f"""
                    create table {self.temp_schema}.test (id smallint,name varchar(5));
                    create view {self.temp_schema}.test_view as (select * from {self.temp_schema}.test);
                    """  # noqa: E501

        self.pg.query(setup_sql)
        self.pg.query(other_sql)

    def tearDown(self):
        # Drop the view, the table and the schema
        teardown_sql = f"""
                       drop schema if exists {self.temp_schema} cascade;
                       """
        self.pg.query(teardown_sql)

    def test_query(self):
        # Check that query sending back expected result
        r = self.pg.query('select 1')
        self.assertEqual(r[0]['?column?'], 1)

    def test_query_with_parameters(self):
        # Parameterized queries: single placeholder and an IN-list.
        table_name = f"{self.temp_schema}.test"
        self.pg.copy(self.tbl, f"{self.temp_schema}.test", if_exists='append')

        sql = f"select * from {table_name} where name = %s"
        name = 'Sarah'
        r = self.pg.query(sql, parameters=[name])
        self.assertEqual(r[0]['name'], name)

        sql = f"select * from {table_name} where name in (%s, %s)"
        names = ['Sarah', 'John']
        r = self.pg.query(sql, parameters=names)
        self.assertEqual(r.num_rows, 2)

    def test_copy(self):
        # Copy a table and ensure table exists
        self.pg.copy(self.tbl, f'{self.temp_schema}.test_copy', if_exists='drop')
        r = self.pg.query(
            f"select * from {self.temp_schema}.test_copy where name='Jim'")
        self.assertEqual(r[0]['id'], 1)

        # Copy table and ensure truncate works.
        self.pg.copy(self.tbl, f'{self.temp_schema}.test_copy', if_exists='truncate')
        tbl = self.pg.query(
            f"select count(*) from {self.temp_schema}.test_copy")
        self.assertEqual(tbl.first, 3)

        # Copy table and ensure that drop works.
        self.pg.copy(self.tbl, f'{self.temp_schema}.test_copy', if_exists='drop')
        tbl = self.pg.query(
            f"select count(*) from {self.temp_schema}.test_copy")
        self.assertEqual(tbl.first, 3)

        # Copy table and ensure that append works.
        self.pg.copy(self.tbl, f'{self.temp_schema}.test_copy', if_exists='append')
        tbl = self.pg.query(
            f"select count(*) from {self.temp_schema}.test_copy")
        self.assertEqual(tbl.first, 6)

        # Try to copy the table and ensure that default fail works.
        self.assertRaises(ValueError, self.pg.copy, self.tbl,
                          f'{self.temp_schema}.test_copy')

        # Try to copy the table and ensure that explicit fail works.
        self.assertRaises(ValueError, self.pg.copy, self.tbl,
                          f'{self.temp_schema}.test_copy', if_exists='fail')

    def test_to_postgres(self):
        # Table.to_postgres convenience method should create and load.
        self.tbl.to_postgres(f'{self.temp_schema}.test_copy')
        r = self.pg.query(
            f"select * from {self.temp_schema}.test_copy where name='Jim'")
        self.assertEqual(r[0]['id'], 1)

    def test_from_postgres(self):
        # Round trip: copy in, read back with Table.from_postgres, and
        # compare (headers are lower-cased on the way through Postgres).
        tbl = Table([['id', 'name'],
                     [1, 'Jim'],
                     [2, 'John'],
                     [3, 'Sarah']])

        self.pg.copy(self.tbl, f'{self.temp_schema}.test_copy', if_exists='drop')
        out_tbl = self.tbl.from_postgres(
            f"SELECT * FROM {self.temp_schema}.test_copy")
        assert_matching_tables(out_tbl, tbl)
def test_process_json(self):
    # Stress-testing combination of unpack methods with contrived table from hell
    fake_response = [
        {
            'id': 1,
            'Simple List Col': ['one', 'two', 'three'],
            'Mixed List Col': [None, 2, 'three'],
            'Spotty List Col': [1, 2, 3],
            'Multidim List Col': [[1, 2], [None, 'two'], []],
            'Nested List Col': [
                {'A': 1, 'B': 'one'},
                {'A': 2, 'B': 'two'},
                {'A': 3, 'B': 'three'}],
            'Simple Dict Col': {'one': 1, 'two': 2, 'three': 3},
            'Nested Dict Col': {'A': 1, 'B': ['two', 2], 'C': [None, 3, 'three']}
        },
        {
            'id': 2,
            'Simple List Col': ['four', 'five', 'six'],
            'Mixed List Col': ['four', None, 6],
            'Spotty List Col': [],
            'Multidim List Col': [[3, None], [], ['three', 'four']],
            'Nested List Col': [
                {'A': 4, 'B': 'four'},
                {'A': 5, 'B': 'five'},
                {'A': 6, 'B': 'six'}],
            'Simple Dict Col': {'one': 'I', 'two': 'II', 'three': 'III'},
            'Nested Dict Col': {'A': ['one'], 'B': [], 'C': 3},
        },
        {
            'id': 3,
            'Simple List Col': ['seven', 'eight', 'nine'],
            'Mixed List Col': [7, 'eight', None],
            'Spotty List Col': None,
            'Multidim List Col': [['five', 6], [None]],
            'Nested List Col': [
                {'A': 7, 'B': 'seven'},
                {'A': 8, 'B': 'eight'},
                {'A': 9, 'B': 'nine'}],
            'Simple Dict Col': {'one': 'x', 'two': 'xx', 'three': 'xxx'},
            'Nested Dict Col': {'A': None, 'B': 2, 'C': [None, 3, 'three']}
        }
    ]

    # Expected output: the list-of-dicts column explodes into its own long
    # table, and the simple/nested dict columns unpack into prefixed
    # columns on the main table.
    fake_response_tables = {}
    table_names = ['fake_Nested List Col', 'fake']
    fake_response_tables['fake_Nested List Col'] = Table([
        {"id": 1, "Nested List Col_A": 1, "Nested List Col_B": "one"},
        {"id": 1, "Nested List Col_A": 2, "Nested List Col_B": "two"},
        {"id": 1, "Nested List Col_A": 3, "Nested List Col_B": "three"},
        {"id": 2, "Nested List Col_A": 4, "Nested List Col_B": "four"},
        {"id": 2, "Nested List Col_A": 5, "Nested List Col_B": "five"},
        {"id": 2, "Nested List Col_A": 6, "Nested List Col_B": "six"},
        {"id": 3, "Nested List Col_A": 7, "Nested List Col_B": "seven"},
        {"id": 3, "Nested List Col_A": 8, "Nested List Col_B": "eight"},
        {"id": 3, "Nested List Col_A": 9, "Nested List Col_B": "nine"}
    ])
    # NOTE(review): the input's id-3 'Spotty List Col' is None but the
    # expected value here is [None] — presumably the processing fills
    # missing list cells; confirm against process_json's behavior.
    fake_response_tables['fake'] = Table([
        {"id": 1,
         "Simple List Col": ["one", "two", "three"],
         "Mixed List Col": [None, 2, "three"],
         "Spotty List Col": [1, 2, 3],
         "Multidim List Col": [[1, 2], [None, "two"], []],
         "Simple Dict Col_one": 1,
         "Simple Dict Col_three": 3,
         "Simple Dict Col_two": 2,
         "Nested Dict Col_A": 1,
         "Nested Dict Col_B": ["two", 2],
         "Nested Dict Col_C": [None, 3, "three"]},
        {"id": 2,
         "Simple List Col": ["four", "five", "six"],
         "Mixed List Col": ["four", None, 6],
         "Spotty List Col": [],
         "Multidim List Col": [[3, None], [], ["three", "four"]],
         "Simple Dict Col_one": "I",
         "Simple Dict Col_three": "III",
         "Simple Dict Col_two": "II",
         "Nested Dict Col_A": ["one"],
         "Nested Dict Col_B": [],
         "Nested Dict Col_C": 3},
        {"id": 3,
         "Simple List Col": ["seven", "eight", "nine"],
         "Mixed List Col": [7, "eight", None],
         "Spotty List Col": [None],
         "Multidim List Col": [["five", 6], [None]],
         "Simple Dict Col_one": "x",
         "Simple Dict Col_three": "xxx",
         "Simple Dict Col_two": "xx",
         "Nested Dict Col_A": None,
         "Nested Dict Col_B": 2,
         "Nested Dict Col_C": [None, 3, "three"]}
    ])

    # Wide (default) mode: emitted tables and their contents must match.
    fake_processed = self.cp.process_json(fake_response, 'fake')
    self.assertTrue([f['name'] for f in fake_processed] == table_names)
    for tbl in table_names:
        assert_matching_tables(
            [f['tbl'] for f in fake_processed if f['name'] == tbl][0],
            fake_response_tables[tbl]
        )

    # Tidy mode (tidy=0): every nested column becomes its own table, so
    # the output count equals the number of non-id input columns.
    fake_tidy = self.cp.process_json(fake_response, 'fake', tidy=0)
    self.assertTrue(len(fake_tidy) == len(fake_response[0])-1)
def get_registration_report(self, report_id, block=False, poll_interval_seconds=60,
                            report_timeout_seconds=3600):
    """
    Get data from an existing registration report.

    `Args:`
        report_id: int
            The ID of the report to get data from
        block: bool
            Whether or not to block execution until the report is complete
        poll_interval_seconds: int
            If blocking, how long to pause between attempts to check if the
            report is done
        report_timeout_seconds: int
            If blocking, how long to wait for the report before timing out

    `Returns:`
        Parsons Table
            Parsons table with the report data, or ``None`` when the report is
            not ready and ``block`` is False.

    `Raises:`
        RTVFailure
            If the status check fails, the report times out, or the report
            data cannot be downloaded.
    """
    credentials = {
        'partner_id': self.partner_id,
        'partner_API_key': self.partner_api_key,
    }
    status_url = f'{self.client.uri}/registrant_reports/{report_id}'

    # Deadline after which we stop waiting for the report to generate.
    end_time = datetime.datetime.now() + datetime.timedelta(
        seconds=report_timeout_seconds)

    download_url = None
    while not download_url and datetime.datetime.now() < end_time:
        # Ask the status endpoint whether the report is ready.
        status_response = self.client.request(status_url, 'get', params=credentials)
        if status_response.status_code != requests.codes.ok:
            raise RTVFailure("Couldn't get report status")

        # The report is ready once the status payload carries a download_url.
        download_url = status_response.json().get('download_url')
        if download_url:
            break

        # Not ready yet: bail out immediately in non-blocking mode.
        if not block:
            return None

        # Otherwise wait before polling again. (Bug fix: the original logged
        # this message with an f-string prefix on a %-style lazy-format call,
        # and logged "sleeping" before the first status check had happened.)
        logger.debug('Registrations report not ready yet, sleeping %s seconds',
                     poll_interval_seconds)
        time.sleep(poll_interval_seconds)

    if not download_url:
        # We never saw a download_url before the deadline expired.
        raise RTVFailure('Timed out waiting for report')

    # Download the report data.
    download_response = self.client.request(download_url, 'get', params=credentials)
    if download_response.status_code != requests.codes.ok:
        raise RTVFailure('Unable to download report data')

    # Load the CSV payload into a Parsons Table and normalize the report's
    # headers to snake_case: spaces -> underscores, lowercase, then drop any
    # character outside [A-Za-z0-9_].
    table = Table.from_csv_string(download_response.text)
    normalized_column_names = [
        re.sub(r'\s', '_', name).lower() for name in table.columns
    ]
    normalized_column_names = [
        re.sub(r'[^A-Za-z\d_]', '', name) for name in normalized_column_names
    ]
    table.table = petl.setheader(table.table, normalized_column_names)
    return table
def test_get_opportunities2(self, m):
    # Expected flat opportunities table after the mocked search response is
    # unpacked (custom fields are split off into their own table below).
    processed_opps = Table([
        {
            "id": 14340759, "name": "Company1", "assignee_id": 659394,
            "close_date": None, "company_id": 29324143,
            "company_name": "Company1", "customer_source_id": None,
            "details": None, "loss_reason_id": None, "pipeline_id": 489028,
            "pipeline_stage_id": 2529569, "primary_contact_id": 67747998,
            "priority": "High", "status": "Open",
            "tags": ["opportunities import-1540158946352"],
            "interaction_count": 0, "monetary_unit": "USD",
            "monetary_value": 100000.0, "converted_unit": None,
            "converted_value": None, "win_probability": None,
            "date_stage_changed": 1548866182, "date_last_contacted": None,
            "leads_converted_from": [], "date_lead_created": None,
            "date_created": 1540159060, "date_modified": 1550858334
        },
        {
            "id": 14161592, "name": "Company2", "assignee_id": 659394,
            "close_date": "11/10/2018", "company_id": 28729196,
            "company_name": "Company2", "customer_source_id": None,
            "details": None, "loss_reason_id": None, "pipeline_id": 531482,
            "pipeline_stage_id": 2607171, "primary_contact_id": 67243374,
            "priority": "High", "status": "Open", "tags": [],
            "interaction_count": 36, "monetary_unit": "USD",
            "monetary_value": 77000.0, "converted_unit": None,
            "converted_value": None, "win_probability": None,
            "date_stage_changed": 1551191957,
            "date_last_contacted": 1552339800, "leads_converted_from": [],
            "date_lead_created": None, "date_created": 1539192375,
            "date_modified": 1552340016
        },
        {
            "id": 14286548, "name": "Company3", "assignee_id": 644608,
            "close_date": "11/18/2018", "company_id": 29492294,
            "company_name": "Company3", "customer_source_id": None,
            "details": None, "loss_reason_id": None, "pipeline_id": 531482,
            "pipeline_stage_id": 2482007, "primary_contact_id": 67637400,
            "priority": "None", "status": "Open", "tags": [],
            "interaction_count": 19, "monetary_unit": "USD",
            "monetary_value": 150000.0, "converted_unit": None,
            "converted_value": None, "win_probability": 0,
            "date_stage_changed": 1539870749,
            "date_last_contacted": 1555534313, "leads_converted_from": [],
            "date_lead_created": None, "date_created": 1539870749,
            "date_modified": 1555550658
        }
    ])

    # Expected long-format custom-fields table, keyed by opportunity id.
    processed_opps_cf = Table([
        {
            "id": 14340759,
            "custom_fields_custom_field_definition_id": 272931,
            "custom_fields_value": []
        },
        {
            "id": 14340759,
            "custom_fields_custom_field_definition_id": 272927,
            "custom_fields_value": None
        },
        {
            "id": 14161592,
            "custom_fields_custom_field_definition_id": 272931,
            "custom_fields_value": []
        },
        {
            "id": 14161592,
            "custom_fields_custom_field_definition_id": 272927,
            "custom_fields_value": None
        },
        {
            "id": 14286548,
            "custom_fields_custom_field_definition_id": 272931,
            "custom_fields_value": []
        },
        {
            "id": 14286548,
            "custom_fields_custom_field_definition_id": 272927,
            "custom_fields_value": None
        }
    ])

    # Route the Copper search endpoint to the canned paginated fixture.
    m.post(
        self.cp.uri + '/opportunities/search',
        json=self.paginate_callback,
        headers={"filename": "opportunities_search.json"})

    processed_blob = self.cp.get_opportunities()
    # get_opportunities returns a list of {'name': ..., 'tbl': ...} entries;
    # pull out the two tables we want to compare.
    blob_opps = [f for f in processed_blob if f['name'] == "opportunities"][0]['tbl']
    blob_opps_cf = [f for f in processed_blob if f['name'] == "opportunities_custom_fields"]
    blob_opps_cf = blob_opps_cf[0]['tbl']

    assert_matching_tables(processed_opps, blob_opps)
    assert_matching_tables(processed_opps_cf, blob_opps_cf)
def test_get_organizations(self, m): m.get(HUSTLE_URI + 'organizations', json=expected_json.organizations) orgs = self.hustle.get_organizations() assert_matching_tables(orgs, Table(expected_json.organizations['items']))
def test_insert(self): fake_data = Table([{'firstname': 'Chrisjen', 'lastname': 'Avasarala'}]) response = self.sf.insert_record('Contact', fake_data) assert self.sf.client.bulk.Contact.insert.called_with(fake_data) assert response[0]['created']
def test_get_tags(self, m): m.get(HUSTLE_URI + 'organizations/LePEoKzD3/tags', json=expected_json.tags) tags = self.hustle.get_tags(organization_id='LePEoKzD3') assert_matching_tables(tags, Table(expected_json.tags['items']))
def test_delete(self): fake_data = Table([{'id': '1234567890AaBbC'}]) response = self.sf.delete_record('Contact', fake_data) assert self.sf.client.bulk.Contact.update.called_with(fake_data) assert not response[0]['created']
def query_with_connection(self, sql, connection, parameters=None, commit=True): """ Execute a query against the database, with an existing connection. Useful for batching queries together. Will return ``None`` if the query returns zero rows. `Args:` sql: str A valid SQL statement connection: obj A connection object obtained from ``mysql.connection()`` parameters: list A list of python variables to be converted into SQL values in your query commit: boolean Whether to commit the transaction immediately. If ``False`` the transaction will be committed when the connection goes out of scope and is closed (or you can commit manually with ``connection.commit()``). `Returns:` Parsons Table See :ref:`parsons-table` for output options. """ with self.cursor(connection) as cursor: # The python connector can only execute a single sql statement, so we will # break up each statement and execute them separately. for s in sql.strip().split(';'): if len(s) != 0: logger.debug(f'SQL Query: {sql}') cursor.execute(s, parameters) if commit: connection.commit() # If the SQL query provides no response, then return None if not cursor.description: logger.debug('Query returned 0 rows') return None else: # Fetch the data in batches, and "pickle" the rows to a temp file. # (We pickle rather than writing to, say, a CSV, so that we maintain # all the type information for each field.) temp_file = files.create_temp_file() with open(temp_file, 'wb') as f: # Grab the header pickle.dump(cursor.column_names, f) while True: batch = cursor.fetchmany(QUERY_BATCH_SIZE) if len(batch) == 0: break logger.debug(f'Fetched {len(batch)} rows.') for row in batch: pickle.dump(row, f) # Load a Table from the file final_tbl = Table(petl.frompickle(temp_file)) logger.debug(f'Query returned {final_tbl.num_rows} rows.') return final_tbl
def test_get_posts(self, m): m.get(self.ct.uri + '/posts', json=expected_posts) posts = self.ct.get_posts() exp_tbl = self.ct._unpack(Table(expected_posts['result']['posts'])) assert_matching_tables(posts, exp_tbl)
def assert_file_matches_table(local_path, table): downloaded_tbl = Table.from_csv(local_path) assert_matching_tables(table, downloaded_tbl)
def test_get_links(self, m): m.get(self.ct.uri + '/links', json=expected_post) post = self.ct.get_links(link='https://nbcnews.to/34stfC2') exp_tbl = self.ct._unpack(Table(expected_post['result']['posts'])) assert_matching_tables(post, exp_tbl)
def test_get_transaction_designations(self, m): m.get(f'{self.bloomerang.uri}transactions/designations/?skip=0&take=50', json=TEST_GET_TRANSACTIONS) assert_matching_tables(self.bloomerang.get_transaction_designations(), Table(TEST_GET_TRANSACTIONS['Results']))
def test_get_companies(self, m):
    # Expected flat companies table once the mocked search response has been
    # unpacked (phone numbers are split into their own table below).
    processed_companies = Table([{
        'id': 35015567, 'name': 'Company One', 'assignee_id': None,
        'contact_type_id': 547508, 'details': None,
        'email_domain': '*****@*****.**', 'tags': [], 'interaction_count': 1,
        'date_created': 1558441519, 'date_modified': 1558441535,
        'address_city': 'CityA', 'address_country': None,
        'address_postal_code': '12345', 'address_state': 'New York',
        'address_street': None
    }, {
        'id': 35026533, 'name': 'Company Two', 'assignee_id': None,
        'contact_type_id': 547508, 'details': None,
        'email_domain': '*****@*****.**', 'tags': [], 'interaction_count': 1,
        'date_created': 1558452953, 'date_modified': 1558452967,
        'address_city': 'CityB', 'address_country': None,
        'address_postal_code': '23451', 'address_state': 'New York',
        'address_street': None
    }, {
        'id': 35014973, 'name': 'Company Three', 'assignee_id': None,
        'contact_type_id': 547508, 'details': None, 'email_domain': None,
        'tags': [], 'interaction_count': 1,
        'date_created': 1558434147, 'date_modified': 1558458137,
        'address_city': None, 'address_country': None,
        'address_postal_code': '34512', 'address_state': 'Alabama',
        'address_street': None
    }, {
        'id': 35029116, 'name': 'Company Four', 'assignee_id': None,
        'contact_type_id': 547508, 'details': None,
        'email_domain': '*****@*****.**', 'tags': [], 'interaction_count': 0,
        'date_created': 1558461301, 'date_modified': 1558461301,
        # NOTE(review): 'CityD ' keeps its trailing space to mirror the fixture.
        'address_city': 'CityD ', 'address_country': None,
        'address_postal_code': '45123', 'address_state': 'California',
        'address_street': None
    }, {
        'id': 35082308, 'name': 'Company Five', 'assignee_id': None,
        'contact_type_id': 547508, 'details': None,
        'email_domain': '*****@*****.**', 'tags': [], 'interaction_count': 1,
        'date_created': 1558639445, 'date_modified': 1558639459,
        'address_city': 'CityE', 'address_country': None,
        'address_postal_code': '51234', 'address_state': 'Arizona',
        'address_street': None
    }])

    # Expected long-format phone-number table, keyed by company id.
    processed_companies_phones = Table([{
        'id': 35082308, 'phone_numbers_category': 'work',
        'phone_numbers_number': '123-555-9876'
    }])

    # Route the Copper search endpoint to the canned paginated fixture.
    m.post(self.cp.uri + '/companies/search',
           json=self.paginate_callback,
           headers={"filename": "companies_search.json"})

    processed_blob = self.cp.get_companies()
    # get_companies returns a list of {'name': ..., 'tbl': ...} entries;
    # pull out the two tables we want to compare.
    blob_companies = [
        f for f in processed_blob if f['name'] == "companies"
    ][0]['tbl']
    blob_companies_phones = [
        f for f in processed_blob if f['name'] == "companies_phone_numbers"
    ][0]['tbl']

    assert_matching_tables(processed_companies, blob_companies)
    assert_matching_tables(processed_companies_phones, blob_companies_phones)