def _paginate_request(self, endpoint, collection, **kwargs):
    # The max number of objects we're requesting at a time.
    # This is an internal limit to not overload the Slack API.
    LIMIT = 200

    items = []
    next_page = True
    cursor = None
    while next_page:
        resp = self.client.api_call(endpoint, cursor=cursor, limit=LIMIT, **kwargs)

        if not resp['ok']:
            if resp['error'] == 'ratelimited':
                time.sleep(int(resp['headers']['Retry-After']))
                continue
            raise SlackClientError(resp['error'])

        items.extend(resp[collection])

        if resp["response_metadata"]["next_cursor"]:
            cursor = resp["response_metadata"]["next_cursor"]
        else:
            next_page = False

    return Table(items)
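# Example (not part of the connector): a minimal, self-contained sketch of the
# cursor-plus-Retry-After pattern that _paginate_request above relies on, written
# against a stub client so it runs without Slack credentials. The stub, its page
# shape, and the 'users.list' / 'members' names are assumptions for illustration.
import time


class _StubSlackClient:
    """Returns two canned pages, then signals the end with an empty cursor."""

    def __init__(self):
        self._pages = [
            {'ok': True, 'members': [{'id': 'U1'}],
             'response_metadata': {'next_cursor': 'abc'}},
            {'ok': True, 'members': [{'id': 'U2'}],
             'response_metadata': {'next_cursor': ''}},
        ]

    def api_call(self, endpoint, cursor=None, limit=None, **kwargs):
        return self._pages.pop(0)


def paginate(client, endpoint, collection, limit=200):
    items, cursor = [], None
    while True:
        resp = client.api_call(endpoint, cursor=cursor, limit=limit)
        if not resp['ok']:
            if resp['error'] == 'ratelimited':
                # Back off for as long as the API asks, then retry the same page.
                time.sleep(int(resp['headers']['Retry-After']))
                continue
            raise RuntimeError(resp['error'])
        items.extend(resp[collection])
        cursor = resp['response_metadata']['next_cursor']
        if not cursor:
            return items


print(paginate(_StubSlackClient(), 'users.list', 'members'))
# [{'id': 'U1'}, {'id': 'U2'}]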
def get_signups_statuses(self, event_id=None, event_type_id=None):
    """
    Get a list of valid signup statuses for a given event type or event.
    You must pass one of ``event_id`` or ``event_type_id``, but not both.

    `Args:`
        event_id: int
            A valid event id.
        event_type_id: int
            A valid event type id.
    `Returns:`
        Parsons Table
            See :ref:`parsons-table` for output options.
    """
    if event_id is None and event_type_id is None:
        raise ValueError('One of event_id or event_type_id must be populated')
    if event_id is not None and event_type_id is not None:
        raise ValueError('Event Id and Event Type ID may not BOTH be populated')

    if event_id:
        params = {'eventId': event_id}
    if event_type_id:
        params = {'eventTypeId': event_type_id}

    tbl = Table(self.connection.get_request('signups/statuses', params=params))
    logger.info(f'Found {tbl.num_rows} signup statuses.')
    return tbl
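# Example (not part of the connector): hypothetical calls showing the either/or
# requirement that get_signups_statuses above enforces. The VAN constructor
# arguments and the ids are placeholders assumed for illustration.
from parsons import VAN

van = VAN(db='EveryAction', api_key='YOUR_API_KEY')

statuses = van.get_signups_statuses(event_type_id=123)    # OK: exactly one id
# van.get_signups_statuses()                               # raises ValueError
# van.get_signups_statuses(event_id=1, event_type_id=2)    # raises ValueError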
def test_put_and_get_file(self):
    # put_file is part of setup, so just testing getting it here
    path = self.s3.get_file(self.test_bucket, self.test_key)
    result_tbl = Table.from_csv(path)
    assert_matching_tables(self.tbl, result_tbl)
def _as_table(self, paginated_list, page=None, page_size=100):
    """Converts a paginated list into a Parsons ``Table``.

    Uses the ``_rawData`` property of each item instead of calling ``raw_data`` to
    avoid making a separate request for each item in a page for types that PyGithub
    doesn't consider complete.

    Args:
        paginated_list: ``pygithub.PaginatedList.PaginatedList``
            PyGithub paginated list
        page: Optional[int]
            Page number to load. Defaults to None. If not specified, all results are
            returned.
        page_size: int
            Page size. Defaults to 100. Ignored if ``page`` is not set.

    Returns:
        ``Table``
            Table object created from the raw data of the list
    """
    if page is not None:
        page_start = (page - 1) * page_size
        page_end = page_start + page_size
        list_pages = paginated_list[page_start:page_end]
    else:
        list_pages = paginated_list

    return Table([list_item._rawData for list_item in list_pages])
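# Example (not part of the connector): a quick check of the page-slicing
# arithmetic used in _as_table above. Pages are 1-indexed, so page 2 with
# page_size 100 maps to indices 100..199. A plain list stands in for the
# PyGithub paginated list here, which is an assumption for illustration.
def _page_slice(items, page, page_size=100):
    page_start = (page - 1) * page_size
    return items[page_start:page_start + page_size]


assert _page_slice(list(range(250)), page=2) == list(range(100, 200))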
def get_codes(self, name=None, supported_entities=None, parent_code_id=None, code_type=None):
    """
    Get codes.

    `Args:`
        name: str
            Filter by name of code.
        supported_entities: str
            Filter by supported entities.
        parent_code_id: str
            Filter by parent code id.
        code_type: str
            Filter by code type.
    `Returns:`
        Parsons Table
            See :ref:`parsons-table` for output options.
    """
    params = {'name': name,
              'supportedEntities': supported_entities,
              'parentCodeId': parent_code_id,
              'codeType': code_type,
              '$top': 200}

    tbl = Table(self.connection.get_request('codes', params=params))
    logger.info(f'Found {tbl.num_rows} codes.')
    return tbl
def get_events_deleted(self, organization_id=None, updated_since=None):
    """
    Fetch deleted public events on the platform.

    **Public endpoint**

    `Args:`
        organization_id: list or int
            Filter events by a single or multiple organization ids
        updated_since: str
            Filter to events updated since given date (ISO Date)
    `Returns:`
        Parsons Table
            See :ref:`parsons-table` for output options.
    """
    if isinstance(organization_id, (str, int)):
        organization_id = [organization_id]

    args = {'organization_id': organization_id,
            'updated_since': iso_to_unix(updated_since)}

    return Table(self.request_paginate(self.uri + 'events/deleted', args=args))
def get_target_export(self, export_job_id):
    """
    Get the status of a specific target export job and, if the job is complete,
    return the exported file as a Parsons Table.

    `Args:`
        export_job_id: int
            The target export job id.
    `Returns:`
        Parsons Table
            See :ref:`parsons-table` for output options.
    """
    response = self.connection.get_request(f'targetExportJobs/{export_job_id}')
    json_string = json.dumps(response)
    json_obj = json.loads(json_string)
    for i in json_obj:
        job_status = i['jobStatus']
        if job_status == 'Complete':
            for j in json_obj:
                csv = j['file']['downloadUrl']
            response_csv = requests.get(csv)
            return Table.from_csv_string(response_csv.text)
        elif job_status == 'Pending' or job_status == 'InProcess':
            logger.info(f'Target export job is pending or in process for {export_job_id}.')
        else:
            raise TargetsFailed(f'Target export failed for {export_job_id}')
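# Example (not part of the connector): a hypothetical polling loop around
# get_target_export above. The method only returns a Table once the job status is
# 'Complete' and implicitly returns None while the job is pending, so a caller can
# retry on None. The VAN constructor arguments, the sleep interval, and the job id
# are assumptions for illustration.
import time

from parsons import VAN

van = VAN(db='MyVoters', api_key='YOUR_API_KEY')
export_job_id = 1234  # hypothetical job id

tbl = None
while tbl is None:
    tbl = van.get_target_export(export_job_id)
    if tbl is None:
        time.sleep(30)  # wait before re-checking the job status

print(tbl.num_rows)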
def get_survey_questions(self, statuses=['Active'], name=None, sq_type=None,
                         question=None, cycle=None):
    """
    Get survey questions.

    `Args:`
        statuses: list
            Filter to a list of statuses of survey questions. One or more of
            ``Active``, ``Archived``, and ``Inactive``.
        name: str
            Filter to survey questions with names that begin with the input.
        sq_type: str
            Filter to survey questions of a given type.
        question: str
            Filter to survey questions with script questions that contain the
            given input.
        cycle: str
            Filter to survey questions with the given cycle. A year in the
            format "YYYY".
    `Returns:`
        Parsons Table
            See :ref:`parsons-table` for output options.
    """
    params = {'statuses': statuses,
              '$top': self.page_size,
              'name': name,
              'type': sq_type,
              'question': question,
              'cycle': cycle}

    tbl = Table(self.connection.get_request('surveyQuestions', params=params))
    logger.info(f'Found {tbl.num_rows} survey questions.')
    return tbl
def get_cached_query_results(self, query_id=None, query_api_key=None):
    """
    Get the latest cached results for a query and return them as a Parsons Table
    parsed from the CSV response.

    `Args:`
        query_id: str or int
            The query id of the query.
        query_api_key: str
            If you did not supply a user_api_key on the Redash object, then you
            can supply a query_api_key to get cached results back anonymously.
    `Returns:`
        Table Class
    """
    query_id = check('REDASH_QUERY_ID', query_id)
    query_api_key = check('REDASH_QUERY_API_KEY', query_api_key, optional=True)

    params = {}
    if not self.user_api_key and query_api_key:
        params['api_key'] = query_api_key

    response = self.session.get(
        f'{self.base_url}/api/queries/{query_id}/results.csv',
        params=params,
        verify=self.verify)

    if response.status_code != 200:
        raise RedashQueryFailed(
            f'Failed getting results for query {query_id}. {response.text}')

    return Table.from_csv_string(response.text)
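# Example (not part of the connector): hypothetical usage of
# get_cached_query_results above, fetching cached results anonymously with a
# per-query API key. The base URL, query id, and key are placeholders, and the
# constructor arguments are assumptions for illustration.
from parsons import Redash

redash = Redash(base_url='https://redash.example.com')
tbl = redash.get_cached_query_results(query_id=42, query_api_key='QUERY_API_KEY')
print(tbl.num_rows)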
def get_custom_fields_values(self, field_type='contacts'):
    """
    Get custom field values as a long table.

    `Args:`
        field_type: str
            Filter by custom field group type. Must be one of ``contacts`` or
            ``contributions``.
    `Returns:`
        Parsons Table
            See :ref:`parsons-table` for output options.
    """
    tbl = self.get_custom_fields()

    # Some custom fields do not have associated values. If this is the case, then
    # we should return an empty Table, but with the expected columns.
    if tbl.get_column_types('availableValues') == ['NoneType']:
        logger.info('Found 0 custom field values.')
        return Table([{'customFieldId': None,
                       'id': None,
                       'name': None,
                       'parentValueId': None}])
    else:
        logger.info(f'Found {tbl.num_rows} custom field values.')
        return tbl.long_table('customFieldId', 'availableValues', prepend=False)
def get_orders(self, query_date=None, since_id=None, completed=True):
    """
    Get Shopify orders.

    `Args:`
        query_date: str
            Filter query by a date that rows were created. Format: yyyy-mm-dd.
            This filter is ignored if value is None.
        since_id: str
            Filter query by a minimum ID. This filter is ignored if value is None.
        completed: bool
            True if only getting completed orders, False otherwise.
    `Returns:`
        Table Class
    """
    orders = []

    def _append_orders(url):
        nonlocal orders

        if completed:
            url += '&financial_status=paid'

        res = self.client.request(url, 'GET')
        cur_orders = res.json().get("orders", [])

        # Flatten orders to non-complex types
        for order in cur_orders:
            keys_to_add = {}
            keys_to_delete = []

            for key1 in order:
                if isinstance(order[key1], dict):
                    for key2 in order[key1]:
                        keys_to_add[key1 + '_' + key2] = order[key1][key2]
                    keys_to_delete.append(key1)
                elif key1 == 'note_attributes':
                    for note in order[key1]:
                        keys_to_add[key1 + '_' + note['name']] = note['value']

            order.update(keys_to_add)
            for key in keys_to_delete:
                del order[key]

        orders += cur_orders
        return res

    res = _append_orders(self.get_query_url(query_date, since_id, "orders", False))

    # Get next page
    while res.headers.get("Link"):
        link = re.split('; |, ', res.headers.get("Link"))
        if len(link) and link[len(link) - 1] == 'rel="next"':
            res = _append_orders(link[len(link) - 2][1:-1])
        else:
            break

    return Table(orders)
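# Example (not part of the connector): a standalone sketch of the flattening step
# inside get_orders above. Nested dicts are replaced by prefixed top-level keys,
# while note_attributes entries are kept and also exposed as
# 'note_attributes_<name>' keys. The sample order payload is made up for
# illustration.
order = {
    'id': 1001,
    'customer': {'id': 7, 'email': 'jane@example.com'},
    'note_attributes': [{'name': 'source', 'value': 'newsletter'}],
}

flat = dict(order)
for key, value in order.items():
    if isinstance(value, dict):
        # Nested dicts become prefixed scalar keys; the original key is dropped.
        del flat[key]
        for sub_key, sub_value in value.items():
            flat[f'{key}_{sub_key}'] = sub_value
    elif key == 'note_attributes':
        # Note attributes also get one prefixed key per attribute name.
        for note in value:
            flat[f'{key}_{note["name"]}'] = note['value']

print(sorted(flat))
# ['customer_email', 'customer_id', 'id', 'note_attributes', 'note_attributes_source']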
def get_rows(self, offset=0, chunk_size=None, order_by=None):
    data = self.data.cut(*self.data.columns)

    if order_by:
        data.sort(order_by)

    # If no chunk_size is given, return everything from the offset onwards.
    stop = offset + chunk_size if chunk_size is not None else None
    return Table(data[offset:stop])
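# Example (not part of the class): a tiny sketch of how offset/chunk_size windows
# like the one in get_rows above can be used to walk rows in pages; each window
# starts where the previous one ended. Plain lists stand in for the table rows.
rows = list(range(10))
chunk_size = 4
chunks = [rows[offset:offset + chunk_size]
          for offset in range(0, len(rows), chunk_size)]
assert chunks == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]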
def get_attendances(self, organization_id=None, updated_since=None):
    """
    Fetch all attendances which were either promoted by the organization or
    were for events owned by the organization.

    .. note::
        API Key Required

    `Args:`
        organization_id: list of int
            Filter events by a single or multiple organization ids
        updated_since: str
            Filter to events updated since given date (ISO Date)
    `Returns:`
        Parsons Table
            See :ref:`parsons-table` for output options.
    """
    url = self.uri + 'organizations/' + str(organization_id) + '/attendances'

    return Table(self.request_paginate(
        url,
        args={'updated_since': date_to_timestamp(updated_since)},
        auth=True))
def download_table(self, repo_name, path, branch=None, local_path=None, delimiter=','):
    """Download a CSV file from a repo by path and branch as a Parsons Table.

    Args:
        repo_name: str
            Full repo name (account/name)
        path: str
            Path from the repo base directory
        branch: Optional[str]
            Branch to download file from. Defaults to repo default branch
        local_path: Optional[str]
            Local file path to download file to. Will create a temp file if not supplied.
        delimiter: Optional[str]
            The CSV delimiter to use to parse the data. Defaults to ','

    Returns:
        Parsons Table
            See :ref:`parsons-table` for output options.
    """
    downloaded_file = self.download_file(repo_name, path, branch, local_path)
    return Table(petl.fromcsv(downloaded_file, delimiter=delimiter))
def test_get_rows(self):
    data = [['name', 'user_name', 'id'],
            ['me', 'myuser', '1'],
            ['you', 'hey', '2'],
            ['you', 'hey', '3']]
    tbl = Table(data)

    assert_matching_tables(self.tbl.get_rows(), tbl)
def test_append_user_entered_to_spreadsheet(self):
    # Testing whether we can insert formulas with user_entered_value
    self.google_sheets.add_sheet(self.spreadsheet_id, 'Sheet3')

    append_table = Table([
        {'col1': 3, 'col2': 9, 'col3': '=A2*B2'},
        {'col1': 'Buda', 'col2': 'Pest', 'col3': '=A3&LOWER(B3)'},
    ])
    self.google_sheets.append_to_sheet(
        self.spreadsheet_id, append_table, 2, user_entered_value=True)
    result_table = self.google_sheets.read_sheet(self.spreadsheet_id, 2)

    # Get the values from col3, which has formulas
    formula_vals = [row['col3'] for row in result_table]

    # Test that the value is what's expected from each formula
    self.assertEqual(formula_vals[0], '27')
    self.assertEqual(formula_vals[1], 'Budapest')
def test_to_from_json_compressed(self):
    path = 'tmp/test.json.gz'
    self.tbl.to_json(path)
    result_tbl = Table.from_json(path)
    assert_matching_tables(self.tbl, result_tbl)
    os.remove(path)
def test_to_from_json_line_delimited(self):
    path = 'tmp/test.json'
    self.tbl.to_json(path, line_delimited=True)
    result_tbl = Table.from_json(path, line_delimited=True)
    assert_matching_tables(self.tbl, result_tbl)
    os.remove(path)
def test_first(self):
    # Test that the first value in the table is returned.
    self.assertEqual(self.tbl.first, 'Bob')

    # Test empty value returns None
    empty_tbl = Table([[1], [], [3]])
    self.assertIsNone(empty_tbl.first)
def select_rows(self, *filters):
    """
    Select specific rows from a Parsons table based on the passed filters.

    Example filters:

    .. code-block:: python

        tbl = Table([['foo', 'bar', 'baz'],
                     ['c', 4, 9.3],
                     ['a', 2, 88.2],
                     ['b', 1, 23.3]])

        # You can structure the filter in multiple ways

        # Lambda Function
        tbl2 = tbl.select_rows(lambda row: row.foo == 'a' and row.baz > 88.1)
        tbl2
        >>> {'foo': 'a', 'bar': 2, 'baz': 88.2}

        # Expression String
        tbl3 = tbl.select_rows("{foo} == 'a' and {baz} > 88.1")
        tbl3
        >>> {'foo': 'a', 'bar': 2, 'baz': 88.2}

    `Args:`
        \*filters: function or str
    `Returns:`
        A new parsons table containing the selected rows
    """  # noqa: W605

    from parsons.etl.table import Table

    return Table(petl.select(self.table, *filters))
def test_get_changed_entity_resource_fields(self, m):
    json = [{'fieldName': 'ActivistCodeID',
             'fieldType': 'N',
             'maxTextboxCharacters': None,
             'isCoreField': True,
             'availableValues': None},
            {'fieldName': 'ActivistCodeType',
             'fieldType': 'T',
             'maxTextboxCharacters': 20,
             'isCoreField': True,
             'availableValues': None},
            {'fieldName': 'Campaign',
             'fieldType': 'T',
             'maxTextboxCharacters': 150,
             'isCoreField': True,
             'availableValues': None}]

    m.get(self.van.connection.uri + 'changedEntityExportJobs/fields/ActivistCodes',
          json=json)

    assert_matching_tables(
        Table(json),
        self.van.get_changed_entity_resource_fields('ActivistCodes'))
def test_append_to_spreadsheet(self):
    # BROKEN TEST!
    append_table = Table([
        {'first': 'Jim', 'last': 'Mitchell'},
        {'first': 'Lucy', 'last': 'Simpson'},
    ])
    self.google_sheets.append_to_sheet(self.spreadsheet_id, append_table)
    result_table = self.google_sheets.read_sheet(self.spreadsheet_id)

    self.assertEqual(append_table.columns, result_table.columns)

    # We should now have rows from both tables
    self.assertEqual(self.test_table.num_rows + append_table.num_rows,
                     result_table.num_rows)

    # First check that we didn't muck with the original data
    for i in range(self.test_table.num_rows):
        self.assertEqual(self.test_table.data[i], result_table.data[i])
    orig_row_count = self.test_table.num_rows

    # Then check that we appended the data properly
    for i in range(append_table.num_rows):
        self.assertEqual(append_table.data[i], result_table.data[orig_row_count + i])

    # Test that we can append to an empty sheet
    self.google_sheets.add_sheet(self.spreadsheet_id, 'Sheet3')
    self.google_sheets.append_to_sheet(self.spreadsheet_id, append_table)
def test_get_canvass_responses_input_types(self, m):
    json = {"inputTypeId": 11, "name": "API"}

    m.get(self.van.connection.uri + 'canvassResponses/inputTypes', json=json)
    assert_matching_tables(Table(json), self.van.get_canvass_responses_input_types())
def test_get_bulk_import_mapping_types(self, m):
    m.get(self.van.connection.uri + 'bulkImportMappingTypes', json=mapping_type)

    assert_matching_tables(self.van.get_bulk_import_mapping_types(), Table(mapping_type))
def test_from_columns(self):
    header = ['col1', 'col2']
    col1 = [1, 2, 3]
    col2 = ['a', 'b', 'c']
    tbl = Table.from_columns([col1, col2], header=header)

    self.assertEqual(tbl[0], {'col1': 1, 'col2': 'a'})
def test_from_csv_string(self):
    path = self.tbl.to_csv()

    # Pull the file into a string
    with open(path, 'r') as f:
        csv_string = f.read()

    result_tbl = Table.from_csv_string(csv_string)
    assert_matching_tables(self.tbl, result_tbl)
def setUp(self):
    self.mysql = MySQL(username='******', password='******', host='test', db='test', port=123)

    self.tbl = Table([['ID', 'Name', 'Score'],
                      [1, 'Jim', 1.9],
                      [2, 'John', -0.5],
                      [3, 'Sarah', .0004]])
def test_get_max_value(self):
    date_tbl = Table([['id', 'date_modified'],
                      [1, '2020-01-01'],
                      [2, '1900-01-01']])
    self.rs.copy(date_tbl, f'{self.temp_schema}.test_date')

    # Test return string
    self.assertEqual(
        self.rs.get_max_value(f'{self.temp_schema}.test_date', 'date_modified'),
        '2020-01-01')
def setUp(self):
    self.google_sheets = GoogleSheets()

    self.spreadsheet_id = self.google_sheets.create_spreadsheet('Parsons Test')
    self.test_table = Table([
        {'first': 'Bob', 'last': 'Smith'},
        {'first': 'Sue', 'last': 'Doe'},
    ])
    self.google_sheets.overwrite_sheet(self.spreadsheet_id, self.test_table)

    self.second_sheet_title = "2nd sheet"
    self.google_sheets.add_sheet(self.spreadsheet_id, self.second_sheet_title)
    self.second_test_table = Table([
        {'city': 'San Francisco', 'state': 'SF'},
        {'city': 'Chicago', 'state': 'IL'},
    ])
    self.google_sheets.overwrite_sheet(self.spreadsheet_id, self.second_test_table, 1)
def test_to_from_redshift(self):
    # Test the parsons table methods
    table_name = f'{self.temp_schema}.test_copy'
    self.tbl.to_redshift(table_name, if_exists='drop')
    sql = f"SELECT * FROM {table_name} ORDER BY id"
    result_tbl = Table.from_redshift(sql)

    # Don't bother checking column names, since those were tweaked en route to Redshift.
    assert_matching_tables(self.tbl, result_tbl, ignore_headers=True)