def main():
    """Run the tap in discovery or sync mode, depending on the parsed CLI flags.

    With ``args.discover`` set, the discovered catalog is dumped. Otherwise a
    sync runs with ``args.catalog`` or, failing that, a catalog built from
    ``args.properties``. When none of the three is set nothing happens.
    """
    # Parse command line arguments
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)

    # If discover flag was passed, run discovery mode and dump output to stdout
    if args.discover:
        Catalog.from_dict(discover_catalog()).dump()
        return

    # Otherwise run in sync mode
    if args.catalog:
        catalog = args.catalog
    elif args.properties:
        catalog = Catalog.from_dict(args.properties)
    else:
        return

    # Both sync paths get the defaulted state; previously the catalog path
    # computed this default and then passed the raw ``args.state`` anyway.
    state = args.state or {}
    sync(args.config, state, catalog)
def main_impl():
    """Configure module globals, resolve the configured ad account and dispatch.

    Sets the module-level ``auth``, ``RESULT_RETURN_LIMIT`` and ``API`` from
    the parsed config, looks the account up among the current user's ad
    accounts, then runs discovery or sync.

    Raises:
        TapFacebookException: if no ad account has the configured account id.
    """
    global auth, RESULT_RETURN_LIMIT, API

    args = parse_args(REQUIRED_CONFIG_KEYS)
    account_id = args.config['account_id']
    access_token = args.config['access_token']

    auth = [account_id, access_token]
    CONFIG.update(args.config)
    RESULT_RETURN_LIMIT = CONFIG.get('result_return_limit', RESULT_RETURN_LIMIT)
    API = FacebookAdsApi.init(access_token=access_token)

    account = None
    for candidate in fb_user.User(fbid='me').get_ad_accounts():
        if candidate['account_id'] == account_id:
            # No break: the last matching account is the one kept.
            account = candidate
    if not account:
        raise TapFacebookException("Couldn't find account with id {}".format(account_id))

    if args.discover:
        do_discover(args.select_all)
        return

    # ``properties`` takes precedence over ``catalog`` when both are given.
    if args.properties:
        catalog = Catalog.from_dict(args.properties)
    elif args.catalog:
        catalog = args.catalog
    else:
        LOGGER.info("No properties were selected")
        return
    do_sync(account, catalog, args.state)
def do_discover():
    """Build a Catalog with one entry per schema returned by ``_load_schemas()``.

    Key properties and the stream's replication key are marked with
    ``inclusion = automatic``; every other property is marked ``available``.
    """
    entries = []
    for name, schema in _load_schemas().items():
        stream = STREAM_OBJECTS[name]
        standard = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream.key_properties,
            replication_method=stream.replication_method
        )
        mdata = metadata.to_map(standard)

        if stream.replication_key:
            mdata = metadata.write(mdata, (), 'valid-replication-keys', [stream.replication_key])

        for field in schema['properties']:
            is_automatic = field in stream.key_properties or field == stream.replication_key
            inclusion = 'automatic' if is_automatic else 'available'
            mdata = metadata.write(mdata, ('properties', field), 'inclusion', inclusion)

        entries.append({
            'stream': name,
            'tap_stream_id': name,
            'schema': schema,
            'metadata': metadata.to_list(mdata),
            'key_properties': stream.key_properties,
        })

    return Catalog.from_dict({'streams': entries})
def main():
    """Connect to MySQL and run validation, discovery or sync.

    When the config has a truthy ``validate_only`` only the connection is
    validated. Otherwise server parameters are logged and the tap runs
    discovery (``args.discover``) or a sync with ``args.catalog`` or a catalog
    built from ``args.properties``.
    """
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)

    # NB: this code will only work correctly when the local time is set to UTC
    # because of calls to the timestamp() method.
    # NOTE(review): changing TZ in os.environ may not affect an already
    # initialised process without time.tzset() -- confirm.
    os.environ['TZ'] = 'UTC'

    # gcloud fails creating temporary tables if it is inside of transactions
    mysql_config = args.config  # same dict object: autocommit is written into args.config
    mysql_config["autocommit"] = True
    mysql_conn = MySQLConnection(mysql_config)

    if args.config.get("validate_only"):
        validate_connect(mysql_conn)
        return

    log_server_params(mysql_conn)

    # The former ``try: ... except Exception as e: raise e`` wrapper only
    # re-raised the same exception, so errors now simply propagate.
    if args.discover:
        do_discover(mysql_conn, args.config)
    elif args.catalog:
        state = args.state or {}
        do_sync(mysql_conn, args.config, args.catalog, state)
    elif args.properties:
        catalog = Catalog.from_dict(args.properties)
        state = args.state or {}
        do_sync(mysql_conn, args.config, catalog, state)
    else:
        LOGGER.info("No properties were selected")
def main_impl():
    """Initialise the Facebook Ads API, resolve the configured ad account and
    run discovery or sync.

    Raises:
        TapFacebookException: if none of the user's ad accounts has the
            configured ``account_id``.
    """
    parsed = utils.parse_args(REQUIRED_CONFIG_KEYS)
    account_id = parsed.config['account_id']
    access_token = parsed.config['access_token']

    CONFIG.update(parsed.config)
    FacebookAdsApi.init(access_token=access_token)

    account = None
    for candidate in fb_user.User(fbid='me').get_ad_accounts():
        if candidate['account_id'] == account_id:
            # No break: the last matching account is the one kept.
            account = candidate
    if not account:
        raise TapFacebookException(
            "Couldn't find account with id {}".format(account_id))

    if parsed.discover:
        do_discover()
        return
    if not parsed.properties:
        LOGGER.info("No properties were selected")
        return
    do_sync(account, Catalog.from_dict(parsed.properties), parsed.state)
def do_discover():
    """Return a Catalog with one entry per schema from ``_load_schemas()``.

    Each entry carries the schema, the stream's key properties and the
    standard metadata generated from the matching ``STREAM_OBJECTS`` entry.
    """
    entries = []
    for name, schema in _load_schemas().items():
        stream = STREAM_OBJECTS[name]
        stream_metadata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream.key_properties,
            valid_replication_keys=stream.replication_keys,
            replication_method=stream.replication_method,
        )
        entries.append({
            "stream": name,
            "tap_stream_id": name,
            "schema": schema,
            "metadata": stream_metadata,
            "key_properties": stream.key_properties,
        })
    return Catalog.from_dict({"streams": entries})
def do_discover(client):
    """Return a Catalog for the client's feeds that have a versioned stream.

    The stream id for a feed is ``<feed_name>_v<major_ver>``, where
    ``major_ver`` is the first element of the ``version`` value in the
    ``gbfs_versions`` feed. Feeds with no ``STREAM_OBJECTS`` entry under that
    id are skipped.
    """
    raw_schemas = _load_schemas()
    major_ver = client.request_feed("gbfs_versions").get("version")[0]

    entries = []
    for feed_name in client.feed_names:
        stream_id = f"{feed_name}_v{major_ver}"
        stream = STREAM_OBJECTS.get(stream_id)
        if stream is not None:
            schema = raw_schemas[stream_id]
            stream_metadata = metadata.get_standard_metadata(
                schema=schema,
                key_properties=stream.key_properties,
                valid_replication_keys=stream.replication_keys,
                replication_method=stream.replication_method,
            )
            entries.append({
                "stream": stream_id,
                "tap_stream_id": stream_id,
                "schema": schema,
                "metadata": stream_metadata,
                "key_properties": stream.key_properties,
            })
    return Catalog.from_dict({"streams": entries})
def test_main_with_state(mocker, mock_config, mock_catalog, mock_state, mock_context):
    """
    Running the tap with a state file must update the context's config, state
    and catalog once each, create a client, run the sync and print counts.
    """
    parsed_catalog = Catalog.from_dict(mock_catalog)
    cli_args = argparse.Namespace(
        config=mock_config,
        state=mock_state,
        catalog=parsed_catalog,
        discover=False,
    )
    mocker.patch('tap_intacct.singer.utils.parse_args', return_value=cli_args)
    get_client_mock = mocker.patch('tap_intacct.get_client', autospec=True)
    do_sync_mock = mocker.patch('tap_intacct.do_sync', autospec=True)

    tap_intacct.main()

    # The context receives the parsed arguments...
    mock_context.config.update.assert_called_once_with(mock_config)
    mock_context.state.update.assert_called_once_with(mock_state)
    mock_context.catalog.update.assert_called_once_with(parsed_catalog.to_dict())
    # ...and the sync pipeline runs exactly once.
    get_client_mock.assert_called_once()
    do_sync_mock.assert_called_once()
    mock_context.print_counts.assert_called_once()
def main():
    """Merge CLI arguments into ``CONFIG`` and run discovery or sync.

    ``start_datetime`` / ``end_datetime`` given on the command line override
    the config values; a missing ``end_datetime`` defaults to the current UTC
    time in ISO format.
    """
    args = parse_args(REQUIRED_CONFIG_KEYS)
    CONFIG.update(args.config)

    # Overwrite config specs with commandline args if present
    for key in ("start_datetime", "end_datetime"):
        override = getattr(args, key)
        if override:
            CONFIG[key] = override

    if not CONFIG.get("end_datetime"):
        CONFIG["end_datetime"] = datetime.datetime.utcnow().isoformat()

    # If discover flag was passed, run discovery mode and dump output to stdout
    if args.discover:
        print(json.dumps(discover(CONFIG), indent=2))
        return

    # Otherwise run in sync mode, discovering a catalog when none was supplied
    if args.catalog:
        catalog = args.catalog
    else:
        catalog = Catalog.from_dict(discover(CONFIG))
    sync(CONFIG, args.state, catalog)
def do_discover():
    """Return a Catalog with one entry per schema from ``_load_schemas()``.

    On top of the standard metadata, every replication key of a stream is
    written with ``inclusion = automatic``.
    """
    entries = []
    for name, schema in _load_schemas().items():
        stream = STREAM_OBJECTS[name]
        mdata = metadata.to_map(
            metadata.get_standard_metadata(
                schema=schema,
                key_properties=stream.key_properties,
                valid_replication_keys=stream.replication_keys,
                replication_method=stream.replication_method,
            )
        )

        # The return value of write() is not used here; the entry below reads
        # the same ``mdata`` object afterwards.
        for replication_key in stream.replication_keys:
            metadata.write(mdata, ('properties', replication_key), 'inclusion', 'automatic')

        entries.append({
            "stream": name,
            "tap_stream_id": name,
            "schema": schema,
            "metadata": metadata.to_list(mdata),
            "key_properties": stream.key_properties,
        })

    return Catalog.from_dict({"streams": entries})
def main_impl():
    """Load the config, then either dump the discovered catalog or run a sync.

    The config is read from the file named by ``args.config`` or, failing
    that, from the ``xero_config`` entry of ``env``; without either the
    function reports a critical error and returns.

    In discovery mode only ``discover()`` runs. Previously a catalog was built
    first (loading the properties file or calling ``discover()`` a second
    time) and then thrown away.
    """
    args = get_args()

    if args.config:
        LOGGER.info("Config json found")
        config = load_file(args.config)
    elif "xero_config" in env:
        LOGGER.info("Env var config found")
        config = json.loads(env["xero_config"])
    else:
        critical("No config found, aborting")
        return

    if args.discover:
        discover().dump()
        print()
        return

    if args.properties:
        LOGGER.info("Catalog found")
        args.properties = load_file(args.properties)

    # Re-check after loading: an empty properties file still falls back to discovery.
    if args.properties:
        catalog = Catalog.from_dict(args.properties)
    else:
        catalog = discover()

    state = args.state or {}
    sync(Context(config, state, catalog))
def test_getting_streams_to_sync(self):
    """Only streams whose root metadata has ``selected: True`` are returned."""

    def stream_entry(name, selected):
        # Minimal catalog stream with an empty schema and root-level selection.
        return {
            'stream': name,
            'tap_stream_id': name,
            'schema': {},
            'metadata': [{'breadcrumb': (), 'metadata': {'selected': selected}}],
        }

    catalog = Catalog.from_dict({
        'streams': [
            stream_entry('adcreative', True),
            stream_entry('ads', False),
        ]
    })

    streams_to_sync = tap_facebook.get_streams_to_sync(None, catalog, None)

    names_to_sync = [stream.name for stream in streams_to_sync]
    self.assertEqual(['adcreative'], names_to_sync)
def __init__(
    self,
    config: Union[Dict[str, Any], Path],
    state: Union[None, Dict[str, Any], Path] = None,
    catalog: Union[None, Dict[str, Any], Catalog, Path] = None,
    discover: bool = False,
    **kwargs,
):
    """Store config, state, catalog and the discover flag, loading any of the
    first three from disk when given as a ``Path``.

    A ``Path`` argument is recorded in the matching ``*_path`` attribute and
    replaced by its loaded content; a ``dict`` catalog is converted with
    ``Catalog.from_dict``. Extra keyword arguments become attributes.
    """
    self.catalog_path = None
    self.state_path = None
    self.config_path = None

    if isinstance(catalog, Path):
        self.catalog_path = str(catalog)
        catalog = Catalog.load(catalog)
    elif isinstance(catalog, dict):
        catalog = Catalog.from_dict(catalog)

    if isinstance(config, Path):
        self.config_path = str(config)
        config = load_json(config)

    if isinstance(state, Path):
        # NOTE(review): unlike catalog_path/config_path this keeps the Path
        # object rather than its str() -- confirm whether that is intended.
        self.state_path = state
        state = load_json(state)

    self.config = config
    self.state = state
    self.catalog = catalog
    self.discover = discover

    for attr_name, attr_value in kwargs.items():
        setattr(self, attr_name, attr_value)
def discover():
    """Return a Catalog with one stream per id in ``schemas.STATIC_SCHEMA_STREAM_IDS``.

    The first replication key of a stream, when it has any, is marked with
    ``inclusion = automatic`` and exposed as the entry's ``replication_key``.
    """
    streams = []
    for stream_id in schemas.STATIC_SCHEMA_STREAM_IDS:
        key_properties = schemas.PK_FIELDS[stream_id]
        schema = schemas.load_schema(stream_id)
        replication_method = schemas.REPLICATION_METHODS[stream_id].get("replication_method")
        replication_keys = schemas.REPLICATION_METHODS[stream_id].get("replication_keys")
        replication_key = replication_keys[0] if replication_keys else None

        mdata = metadata.to_map(
            metadata.get_standard_metadata(
                schema=schema,
                key_properties=key_properties,
                replication_method=replication_method,
                valid_replication_keys=replication_keys,
            )
        )
        if replication_keys:
            mdata = metadata.write(
                mdata, ('properties', replication_key), 'inclusion', 'automatic')

        streams.append({
            'stream': stream_id,
            'tap_stream_id': stream_id,
            'key_properties': key_properties,
            'schema': schema,
            'metadata': metadata.to_list(mdata),
            'replication_method': replication_method,
            'replication_key': replication_key,
        })

    return Catalog.from_dict({'streams': streams})
def do_discover():
    """Return a Catalog with one entry per stream in ``STREAM_OBJECTS``.

    The first replication key of each stream is marked with
    ``inclusion = automatic``. Streams without replication keys are left with
    the standard metadata; previously the unconditional ``[0]`` index raised
    for such streams.
    """
    raw_schemas = _load_schemas()
    catalog_entries = []

    for stream_name, stream in STREAM_OBJECTS.items():
        # create and add catalog entry
        schema = raw_schemas[stream_name]
        mdata = metadata.to_map(
            metadata.get_standard_metadata(
                schema=schema,
                key_properties=stream.key_properties,
                valid_replication_keys=stream.replication_keys,
                replication_method=stream.replication_method,
            ))

        # Set the replication_key MetaData to automatic as well
        replication_keys = stream.replication_keys
        if replication_keys:
            mdata = metadata.write(
                mdata, ('properties', replication_keys[0]), 'inclusion', 'automatic')

        catalog_entries.append({
            "stream": stream_name,
            "tap_stream_id": stream_name,
            "schema": schema,
            "metadata": metadata.to_list(mdata),
            "key_properties": stream.key_properties,
        })

    return Catalog.from_dict({"streams": catalog_entries})
def main_impl():
    """Initialise the Facebook Ads API and run discovery, or sync every
    configured ad account.

    ``account_id`` in the config is a comma-separated list of account ids.
    Whitespace around each id is ignored and empty entries are dropped, so
    ``"1, 2"`` selects accounts ``1`` and ``2``.

    Raises:
        TapFacebookException: if none of the user's ad accounts matches any
            configured id.
    """
    global RESULT_RETURN_LIMIT, API

    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
    account_id = args.config['account_id']
    access_token = args.config['access_token']

    # A set gives O(1) membership tests in the account filter below.
    account_ids = {part.strip() for part in account_id.split(",") if part.strip()}

    CONFIG.update(args.config)
    RESULT_RETURN_LIMIT = CONFIG.get('result_return_limit', RESULT_RETURN_LIMIT)
    API = FacebookAdsApi.init(access_token=access_token)

    user = fb_user.User(fbid='me')
    selected_accounts = [
        acc for acc in user.get_ad_accounts()
        if acc['account_id'] in account_ids
    ]
    if not selected_accounts:
        raise TapFacebookException(
            "Couldn't find account with id {}".format(account_id))

    if args.discover:
        do_discover()
    elif args.properties:
        catalog = Catalog.from_dict(args.properties)
        for account in selected_accounts:
            singer.logger.log_info("syncing account " + str(account["account_id"]))
            do_sync(account, catalog, args.state)
    else:
        LOGGER.info("No properties were selected")
def discover() -> Catalog:
    """
    Builds a singer Catalog with one stream entry per schema returned by
    ``get_schemas()``, using the matching metadata for each schema.
    """
    schemas, schemas_metadata = get_schemas()

    def build_entry(name, schema, meta):
        # Key properties and replication settings are derived from the metadata.
        return {
            'stream': name,
            'tap_stream_id': name,
            'schema': schema,
            'key_properties': _get_key_properties_from_meta(meta),
            'replication_method': _get_replication_method_from_meta(meta),
            'replication_key': _get_replication_key_from_meta(meta),
            'metadata': meta,
        }

    streams = [
        build_entry(name, schema, schemas_metadata[name])
        for name, schema in schemas.items()
    ]
    return Catalog.from_dict({'streams': streams})
def set_catalog(self):
    """Set ``self.catalog`` and ``self.selected_catalog``.

    The catalog is built from ``self.args.properties`` when that is truthy,
    otherwise obtained from ``self.discover()``. ``selected_catalog`` is the
    list of its streams for which ``stream_is_selected`` is truthy.
    """
    if self.args.properties:
        self.catalog = Catalog.from_dict(self.args.properties)
    else:
        self.catalog = self.discover()

    self.selected_catalog = list(filter(stream_is_selected, self.catalog.streams))
def main_impl():
    """Dump the discovered catalog in discovery mode, otherwise run a sync.

    The sync uses the catalog from ``args.properties`` when given and a
    freshly discovered one otherwise.
    """
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)

    if args.discover:
        discover(args.config).dump()
        print()
        return

    if args.properties:
        catalog = Catalog.from_dict(args.properties)
    else:
        catalog = discover(args.config)
    sync(Context(args.config, args.state, catalog))
def main_impl():
    """Build a Context from the parsed config and state, then dump the
    discovered catalog or run a sync.

    For a sync, ``ctx.catalog`` comes from ``args.properties`` when given and
    from ``discover(ctx)`` otherwise.
    """
    args = parse_args(REQUIRED_CONFIG_KEYS)
    ctx = Context(args.config, args.state)

    if args.discover:
        discover(ctx).dump()
        return

    if args.properties:
        ctx.catalog = Catalog.from_dict(args.properties)
    else:
        ctx.catalog = discover(ctx)
    sync(ctx)
def main():
    """Build a Context, then write the discovered catalog as JSON to stdout
    or run a sync.

    For a sync, ``ctx.catalog`` comes from ``args.properties`` when given and
    from ``discover(ctx)`` otherwise.
    """
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
    ctx = Context(args.config, args.state)

    if args.discover:
        json.dump(discover(ctx).to_dict(), sys.stdout)
        return

    if args.properties:
        ctx.catalog = Catalog.from_dict(args.properties)
    else:
        ctx.catalog = discover(ctx)
    sync(ctx)
def discover():
    """Return a Catalog with one entry per schema from ``load_schemas()``.

    Every stream is keyed on ``id``. ``id``, ``create_date`` and
    ``write_date`` are marked automatic and ``rec_name`` available, and each
    schema's properties are extended with ``DEFAULT_PROPERTIES``.
    """

    def property_entry(field, inclusion):
        # Metadata entry for a single top-level property.
        return {
            "metadata": {
                "inclusion": inclusion,
            },
            "breadcrumb": ["properties", field],
        }

    streams = []
    for schema_name, schema in load_schemas().items():
        root_entry = {
            "metadata": {
                "inclusion": "available",
                "table-key-properties": ["id"],
                "forced-replication-method":
                    MODEL_REPLICATION_METHOD[STREAM_MODEL_MAP[schema_name]],
            },
            "breadcrumb": [],
        }
        stream_metadata = [root_entry]
        stream_metadata.extend(
            property_entry(field, "automatic")
            for field in ("id", "create_date", "write_date")
        )
        stream_metadata.append(property_entry("rec_name", "available"))

        schema["properties"].update(DEFAULT_PROPERTIES)

        # create and add catalog entry
        streams.append({
            'stream': schema_name,
            'tap_stream_id': schema_name,
            'schema': schema,
            'metadata': stream_metadata,
            'key_properties': ['id'],
        })

    return Catalog.from_dict({'streams': streams})
def main():
    """Build a Context, update its ``updated_at`` start-date bookmark, then
    dump the discovered catalog or run a sync.

    For a sync, ``ctx.catalog`` comes from ``args.properties`` when given and
    from ``discover(ctx)`` otherwise.
    """
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
    ctx = Context(args.config, args.state)
    ctx.update_start_date_bookmark("updated_at")

    if args.discover:
        discover(ctx).dump()
        print()
        return

    if args.properties:
        ctx.catalog = Catalog.from_dict(args.properties)
    else:
        ctx.catalog = discover(ctx)
    sync(ctx)
def main():
    """Build a Context, then write the discovered catalog as JSON to stdout
    or run a sync.

    For a sync, ``atx.catalog`` comes from ``args.properties`` when given and
    from ``discover()`` otherwise.
    """
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
    atx = Context(args.config, args.state)

    if args.discover:
        # the schema is static from file so we don't need to pass in atx for
        # connection info.
        json.dump(discover().to_dict(), sys.stdout)
        return

    if args.properties:
        atx.catalog = Catalog.from_dict(args.properties)
    else:
        atx.catalog = discover()
    sync(atx)
def main_impl():
    """Set up the port configuration, then run discovery or sync.

    Discovery dumps the discovered catalog. A sync uses ``args.catalog`` or,
    failing that, a catalog built from ``args.properties``; with neither, a
    message is logged and nothing is synced.
    """
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
    common.setup_port_configuration(args.config)

    if args.discover:
        discovery.discover(args.config).dump()
        print()
        return

    if args.catalog:
        catalog = args.catalog
    elif args.properties:
        catalog = Catalog.from_dict(args.properties)
    else:
        LOGGER.info("No properties were selected")
        return
    do_sync(args, catalog)
def discover(client):
    """Return a Catalog with one entry per schema from ``_load_schemas()``.

    For streams that define ``custom_fields``, the custom field definitions
    are requested through ``client`` and added to the schema's properties,
    after which the ``custom_fields`` placeholder property is removed.

    Two crashes are avoided compared with the previous version: removing the
    placeholder no longer raises ``KeyError`` when the schema has no
    ``custom_fields`` property, and a field without a name is skipped instead
    of being stored under the key ``False``.
    """
    # discover catalog schema
    raw_schemas = _load_schemas()
    catalog_entries = []

    for stream_name, schema in raw_schemas.items():
        # create and add catalog entry
        stream = STREAM_OBJECTS[stream_name]

        # Add custom fields
        if stream.custom_fields:
            properties = schema["properties"]
            response = client._make_request('GET', stream.custom_fields)
            for page in response:
                for field in page:
                    field_name = field.get('name')
                    # Skip fields flagged as default, and fields with no name.
                    if field.get('default', False) or not field_name:
                        continue

                    # add mapping ex. custom_number -> number
                    field_type = field.get('type', False)
                    properties[field_name] = map_type(field_type)

                    if field_type == 'nested_field':
                        # Nested fields are mapped with the parent's type.
                        for nested_field in field.get('nested_ticket_fields', []):
                            properties[nested_field['name']] = map_type(field_type)

            # remove custom_fields parent as they are added directly in schema
            properties.pop('custom_fields', None)

        catalog_entries.append({
            "stream": stream_name,
            "tap_stream_id": stream_name,
            "schema": schema,
            "metadata": metadata.get_standard_metadata(
                schema=schema,
                key_properties=stream.key_properties,
                valid_replication_keys=stream.replication_keys,
                replication_method=stream.replication_method,
            ),
            "key_properties": stream.key_properties,
            # NOTE(review): this passes the whole replication_keys value under
            # the singular "replication_key" -- confirm that is intended.
            "replication_key": stream.replication_keys,
        })

    return Catalog.from_dict({"streams": catalog_entries})
def main_impl():
    """Open a connection with backoff, then run discovery or sync.

    A sync uses ``args.catalog`` or, failing that, a catalog built from
    ``args.properties``, together with ``args.state`` (defaulting to ``{}``).
    With neither catalog source a message is logged.
    """
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
    con = connect_with_backoff(args.config)

    if args.discover:
        do_discover(con)
        return

    if args.catalog:
        catalog = args.catalog
    elif args.properties:
        catalog = Catalog.from_dict(args.properties)
    else:
        LOGGER.info("No properties were selected")
        return

    do_sync(con, catalog, args.state or {})
def main_impl():
    """Connect to MySQL, tune the session, then run discovery or sync.

    Three session variables are set on a best-effort basis: an
    ``InternalError`` from any of them is collected and logged as a warning
    instead of aborting. After that, server parameters are logged and the tap
    runs discovery or a sync with ``args.catalog`` / ``args.properties``.
    """
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
    connection = MySQLConnection(args.config)
    connect_with_backoff(connection)

    # (statement, message template used when the statement fails)
    session_settings = (
        ('SET @@session.time_zone="+0:00"',
         'Could not set session.time_zone. Error: ({}) {}'),
        ('SET @@session.wait_timeout=2700',
         'Could not set session.wait_timeout. Error: ({}) {}'),
        ('SET @@session.innodb_lock_wait_timeout=2700',
         'Could not set session.innodb_lock_wait_timeout. Error: ({}) {}'),
    )

    warnings = []
    with connection.cursor() as cur:
        for statement, failure_template in session_settings:
            try:
                cur.execute(statement)
            except pymysql.err.InternalError as e:
                warnings.append(failure_template.format(*e.args))

    if warnings:
        LOGGER.info((
            "Encountered non-fatal errors when configuring MySQL session that could "
            "impact performance:"))
    for message in warnings:
        LOGGER.warning(message)

    log_server_params(connection)

    if args.discover:
        do_discover(connection)
        return

    if args.catalog:
        catalog = args.catalog
    elif args.properties:
        catalog = Catalog.from_dict(args.properties)
    else:
        LOGGER.info("No properties were selected")
        return

    do_sync(connection, args.config, catalog, args.state or {})
def main_impl():
    """Create the MSSQL connection, log server parameters, then run discovery
    or sync.

    A sync uses ``args.catalog`` or, failing that, a catalog built from
    ``args.properties``, together with ``args.state`` (defaulting to ``{}``).
    With neither catalog source a message is logged.
    """
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
    mssql_conn = MSSQLConnection(args.config)
    log_server_params(mssql_conn)

    if args.discover:
        do_discover(mssql_conn, args.config)
        return

    if args.catalog:
        catalog = args.catalog
    elif args.properties:
        catalog = Catalog.from_dict(args.properties)
    else:
        LOGGER.info("No properties were selected")
        return

    do_sync(mssql_conn, args.config, catalog, args.state or {})
def main():
    """Open a database connection and run discovery or sync for one schema.

    The schema name comes from the ``schema`` config key and falls back to
    ``public``. A sync uses ``args.catalog`` or, failing that, a catalog built
    from ``args.properties``; its state is built from ``args.state`` and the
    chosen catalog. With neither catalog source a message is logged.
    """
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
    CONFIG.update(args.config)
    connection = open_connection(args.config)
    db_schema = args.config.get('schema') or 'public'

    if args.discover:
        do_discover(connection, db_schema)
        return

    if args.catalog:
        catalog = args.catalog
    elif args.properties:
        catalog = Catalog.from_dict(args.properties)
    else:
        LOGGER.info("No properties were selected")
        return

    state = build_state(args.state, catalog)
    do_sync(connection, db_schema, catalog, state)