Example 1
def main_impl():
    try:
        args = utils.parse_args(REQUIRED_CONFIG_KEYS)
        account_id = args.config['account_id']
        access_token = args.config['access_token']

        CONFIG.update(args.config)

        global RESULT_RETURN_LIMIT
        RESULT_RETURN_LIMIT = CONFIG.get('result_return_limit',
                                         RESULT_RETURN_LIMIT)

        global API
        API = FacebookAdsApi.init(access_token=access_token)
        user = fb_user.User(fbid='me')

        accounts = user.get_ad_accounts()
        account = None
        for acc in accounts:
            if acc['account_id'] == account_id:
                account = acc
        if not account:
            raise SingerConfigurationError(
                "Couldn't find account with id {}".format(account_id))
    except FacebookError as fb_error:
        raise_from(SingerConfigurationError, fb_error)

    if args.discover:
        try:
            do_discover()
        except FacebookError as fb_error:
            raise_from(SingerDiscoveryError, fb_error)
    elif args.properties:
        catalog = Catalog.from_dict(args.properties)
        try:
            do_sync(account, catalog, args.state)
        except FacebookError as fb_error:
            raise_from(SingerSyncError, fb_error)
    else:
        LOGGER.info("No properties were selected")
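
For context, the six.raise_from call used above re-raises a Facebook error as a Singer error while preserving the original as the cause. A minimal self-contained sketch (the error class here is a stand-in):

from six import raise_from

class SingerConfigurationError(Exception):
    pass

try:
    try:
        raise ValueError("bad credentials")
    except ValueError as fb_error:
        # On Python 3 this is equivalent to:
        # raise SingerConfigurationError from fb_error
        raise_from(SingerConfigurationError, fb_error)
except SingerConfigurationError as err:
    assert isinstance(err.__cause__, ValueError)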
Example 2
def main_impl():
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
    connection = open_connection(args.config)
    warnings = []
    with connection.cursor() as cur:
        try:
            cur.execute('SET @@session.time_zone="+0:00"')
        except pymysql.err.InternalError as e:
            warnings.append('Could not set session.time_zone. Error: ({}) {}'.format(*e.args))

        try:
            cur.execute('SET @@session.wait_timeout=2700')
        except pymysql.err.InternalError as e:
            warnings.append('Could not set session.wait_timeout. Error: ({}) {}'.format(*e.args))

        try:
            cur.execute('SET @@session.innodb_lock_wait_timeout=2700')
        except pymysql.err.InternalError as e:
            warnings.append(
                'Could not set session.innodb_lock_wait_timeout. Error: ({}) {}'.format(*e.args)
                )

    if warnings:
        LOGGER.info(("Encountered non-fatal errors when configuring MySQL session that could "
                     "impact performance:"))
    for w in warnings:
        LOGGER.warning(w)

    log_server_params(connection)
    if args.discover:
        do_discover(connection)
    elif args.catalog:
        state = build_state(args.state, args.catalog)
        do_sync(connection, args.catalog, state)
    elif args.properties:
        catalog = Catalog.from_dict(args.properties)
        state = build_state(args.state, catalog)
        do_sync(connection, catalog, state)
    else:
        LOGGER.info("No properties were selected")
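
open_connection is not shown in this snippet; a plausible minimal version using pymysql (the config field names are assumptions, not confirmed by the code above) would be:

import pymysql

def open_connection(config):
    # Hypothetical sketch; the real tap likely also handles SSL and timeouts.
    return pymysql.connect(
        host=config['host'],
        user=config['user'],
        password=config['password'],
        port=int(config.get('port', 3306)),
    )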
Example 3
def discover(service):
    catalog = Catalog([])
    for entity_name, entity in service.entities.items():
        if entity_name not in selected_tables:
            continue
        schema_dict, metadata, pks = get_schema(entity.__odata_schema__)
        metadata.append({"breadcrumb": [], "metadata": {"selected": True}})
        schema = Schema.from_dict(schema_dict)

        catalog.streams.append(
            CatalogEntry(
                stream=entity_name,
                tap_stream_id=entity_name,
                key_properties=pks,
                schema=schema,
                metadata=metadata,
                replication_method="INCREMENTAL" if schema_dict.get(
                    "properties", {}).get("createdon") else "FULL_TABLE",
            ))

    return catalog
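
The replication-method rule above, in isolation: a stream is INCREMENTAL only when its schema exposes a createdon property (note the guard against a missing "properties" key):

schema_dict = {'properties': {'createdon': {'type': 'string', 'format': 'date-time'}}}
method = ('INCREMENTAL'
          if schema_dict.get('properties', {}).get('createdon')
          else 'FULL_TABLE')
assert method == 'INCREMENTAL'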
Example 4
def generate_catalog(streams):

    catalog = Catalog([])

    for stream in streams:
        schema = stream.load_schema()

        mdata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream.key_properties,
            valid_replication_keys=stream.replication_key or None,
            replication_method=stream.replication_method or None)

        catalog.streams.append(
            CatalogEntry(stream=stream.name,
                         tap_stream_id=stream.name,
                         key_properties=stream.key_properties,
                         schema=Schema.from_dict(schema),
                         metadata=mdata))

    return catalog
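
For reference, singer.metadata.get_standard_metadata produces the list-of-dicts form that CatalogEntry expects; a minimal call (the schema and keys here are illustrative) looks like:

from singer import metadata

mdata = metadata.get_standard_metadata(
    schema={'type': 'object', 'properties': {'id': {'type': 'integer'}}},
    key_properties=['id'],
    valid_replication_keys=['updated_at'],
    replication_method='INCREMENTAL',
)
# Each entry has the shape {'breadcrumb': ..., 'metadata': {...}}; the root
# entry carries table-key-properties, valid-replication-keys, and so on.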
Example 5
def sync(  # noqa: WPS210, WPS213
    wp: WordPressSupportForums,
    catalog: Catalog,
) -> None:
    """Sync data from tap source.

    Arguments:
        wp {WordPressSupportForums} -- WordPressSupportForums client
        catalog {Catalog} -- Stream catalog
    """
    # For every stream in the catalog
    LOGGER.info('Sync')

    # Only selected streams are synced, whether a stream is selected is
    # determined by whether the key-value: "selected": true is in the schema
    # file.
    for stream in catalog.get_selected_streams({}):
        LOGGER.info(f'Syncing stream: {stream.tap_stream_id}')

        # Write the schema
        singer.write_schema(
            stream_name=stream.tap_stream_id,
            schema=stream.schema.to_dict(),
            key_properties=stream.key_properties,
        )

        # Every stream has a corresponding method on the WordPressSupportForums
        # client: a stream named `foo` calls `wp.foo`
        tap_data: Callable = getattr(wp, stream.tap_stream_id)

        # The tap_data method yields rows of data from the API
        for row in tap_data():

            # Write a row to the stream
            singer.write_record(
                stream.tap_stream_id,
                row,
                time_extracted=datetime.now(timezone.utc),
            )
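
The selection check performed by get_selected_streams above boils down to a root metadata entry carrying "selected": true. A minimal catalog that passes it (the stream name is illustrative):

from singer.catalog import Catalog

catalog = Catalog.from_dict({
    'streams': [{
        'stream': 'topics',
        'tap_stream_id': 'topics',
        'schema': {'type': 'object', 'properties': {}},
        'metadata': [{'breadcrumb': [], 'metadata': {'selected': True}}],
    }],
})
assert [s.tap_stream_id for s in catalog.get_selected_streams({})] == ['topics']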
Example 6
 def test_should_output_no_records_given_no_records_available(
         self, mock_stdout, requests_mock):
     requests_mock.get(
         "https://api.nikabot.com/api/v1/users?limit=1000&page=0",
         json=json.loads(EMPTY_RESPONSE))
     config = {"access_token": "my-access-token", "page_size": 1000}
     state = {}
     catalog = Catalog(streams=[
         CatalogEntry(
             tap_stream_id="users",
             stream="users",
             schema=Schema.from_dict({}),
             key_properties=["id"],
             metadata=[{
                 "breadcrumb": [],
                 "metadata": {
                     "selected": True
                 }
             }],
         )
     ])
     sync(config, state, catalog)
     assert mock_stdout.mock_calls == [
         call(
             '{"type": "SCHEMA", "stream": "users", "schema": {}, "key_properties": ["id"]}\n'
         )
     ]
     assert LOGGER.info.mock_calls == [
         call("Syncing stream: %s", "users"),
         call(
             "Making %s request to %s with params %s",
             "GET",
             "https://api.nikabot.com/api/v1/users",
             {
                 "limit": "1000",
                 "page": "0"
             },
         ),
     ]
Example 7
def discover(config):
    streams = []
    for table_spec in config['tables']:
        try:
            modified_since = dateutil.parser.parse(table_spec['start_date'])
            target_files = file_utils.get_matching_objects(
                table_spec, modified_since)
            sample_rate = table_spec.get('sample_rate', 5)
            max_sampling_read = table_spec.get('max_sampling_read', 1000)
            max_sampled_files = table_spec.get('max_sampled_files', 50)
            samples = file_utils.sample_files(table_spec,
                                              target_files,
                                              sample_rate=sample_rate,
                                              max_records=max_sampling_read,
                                              max_files=max_sampled_files)
            schema = generate_schema(table_spec, samples)
            stream_metadata = []
            key_properties = table_spec.get('key_properties', [])
            streams.append(
                CatalogEntry(
                    tap_stream_id=table_spec['name'],
                    stream=table_spec['name'],
                    schema=schema,
                    key_properties=key_properties,
                    metadata=stream_metadata,
                    replication_key=None,
                    is_view=None,
                    database=None,
                    table=None,
                    row_count=None,
                    stream_alias=None,
                    replication_method=None,
                ))
        except Exception as err:
            LOGGER.error(
                f"Unable to write Catalog entry for '{table_spec['name']}' - it will be skipped due to error {err}"
            )

    return Catalog(streams)
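
Everything discover() reads from a table_spec is visible above; an illustrative entry for config['tables'] (values are made up) would be:

table_spec = {
    'name': 'orders',                      # becomes stream and tap_stream_id
    'start_date': '2021-01-01T00:00:00Z',  # parsed into modified_since
    'key_properties': ['id'],
    # Optional sampling knobs, shown with the defaults used above:
    'sample_rate': 5,
    'max_sampling_read': 1000,
    'max_sampled_files': 50,
}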
Example 8
 def test_should_output_records(self, mock_stdout, requests_mock):
     requests_mock.get("https://api.nikabot.com/api/v1/teams", json=json.loads(TEAMS_RESPONSE))
     config = {"access_token": "my-access-token", "page_size": 1000}
     state = {}
     catalog = Catalog(
         streams=[
             CatalogEntry(
                 tap_stream_id="teams",
                 stream="teams",
                 schema=Schema.from_dict({}),
                 key_properties=["id"],
                 metadata=[{"breadcrumb": [], "metadata": {"selected": True}}],
             )
         ]
     )
     sync(config, state, catalog)
     assert mock_stdout.mock_calls == [
         call('{"type": "SCHEMA", "stream": "teams", "schema": {}, "key_properties": ["id"]}\n'),
         call(
             '{"type": "RECORD", "stream": "teams", "record": {"id": "5d6ca50762a07c00045125fb", "domain": "pageup", "bot_token": "e31d3b7ae51ff1feec8be578f23eb017e8143f66a7a085342c664544b81618ec41b87810d61a9c1f6133fe0c7d88aa3976232bb2a2665c4f89c38058b51cd20c", "activated_by": "U6K26HMGV", "status": "ACTIVE", "platform_id": "T034F9NPW", "created_at": "2019-09-02T05:13:43.151", "subscription": {"active_until": "2020-07-08T23:59:59", "status": "active", "number_of_users": 69, "subscriber_id": "U93KT77T6"}, "icon": {"image_34": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_34.png", "image_44": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_44.png", "image_68": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_68.png", "image_88": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_88.png", "image_102": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_102.png", "image_132": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_132.png", "image_230": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_230.png", "image_original": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_original.png"}}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'
         ),
     ]
Example 9
def expected_subset_catalog_selected_default_col():
    return Catalog.from_dict({
        'streams': [{
            'database_name': 'FakeDB',
            'table_name': 'category',
            'tap_stream_id': 'dev-category',
            'is_view': False,
            'stream': 'category',
            'schema': {
                'type': 'object',
                'properties': {
                    'id': {
                        'minimum': -2147483648,
                        'type': 'integer',
                        'maximum': 2147483647,
                        'inclusion': 'available'
                    }
                }
            },
            'metadata': [
                {
                    'breadcrumb': (),
                    'metadata': {
                        'selected': True
                    }
                },
                {
                    'breadcrumb': (
                        'properties',
                        'id'
                    ),
                    'metadata': {
                        'selected-by-default': True,
                        'sql-datatype': 'int2'
                    }
                }
            ]
        }]
    })
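
Reading the column metadata back out of a catalog like this goes through singer.metadata.to_map, which keys entries by their breadcrumb tuple:

from singer import metadata

entry = expected_subset_catalog_selected_default_col().streams[0]
mdata = metadata.to_map(entry.metadata)
assert mdata[()]['selected'] is True
assert mdata[('properties', 'id')]['sql-datatype'] == 'int2'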
Example 10
    def test_getting_streams_to_sync(self):
        annotated_schemas = {
            'streams': [{
                'stream': 'adcreative',
                'tap_stream_id': 'adcreative',
                'schema': {
                    'selected': True
                }
            }, {
                'stream': 'ads',
                'tap_stream_id': 'ads',
                'schema': {
                    'selected': False
                }
            }]
        }

        catalog = Catalog.from_dict(annotated_schemas)

        streams_to_sync = tap_facebook.get_streams_to_sync(None, catalog, None)
        names_to_sync = [stream.name for stream in streams_to_sync]
        self.assertEqual(['adcreative'], names_to_sync)
Example 11
def discover():
    schemas, field_metadata = get_schemas()
    catalog = Catalog([])

    for stream_name, schema_dict in schemas.items():
        schema = Schema.from_dict(schema_dict)
        mdata = field_metadata[stream_name]

        table_metadata = {}
        for entry in mdata:
            if entry.get('breadcrumb') == ():
                table_metadata = entry.get('metadata', {})
        key_properties = table_metadata.get('table-key-properties')

        catalog.streams.append(
            CatalogEntry(stream=stream_name,
                         tap_stream_id=stream_name,
                         key_properties=key_properties,
                         schema=schema,
                         metadata=mdata))

    return catalog
Example 12
def discover():
    schemas, field_metadata = get_schemas()
    catalog = Catalog([])

    flat_streams = flatten_streams()
    for stream_name, schema_dict in schemas.items():
        schema = Schema.from_dict(schema_dict)
        mdata = metadata.to_map(field_metadata[stream_name])

        stream = flat_streams.get(stream_name, {})
        if stream.get('replication_method') == 'INCREMENTAL':
            for field_name in stream.get('replication_keys', []):
                metadata.write(mdata, ('properties', field_name), 'inclusion',
                               'automatic')
        catalog.streams.append(
            CatalogEntry(stream=stream_name,
                         tap_stream_id=stream_name,
                         key_properties=stream.get('key_properties', None),
                         schema=schema,
                         metadata=metadata.to_list(mdata)))

    return catalog
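
The metadata.to_map / metadata.write / metadata.to_list round trip used above, shown in isolation:

from singer import metadata

mdata = metadata.to_map([{'breadcrumb': (), 'metadata': {}}])
mdata = metadata.write(mdata, ('properties', 'updated_at'), 'inclusion', 'automatic')
assert {'breadcrumb': ('properties', 'updated_at'),
        'metadata': {'inclusion': 'automatic'}} in metadata.to_list(mdata)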
Example 13
def discover(reports):
    schemas, field_metadata = get_schemas(reports)
    catalog = Catalog([])

    for stream_name, schema_dict in schemas.items():
        schema = Schema.from_dict(schema_dict)
        mdata = field_metadata[stream_name]

        table_metadata = {}
        for entry, value in mdata.items():
            if entry == ():
                table_metadata = value
        key_properties = table_metadata.get('table-key-properties')

        catalog.streams.append(
            CatalogEntry(stream=stream_name,
                         tap_stream_id=stream_name,
                         key_properties=key_properties,
                         schema=schema,
                         metadata=metadata.to_list(mdata)))

    return catalog
Example 14
def discover(config, config_path):

    schemas, schemas_metadata = get_schemas(config, config_path)
    streams = []

    for schema_name, schema in schemas.items():
        schema_meta = schemas_metadata[schema_name]

        catalog_entry = {
            'stream': schema_name,
            'tap_stream_id': schema_name,
            'schema': schema,
            'key_properties': _get_key_properties_from_meta(schema_meta),
            'replication_method':
            _get_replication_method_from_meta(schema_meta),
            'replication_key': _get_replication_key_from_meta(schema_meta),
            'metadata': schema_meta
        }

        streams.append(catalog_entry)

    return Catalog.from_dict({'streams': streams})
Example 15
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("-p",
                        "--properties",
                        help="Catalog file with fields selected")
    parser.add_argument("-c", "--config", help="Optional config file")
    parser.add_argument("-s", "--state", help="State file")
    parser.add_argument(
        "-d",
        "--discover",
        help="Build a catalog from the underlying schema",
        action="store_true",
    )

    args = parser.parse_args()
    if args.config:
        LOGGER.info("Config json found")
        config = load_file(args.config)
    elif "typeform_config" in env:
        LOGGER.info("Env var config found")
        config = json.loads(env["typeform_config"])
    else:
        LOGGER.critical("No config found, aborting run")
        return

    properties = load_file(args.properties)
    state = load_file(args.state)

    atx = Context(config, state)
    if args.discover:
        # the schema is static from file so we don't need to pass in atx for connection info.
        catalog = discover()
        json.dump(catalog.to_dict(), sys.stdout)
    else:
        atx.catalog = Catalog.from_dict(
            properties) if args.properties else discover()
        sync(atx)
Example 16
def resolve_catalog(discovered, catalog, state):
    streams = list(filter(entry_is_selected, catalog.streams))

    currently_syncing = singer.get_currently_syncing(state)
    if currently_syncing:
        streams = dropwhile(lambda s: s.tap_stream_id != currently_syncing,
                            streams)

    result = Catalog(streams=[])

    # Iterate over the streams in the input catalog and match each one up
    # with the same stream in the discovered catalog.
    for catalog_entry in streams:
        discovered_table = discovered.get_stream(catalog_entry.tap_stream_id)
        if not discovered_table:
            LOGGER.warning(
                'Database {} table {} selected but does not exist'.format(
                    catalog_entry.database, catalog_entry.table))
            continue
        selected = get_selected_properties(catalog_entry)

        # These are the columns we need to select
        columns = desired_columns(selected, discovered_table.schema)

        schema = Schema(type='object',
                        properties={
                            col: discovered_table.schema.properties[col]
                            for col in columns
                        })

        result.streams.append(
            CatalogEntry(tap_stream_id=catalog_entry.tap_stream_id,
                         stream=catalog_entry.stream,
                         table=catalog_entry.table,
                         schema=schema,
                         metadata=catalog_entry.metadata))

    return result
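
The dropwhile call above implements resume-on-interrupt: it discards streams until it reaches currently_syncing, then keeps the rest in catalog order:

from itertools import dropwhile

stream_ids = ['users', 'orders', 'events']
currently_syncing = 'orders'
resumed = list(dropwhile(lambda s: s != currently_syncing, stream_ids))
assert resumed == ['orders', 'events']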
Example 17
def discover():
    raw_schemas = load_schemas()
    streams = []
    for stream_id, schema in raw_schemas.items():
        # TODO: populate any metadata and stream's key properties here..
        stream_metadata = property.get_stream_metadata(schema)
        key_properties = property.get_key_properties(stream_id)
        streams.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key=None,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            ))
    return Catalog(streams)
Example 18
def discover(client, spreadsheet_id):
    schemas, field_metadata = get_schemas(client, spreadsheet_id)
    catalog = Catalog([])

    for stream_name, schema_dict in schemas.items():
        schema = Schema.from_dict(schema_dict)
        mdata = field_metadata[stream_name]
        key_properties = None
        for mdt in mdata:
            table_key_properties = mdt.get('metadata',
                                           {}).get('table-key-properties')
            if table_key_properties:
                key_properties = table_key_properties

        catalog.streams.append(
            CatalogEntry(stream=stream_name,
                         tap_stream_id=stream_name,
                         key_properties=STREAMS.get(stream_name, {}).get(
                             'key_properties', key_properties),
                         schema=schema,
                         metadata=mdata))

    return catalog
Example 19
def discover(config):
    client = Client(config)
    ensure_credentials_are_authorized(client)
    include_account_stream = is_account_endpoint_authorized(client)
    streams = []
    for _, stream in STREAM_OBJECTS.items():
        if (not include_account_stream and stream.tap_stream_id
                == STREAM_OBJECTS['account'].tap_stream_id):
            continue
        raw_schema = load_schema(stream.tap_stream_id)
        schema = Schema.from_dict(raw_schema)
        streams.append(
            CatalogEntry(stream=stream.tap_stream_id,
                         tap_stream_id=stream.tap_stream_id,
                         key_properties=stream.pk_fields,
                         schema=schema,
                         metadata=metadata.get_standard_metadata(
                             schema=raw_schema,
                             schema_name=stream.tap_stream_id,
                             key_properties=stream.pk_fields,
                             valid_replication_keys=stream.replication_keys,
                             replication_method=stream.replication_method)))
    return Catalog(streams)
Example 20
def discover() -> Catalog:
    raw_schemas = load_schemas()
    streams = []
    for stream_id, schema in raw_schemas.items():
        stream_metadata = get_stream_metadata(stream_id, schema.to_dict())
        key_properties = get_key_properties(stream_id)

        streams.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key=get_replication_key(stream_id),
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=get_replication_method(stream_id),
            ))
    return Catalog(streams)
Example 21
def test_bookmarks(mock_write_state):
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.tap_processors.processor import TapProcessor

    catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")
    client_mock = MagicMock()
    processor = TapProcessor(catalog=catalog,
                             stream_name="loan_accounts",
                             client=client_mock,
                             config=config_json,
                             state={'currently_syncing': 'loan_accounts'},
                             sub_type="self",
                             generators=[GeneratorMock([])])

    processor.write_bookmark()

    expected_state = {
        'currently_syncing': 'loan_accounts',
        'bookmarks': {
            'loan_accounts': '2021-06-01T00:00:00Z'
        }
    }
    mock_write_state.assert_called_once_with(expected_state)
Example 22
def main_impl():
   args = utils.parse_args(REQUIRED_CONFIG_KEYS)
   conn_config = {'user': args.config['user'],
                  'password': args.config['password'],
                  'host': args.config['host'],
                  'port': args.config['port'],
                  'sid':  args.config['sid']}

   if args.config.get('scn_window_size'):
      log_miner.SCN_WINDOW_SIZE = int(args.config['scn_window_size'])
   if args.discover:
      filter_schemas = []
      if args.config.get('filter_schemas'):
         filter_schemas = args.config.get('filter_schemas').split(',')
      do_discovery(conn_config, filter_schemas)

   elif args.properties:
      state = args.state

      # Sort the properties
      streams = args.properties['streams']
      for stream in streams:
         new_properties = {}
         old_properties = stream['schema']['properties']
         order = stream['column_order']

         for column in order:
            new_properties[column] = old_properties[column]

         stream['schema']['properties'] = new_properties

      args.catalog = Catalog.from_dict(args.properties)
      
      do_sync(conn_config, args.catalog, args.config.get('default_replication_method'), state)
   else:
      LOGGER.info("No properties were selected")
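
The column-reordering loop above relies on dicts preserving insertion order (guaranteed since Python 3.7); in isolation:

old_properties = {'b': {'type': 'integer'}, 'a': {'type': 'string'}}
order = ['a', 'b']
new_properties = {column: old_properties[column] for column in order}
assert list(new_properties) == ['a', 'b']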
Example 23
def discover(ctx):
    LOGGER.info("Running discover")
    use_event_log = has_access_to_event_log(ctx)
    catalog = Catalog([])
    for tap_stream_id in streams_.stream_ids:
        if not use_event_log and tap_stream_id == schemas.IDS.EVENT_LOG:
            continue
        schema_dict = schemas.load_schema(ctx, tap_stream_id)
        schema = Schema.from_dict(schema_dict)
        mdata = metadata.get_standard_metadata(
            schema_dict, key_properties=schemas.PK_FIELDS[tap_stream_id])
        mdata = metadata.to_map(mdata)

        for field_name in schema_dict['properties'].keys():
            mdata = metadata.write(mdata, ('properties', field_name),
                                   'inclusion', 'automatic')

        catalog.streams.append(
            CatalogEntry(stream=tap_stream_id,
                         tap_stream_id=tap_stream_id,
                         key_properties=schemas.PK_FIELDS[tap_stream_id],
                         schema=schema,
                         metadata=metadata.to_list(mdata)))
    return catalog
Example 24
def main():
    args = get_args()

    jira_config = args.config
    # jira client instance
    jira_client = Client(jira_config)

    # Setup Context
    Context.client = jira_client
    catalog = Catalog.from_dict(args.properties) \
        if args.properties else discover()
    Context.config = jira_config
    Context.state = args.state
    Context.catalog = catalog

    try:
        if args.discover:
            discover().dump()
            print()
        else:
            sync()
    finally:
        if Context.client and Context.client.login_timer:
            Context.client.login_timer.cancel()
Example 25
def discover(ctx):
    check_authorization(ctx)
    catalog = Catalog([])
    for tap_stream_id in schemas.STATIC_SCHEMA_STREAM_IDS:
        schema = Schema.from_dict(schemas.load_schema(tap_stream_id))
        metadata = []
        if tap_stream_id in schemas.ROOT_METADATA:
            metadata.append(schemas.ROOT_METADATA[tap_stream_id])
        for field_name in schema.properties.keys():
            if field_name in schemas.PK_FIELDS[tap_stream_id]:
                inclusion = 'automatic'
            else:
                inclusion = 'available'
            metadata.append({
                'metadata': {
                    'inclusion': inclusion
                },
                'breadcrumb': ['properties', field_name]
            })
        catalog.streams.append(CatalogEntry(
            stream=tap_stream_id,
            tap_stream_id=tap_stream_id,
            key_properties=schemas.PK_FIELDS[tap_stream_id],
            schema=schema,
            metadata=metadata
        ))
    contacts_schema, contact_metadata = schemas.get_contacts_schema(ctx)
    catalog.streams.append(CatalogEntry(
        stream='contacts',
        tap_stream_id='contacts',
        key_properties=schemas.PK_FIELDS['contacts'],
        schema=contacts_schema,
        metadata=contact_metadata
    ))

    return catalog
Example 26
def resolve_catalog(discovered_catalog, streams_to_sync):
    result = Catalog(streams=[])

    # Iterate over the streams in the input catalog and match each one up
    # with the same stream in the discovered catalog.
    for catalog_entry in streams_to_sync:
        catalog_metadata = metadata.to_map(catalog_entry.metadata)
        replication_key = catalog_metadata.get((), {}).get('replication-key')

        discovered_table = discovered_catalog.get_stream(catalog_entry.tap_stream_id)
        database_name = common.get_database_name(catalog_entry)

        if not discovered_table:
            LOGGER.warning('Database %s table %s was selected but does not exist',
                           database_name, catalog_entry.table)
            continue

        selected = {k for k, v in catalog_entry.schema.properties.items()
                    if common.property_is_selected(catalog_entry, k) or k == replication_key}

        # These are the columns we need to select
        columns = desired_columns(selected, discovered_table.schema)

        result.streams.append(CatalogEntry(
            tap_stream_id=catalog_entry.tap_stream_id,
            metadata=catalog_entry.metadata,
            stream=catalog_entry.stream,
            table=catalog_entry.table,
            schema=Schema(
                type='object',
                properties={col: discovered_table.schema.properties[col]
                            for col in columns}
            )
        ))

    return result
Example 27
def discover():
    schemas, field_metadata = get_schemas()
    catalog = Catalog([])

    flat_streams = flatten_streams()
    for stream_name, schema_dict in schemas.items():
        try:
            schema = Schema.from_dict(schema_dict)
            mdata = field_metadata[stream_name]
        except Exception as err:
            LOGGER.error(err)
            LOGGER.error('stream_name: {}'.format(stream_name))
            LOGGER.error('type schema_dict: {}'.format(type(schema_dict)))
            raise err

        catalog.streams.append(
            CatalogEntry(stream=stream_name,
                         tap_stream_id=stream_name,
                         key_properties=flat_streams.get(stream_name, {}).get(
                             'key_properties', None),
                         schema=schema,
                         metadata=mdata))

    return catalog
Example 28
def discover(config):
    model_id = config.get('model_id')
    schemas, field_metadata = get_schemas()
    catalog = Catalog([])

    flat_streams = flatten_streams()
    for stream_name, schema_dict in schemas.items():
        process_stream = True
        # conversion_paths endpoint requires model_id tap config param
        if stream_name == 'conversion_paths' and not model_id:
            process_stream = False
        if process_stream:
            schema = Schema.from_dict(schema_dict)
            mdata = field_metadata[stream_name]

            catalog.streams.append(
                CatalogEntry(stream=stream_name,
                             tap_stream_id=stream_name,
                             key_properties=flat_streams.get(
                                 stream_name, {}).get('key_properties', None),
                             schema=schema,
                             metadata=mdata))

    return catalog
Example 29
def main_impl():
    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
    account_id = args.config['account_id']
    access_token = args.config['access_token']

    CONFIG.update(args.config)

    FacebookAdsApi.init(access_token=access_token)
    user = fb_user.User(fbid='me')
    accounts = user.get_ad_accounts()
    account = None
    for acc in accounts:
        if acc['account_id'] == account_id:
            account = acc
    if not account:
        raise TapFacebookException("Couldn't find account with id {}".format(account_id))

    if args.discover:
        do_discover()
    elif args.properties:
        catalog = Catalog.from_dict(args.properties)
        do_sync(account, catalog, args.state)
    else:
        LOGGER.info("No properties were selected")
Example 30
def main():
    required_config_keys = ['start_date']
    args = singer.parse_args(required_config_keys)

    config = args.config
    freshdesk_client = FreshdeskClient(args.config_path, config)
    catalog = args.catalog or Catalog([])
    state = args.state

    if args.properties and not args.catalog:
        raise Exception("DEPRECATED: Use of the 'properties' parameter is not supported. Please use --catalog instead")

    if args.discover:
        LOGGER.info("Starting discovery mode")
        catalog = discover(freshdesk_client)
        write_catalog(catalog)
    else:
        LOGGER.info("Starting sync mode")

        CONFIG.update(config)
        STATE.update(state)

        sync(freshdesk_client, config, state, catalog)