Example #1
0
def schema_for_column(c):
    """Return the ``Schema`` for column ``c``.

    Columns whose ``array_dimensions`` is positive get a nullable array
    schema with unconstrained items; every other column is delegated to
    ``schema_for_column_datatype``.
    """
    # NB: from the Postgres docs: "The current implementation does not
    # enforce the declared number of dimensions either." So nothing can be
    # said about the items of an array column: they may be nested arrays or
    # primitives such as integers, and that can vary from row to row.
    if c.array_dimensions > 0:
        return Schema.from_dict({"type": ["null", "array"], "items": {}})

    return Schema.from_dict(schema_for_column_datatype(c))
Example #2
0
def generate_schemas(config: Dict) -> Dict:
    """Build one schema per layer of the configured geo source.

    Args:
        config (Dict): Configuration; ``config['path']`` is handed to
            ``GeoSource`` together with the whole configuration.

    Returns:
        Dict: ``stream_id: schema`` pairs, one per layer (each layer is a
        stream).
    """
    source = GeoSource(path=config['path'], config=config)

    def _stream_schema(properties):
        # Every layer is wrapped in the same nullable, closed object schema.
        return Schema.from_dict({
            'type': ['null', 'object'],
            'additionalProperties': False,
            'selected': True,
            'properties': properties
        })

    schemas = {}
    for layer_name, layer in source.layers.items():
        LOGGER.info(f'Found layer {layer_name}')
        schemas[layer_name] = _stream_schema(layer.schema)

    return schemas
 def test_should_output_records(self, mock_stdout, requests_mock):
     """Sync the selected "groups" stream and compare the exact stdout calls."""
     # Register page 0 (fixture records) and page 1 (EMPTY_RESPONSE), in order.
     mocked_pages = (
         ("https://api.nikabot.com/api/v1/groups?limit=1000&page=0", GROUPS_RESPONSE),
         ("https://api.nikabot.com/api/v1/groups?limit=1000&page=1", EMPTY_RESPONSE),
     )
     for url, payload in mocked_pages:
         requests_mock.get(url, json=json.loads(payload))

     config = {"access_token": "my-access-token", "page_size": 1000}
     state = {}
     groups_entry = CatalogEntry(
         tap_stream_id="groups",
         stream="groups",
         schema=Schema.from_dict({}),
         key_properties=["id"],
         metadata=[{"breadcrumb": [], "metadata": {"selected": True}}],
     )
     catalog = Catalog(streams=[groups_entry])

     sync(config, state, catalog)

     # One SCHEMA message followed by one RECORD message per fixture record.
     expected_calls = [
         call('{"type": "SCHEMA", "stream": "groups", "schema": {}, "key_properties": ["id"]}\n'),
         call(
             '{"type": "RECORD", "stream": "groups", "record": {"id": "f1b4b37cc2658672770b789f", "team_id": "T034F9NPW", "name": "TA Squad 5"}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'
         ),
         call(
             '{"type": "RECORD", "stream": "groups", "record": {"id": "3176700ac4f2203b825fae6c", "team_id": "T034F9NPW", "name": "Platform Toolkit"}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'
         ),
     ]
     assert mock_stdout.mock_calls == expected_calls
Example #4
0
 def test_should_output_records(self, mock_stdout, requests_mock):
     """Sync the selected "projects" stream and compare the exact stdout calls."""
     first_page = "https://api.nikabot.com/api/v1/projects?limit=1000&page=0"
     second_page = "https://api.nikabot.com/api/v1/projects?limit=1000&page=1"
     requests_mock.get(first_page, json=json.loads(PROJECTS_RESPONSE))
     requests_mock.get(second_page, json=json.loads(EMPTY_RESPONSE))

     config = {"access_token": "my-access-token", "page_size": 1000}
     state = {}
     # Stream-level metadata (empty breadcrumb) marks the stream as selected.
     selected = [{"breadcrumb": [], "metadata": {"selected": True}}]
     projects_entry = CatalogEntry(
         tap_stream_id="projects",
         stream="projects",
         schema=Schema.from_dict({}),
         key_properties=["id"],
         metadata=selected,
     )
     catalog = Catalog(streams=[projects_entry])

     sync(config, state, catalog)

     schema_message = call(
         '{"type": "SCHEMA", "stream": "projects", "schema": {}, "key_properties": ["id"]}\n'
     )
     record_messages = [
         call(
             '{"type": "RECORD", "stream": "projects", "record": {"id": "5d6ca95e62a07c00045126e7", "project_name": "CAP - Analytics", "team_id": "T034F9NPW", "author": "U6K26HMGV", "pto": {"status": false}, "custom_ref": "", "create_date": "2019-09-02T05:32:14.23", "client": "", "type": "Capability Custodian", "created_at": "2019-09-02T05:32:14.23", "assigned_groups": ["Analytics"]}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'
         ),
         call(
             '{"type": "RECORD", "stream": "projects", "record": {"id": "5d6ca97c62a07c00045126e8", "project_name": "CAP - Authentication", "team_id": "T034F9NPW", "author": "U6K26HMGV", "pto": {"status": false}, "custom_ref": "", "create_date": "2019-09-02T05:32:44.172", "client": "", "type": "Capability Custodian", "created_at": "2019-09-02T05:32:44.172", "assigned_groups": ["Authentication"]}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'
         ),
     ]
     assert mock_stdout.mock_calls == [schema_message] + record_messages
 def test_should_output_records(self, mock_stdout, requests_mock):
     """Sync the selected "roles" stream and compare the exact stdout calls."""
     # Page 0 is served ROLES_RESPONSE, page 1 is served EMPTY_RESPONSE.
     for url, payload in (
         ("https://api.nikabot.com/api/v1/roles?limit=1000&page=0", ROLES_RESPONSE),
         ("https://api.nikabot.com/api/v1/roles?limit=1000&page=1", EMPTY_RESPONSE),
     ):
         requests_mock.get(url, json=json.loads(payload))

     config = {"access_token": "my-access-token", "page_size": 1000}
     state = {}
     roles_entry = CatalogEntry(
         tap_stream_id="roles",
         stream="roles",
         schema=Schema.from_dict({}),
         key_properties=["id"],
         metadata=[{"breadcrumb": [], "metadata": {"selected": True}}],
     )
     catalog = Catalog(streams=[roles_entry])

     sync(config, state, catalog)

     expected_calls = [
         call('{"type": "SCHEMA", "stream": "roles", "schema": {}, "key_properties": ["id"]}\n'),
         call(
             '{"type": "RECORD", "stream": "roles", "record": {"id": "d893ebf32d49c35c1d754774", "team_id": "T034F9NPW", "name": "0.5"}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'
         ),
         call(
             '{"type": "RECORD", "stream": "roles", "record": {"id": "cfabd9aa6f3e6381a716da58", "team_id": "T034F9NPW", "name": "0.1"}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'
         ),
     ]
     assert mock_stdout.mock_calls == expected_calls
Example #6
0
    def test_insights_start_dates(self):
        """Check the time ranges of the first and fifth generated job params.

        With a ``date_start`` bookmark of 2017-01-31, the first job is
        expected to cover 2017-01-03 and the fifth 2017-01-07, each as a
        single-day range.
        """
        annotated_schema = Schema.from_dict({
            'selected': True,
            'properties': {'something': {'type': 'object'}},
        })
        bookmark_state = {
            'bookmarks': {'insights': {'date_start': '2017-01-31'}},
        }
        insights = AdsInsights(
            name='insights',
            account=None,
            stream_alias="insights",
            options={},
            annotated_schema=annotated_schema,
            state=bookmark_state)

        # job_params() is consumed lazily; only the first five are needed.
        first_five = list(itertools.islice(insights.job_params(), 5))

        for position, day in ((0, '2017-01-03'), (4, '2017-01-07')):
            self.assertEqual(first_five[position]['time_ranges'],
                             [{'since': day, 'until': day}])
 def test_should_output_records(self, mock_stdout, requests_mock):
     """Sync the selected "teams" stream and compare the exact stdout calls."""
     # Only a single, un-paginated URL is mocked for this stream.
     teams_url = "https://api.nikabot.com/api/v1/teams"
     requests_mock.get(teams_url, json=json.loads(TEAMS_RESPONSE))

     config = {"access_token": "my-access-token", "page_size": 1000}
     state = {}
     teams_entry = CatalogEntry(
         tap_stream_id="teams",
         stream="teams",
         schema=Schema.from_dict({}),
         key_properties=["id"],
         metadata=[{"breadcrumb": [], "metadata": {"selected": True}}],
     )
     catalog = Catalog(streams=[teams_entry])

     sync(config, state, catalog)

     schema_message = call(
         '{"type": "SCHEMA", "stream": "teams", "schema": {}, "key_properties": ["id"]}\n'
     )
     record_message = call(
         '{"type": "RECORD", "stream": "teams", "record": {"id": "5d6ca50762a07c00045125fb", "domain": "pageup", "bot_token": "e31d3b7ae51ff1feec8be578f23eb017e8143f66a7a085342c664544b81618ec41b87810d61a9c1f6133fe0c7d88aa3976232bb2a2665c4f89c38058b51cd20c", "activated_by": "U6K26HMGV", "status": "ACTIVE", "platform_id": "T034F9NPW", "created_at": "2019-09-02T05:13:43.151", "subscription": {"active_until": "2020-07-08T23:59:59", "status": "active", "number_of_users": 69, "subscriber_id": "U93KT77T6"}, "icon": {"image_34": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_34.png", "image_44": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_44.png", "image_68": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_68.png", "image_88": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_88.png", "image_102": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_102.png", "image_132": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_132.png", "image_230": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_230.png", "image_original": "https://avatars.slack-edge.com/2017-09-15/241678543093_b2ad80be9268cdbd89c3_original.png"}}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'
     )
     assert mock_stdout.mock_calls == [schema_message, record_message]
Example #8
0
 def test_object_from_dict_with_defaults(self):
     """Check how the ``inclusion`` default passed to ``from_dict`` is applied.

     The top-level schema is expected to keep its own ``inclusion`` value
     ('whatever'), while the ``a_string`` property and the items of
     ``an_array`` are expected to take the supplied default ('automatic').
     """
     schema = Schema.from_dict(self.object_dict, inclusion='automatic')
     # assertEqual is used instead of the assertEquals alias, which was
     # deprecated and then removed from unittest in Python 3.12.
     self.assertEqual('whatever',
                      schema.inclusion,
                      msg='The schema value should override the default')
     self.assertEqual('automatic', schema.properties['a_string'].inclusion)
     self.assertEqual('automatic',
                      schema.properties['an_array'].items.inclusion)
Example #9
0
 def _map_to_schema(self, swagger: JsonResult) -> Schema:
     """Turn the swagger ``RecordDTO`` definition into a ``Schema``.

     The full ``definitions`` table is attached to a copy of the
     ``RecordDTO`` definition before it is passed to
     ``resolve_schema_references``.
     """
     definitions = swagger["definitions"]
     # Copy so the swagger document itself is not modified.
     record_schema = dict(definitions["RecordDTO"])
     record_schema["definitions"] = definitions
     return Schema.from_dict(resolve_schema_references(record_schema))
Example #10
0
def load_schemas():
    """Load every schema file found in the ``schemas`` folder.

    Returns:
        dict: Maps each file name (with ``.json`` removed) to the ``Schema``
        built from that file's JSON content.
    """
    # TODO: This example loads schemas from a project folder, replace with whatever discovery mechanism is available
    schemas = {}
    # Resolve the folder once instead of once per file.
    schemas_dir = get_abs_path('schemas')
    for filename in os.listdir(schemas_dir):
        file_raw = filename.replace('.json', '')
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(os.path.join(schemas_dir, filename), encoding='utf-8') as file:
            schemas[file_raw] = Schema.from_dict(json.load(file))
    return schemas
Example #11
0
def load_schemas() -> Dict[str, Any]:
    """Load schemas from the ``schemas`` folder.

    Returns:
        A dictionary mapping each file name (with ``.json`` removed) to the
        ``Schema`` built from that file's JSON content.
    """
    # Resolve the folder once; it does not change between files.
    schemas_dir = _get_abs_path("schemas")
    schemas = {}
    for filename in os.listdir(schemas_dir):
        path = os.path.join(schemas_dir, filename)
        file_raw = filename.replace(".json", "")
        # Read as UTF-8 explicitly rather than with the locale's encoding.
        with open(path, encoding="utf-8") as file:
            schemas[file_raw] = Schema.from_dict(json.load(file))
    return schemas
Example #12
0
def load_schemas():
    """Load schemas from the ``schemas`` folder.

    Returns:
        dict: ``Schema`` objects keyed by file name with ``.json`` removed.
    """
    schemas = {}
    # The folder path is loop-invariant, so it is computed a single time.
    folder = get_abs_path('schemas')
    for filename in os.listdir(folder):
        file_raw = filename.replace('.json', '')
        # Explicit UTF-8 keeps loading independent of the platform encoding.
        with open(os.path.join(folder, filename), encoding='utf-8') as file:
            schemas[file_raw] = Schema.from_dict(json.load(file))
    return schemas
Example #13
0
def discover(config):
    """Build a ``Catalog`` with one entry per table in ``config['tables']``.

    For each table spec the input files are sampled, a schema is inferred
    from the samples, three ``_smart_source_*`` bookkeeping columns are
    merged in, and the result is passed through
    ``override_schema_with_config`` before being wrapped in a ``Schema``.
    """
    # Bookkeeping columns merged into every inferred schema.
    source_columns = (
        ('_smart_source_bucket', 'string'),
        ('_smart_source_file', 'string'),
        ('_smart_source_lineno', 'integer'),
    )

    entries = []
    for table_spec in config['tables']:
        modified_since = dateutil.parser.parse(table_spec['start_date'])
        target_files = file_utils.get_input_files_for_table(
            table_spec, modified_since)

        # Sampling knobs, each with its fallback when absent from the spec.
        samples = file_utils.sample_files(
            table_spec,
            target_files,
            sample_rate=table_spec.get('sample_rate', 10),
            max_records=table_spec.get('max_sampling_read', 1000),
            max_files=table_spec.get('max_sampled_files', 5))

        metadata_schema = {
            column: {'type': json_type}
            for column, json_type in source_columns
        }
        data_schema = conversion.generate_schema(
            samples,
            prefer_number_vs_integer=table_spec.get(
                'prefer_number_vs_integer', False))
        inferred_schema = {
            'type': 'object',
            'properties': merge_dicts(data_schema, metadata_schema)
        }
        schema = Schema.from_dict(
            override_schema_with_config(inferred_schema, table_spec))

        table_name = table_spec['name']
        entries.append(
            CatalogEntry(
                tap_stream_id=table_name,
                stream=table_name,
                schema=schema,
                key_properties=table_spec.get('key_properties', []),
                metadata=[],
                replication_key=None,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            ))
    return Catalog(entries)
Example #14
0
 def test_should_output_records(self, mock_stdout, requests_mock):
     """Sync the selected "users" stream; check stdout and the info log calls."""
     # Page 0 is served USERS_RESPONSE, page 1 is served EMPTY_RESPONSE.
     for url, payload in (
         ("https://api.nikabot.com/api/v1/users?limit=1000&page=0", USERS_RESPONSE),
         ("https://api.nikabot.com/api/v1/users?limit=1000&page=1", EMPTY_RESPONSE),
     ):
         requests_mock.get(url, json=json.loads(payload))

     config = {"access_token": "my-access-token", "page_size": 1000}
     state = {}
     users_entry = CatalogEntry(
         tap_stream_id="users",
         stream="users",
         schema=Schema.from_dict({}),
         key_properties=["id"],
         metadata=[{"breadcrumb": [], "metadata": {"selected": True}}],
     )
     catalog = Catalog(streams=[users_entry])

     sync(config, state, catalog)

     expected_output = [
         call(
             '{"type": "SCHEMA", "stream": "users", "schema": {}, "key_properties": ["id"]}\n'
         ),
         call(
             '{"type": "RECORD", "stream": "users", "record": {"id": "5de459977292020014fb601c", "name": "Billy", "deleted": true, "presence": "away", "user_id": "UR5B0QABX", "team_id": "T034F9NPW", "is_restricted": false, "is_ultra_restricted": false, "is_admin": false, "is_nikabot_admin": false, "tz": "Australia/Canberra", "tz_label": "Australian Eastern Standard Time", "tz_offset": 36000, "is_checkin_excluded": true, "created_at": "2019-12-02T00:23:51.087", "groups": [], "updated_at": "2020-06-14T22:47:29.617"}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'
         ),
         call(
             '{"type": "RECORD", "stream": "users", "record": {"id": "68QMxnnt8YcpPdfmM", "name": "paul.heasley", "deleted": false, "presence": "active", "user_id": "U04AX35QP", "team_id": "T034F9NPW", "is_restricted": false, "is_ultra_restricted": false, "is_admin": false, "is_nikabot_admin": true, "tz": "Australia/Canberra", "tz_label": "Australian Eastern Standard Time", "tz_offset": 36000, "is_checkin_excluded": false, "create_date": "2019-09-02T05:13:47.88", "created_at": "2019-09-02T05:13:47.882", "role": "0.1", "groups": ["TA Stream", "TA Squad 1", "TA Squad 2", "TA Squad 3", "TA Squad 4", "Learning Applications", "Notification Capability"], "updated_at": "2020-06-15T06:07:58.272"}, "time_extracted": "2020-01-01T00:00:00.000000Z"}\n'
         ),
     ]
     assert mock_stdout.mock_calls == expected_output

     def request_log(page):
         # Expected log call for the request of one page.
         return call(
             "Making %s request to %s with params %s",
             "GET",
             "https://api.nikabot.com/api/v1/users",
             {"limit": "1000", "page": page},
         )

     expected_logs = [
         call("Syncing stream: %s", "users"),
         request_log("0"),
         request_log("1"),
     ]
     assert LOGGER.info.mock_calls == expected_logs
Example #15
0
def discover(client, custom_reports):
    """Build a ``Catalog`` from the bundled schemas plus any custom reports.

    Every schema returned by ``load_schemas()`` becomes an entry configured
    from the matching ``STREAMS`` object. Each custom report (if any) gets a
    schema from ``build_schema(client, report)`` and an entry keyed by
    ``report['stream_id']``.
    """
    entries = []

    # Bundled streams.
    for stream_id, schema in load_schemas().items():
        stream_instance = STREAMS[stream_id]
        key_properties = stream_instance.key_properties
        replication_key = stream_instance.replication_key
        replication_method = stream_instance.replication_method
        stream_metadata = metadata.get_standard_metadata(
            schema=schema.to_dict(),
            key_properties=key_properties,
            valid_replication_keys=replication_key,
            replication_method=replication_method)
        entries.append(
            CatalogEntry(
                tap_stream_id=stream_id,
                stream=stream_id,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key=replication_key,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=replication_method,
            ))

    # Custom reports; a falsy value (None, empty list) yields no extra entries.
    for report in custom_reports or []:
        schema = Schema.from_dict(build_schema(client, report))
        key_properties = report.get('key_properties')
        replication_key = report.get('valid_replication_keys')
        stream_metadata = metadata.get_standard_metadata(
            schema=schema.to_dict(),
            key_properties=key_properties,
            valid_replication_keys=replication_key,
            replication_method=None)
        report_id = report['stream_id']
        entries.append(
            CatalogEntry(
                tap_stream_id=report_id,
                stream=report_id,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key=replication_key,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                # NOTE(review): the whole report object is stored as the
                # alias here -- confirm this is intended.
                stream_alias=report,
                replication_method=None,
            ))

    return Catalog(entries)
Example #16
0
def mock_catalog():
    """Return a one-stream ``Catalog`` for the selected "records" stream.

    The entry uses the schema parsed from ``SCHEMA``, ``id`` as key property
    and ``date`` as replication key with the INCREMENTAL method.
    """
    records_entry = CatalogEntry(
        tap_stream_id="records",
        stream="records",
        schema=Schema.from_dict(json.loads(SCHEMA)),
        key_properties=["id"],
        # Stream-level metadata (empty breadcrumb) marks the stream selected.
        metadata=[{"breadcrumb": [], "metadata": {"selected": True}}],
        replication_key="date",
        replication_method="INCREMENTAL",
    )
    return Catalog(streams=[records_entry])
Example #17
0
def generate_schema(table_spec, samples):
    """Infer a ``Schema`` for ``samples`` and apply the table's overrides.

    The schema generated from the samples is merged with three
    ``_smart_source_*`` bookkeeping columns, wrapped as an object schema and
    passed through ``override_schema_with_config`` with ``table_spec``.
    """
    # Bookkeeping columns merged into the inferred properties.
    metadata_schema = {
        column: {'type': json_type}
        for column, json_type in (
            ('_smart_source_bucket', 'string'),
            ('_smart_source_file', 'string'),
            ('_smart_source_lineno', 'integer'),
        )
    }
    data_schema = conversion.generate_schema(
        samples,
        prefer_number_vs_integer=table_spec.get('prefer_number_vs_integer', False))
    properties = merge_dicts(data_schema, metadata_schema)
    inferred_schema = {'type': 'object', 'properties': properties}

    return Schema.from_dict(override_schema_with_config(inferred_schema, table_spec))
Example #18
0
def build_schema(query_resource):
    """Build the ``Schema`` for a query resource.

    Every ``groupBys`` key becomes a nullable string property and every
    ``metrics`` key a nullable number property; a key present in both ends
    up as a number.
    """
    properties = {
        key: {'type': ['null', 'string']}
        for key in query_resource['params']['groupBys']
    }
    # Metrics are applied second so they win on a name collision.
    properties.update({
        key: {'type': ['null', 'number']}
        for key in query_resource['params']['metrics']
    })

    return Schema.from_dict({
        'type': ['null', 'object'],
        'additionalProperties': False,
        'properties': properties,
    })
Example #19
0
def discover():
    """Build a ``Catalog`` with one entry per schema from ``get_schemas()``.

    Each entry pairs the stream's schema with its field metadata and the
    ``key_properties`` configured for that stream in ``STREAMS``.
    """
    schemas, field_metadata = get_schemas()
    catalog = Catalog([])

    for stream_name, schema_dict in schemas.items():
        entry = CatalogEntry(
            stream=stream_name,
            tap_stream_id=stream_name,
            key_properties=STREAMS[stream_name]['key_properties'],
            schema=Schema.from_dict(schema_dict),
            metadata=field_metadata[stream_name],
        )
        catalog.streams.append(entry)

    return catalog
Example #20
0
 def test_should_output_nothing_given_no_streams_selected(
         self, mock_stdout):
     """A stream with no metadata is skipped: nothing is written, one log call."""
     config = {"access_token": "my-access-token", "page_size": 1000}
     state = {}
     # Empty metadata: the stream carries no "selected" marker.
     unselected_entry = CatalogEntry(
         tap_stream_id="users",
         stream="users",
         schema=Schema.from_dict({}),
         key_properties=["id"],
         metadata=[],
     )
     catalog = Catalog(streams=[unselected_entry])

     sync(config, state, catalog)

     mock_stdout.assert_not_called()
     expected_logs = [call("Skipping stream: %s", "users")]
     assert LOGGER.info.mock_calls == expected_logs
Example #21
0
def discover():
    """Build a ``Catalog`` with one entry per stream in ``streams``.

    Each entry gets one stream-level metadata record (empty breadcrumb)
    followed by one record per schema property. A property's inclusion is
    'automatic' when it is a key property or is named ``start_date``, and
    'available' otherwise.

    Returns:
        Catalog: The catalog holding all generated entries.
    """
    entries = []

    for stream in streams:
        schema = Schema.from_dict(stream.get_schema())
        key_properties = stream.key_properties

        # The stream-level record describes the stream as a whole, so it is
        # emitted exactly once. It used to be appended inside the property
        # loop, which duplicated it per property and dropped it entirely for
        # a schema without properties.
        stream_metadata = [{
            'breadcrumb': [],
            'metadata': {
                'inclusion': 'available',
                'table-key-properties': key_properties,
                'schema-name': stream.tap_stream_id,
                'selected': True,
            }
        }]

        # A schema without "properties" simply yields no per-field records.
        for prop in (schema.properties or {}):
            if prop in key_properties or prop == 'start_date':
                inclusion = 'automatic'
            else:
                inclusion = 'available'

            stream_metadata.append({
                'breadcrumb': ['properties', prop],
                'metadata': {
                    'inclusion': inclusion
                }
            })

        entries.append(
            CatalogEntry(
                tap_stream_id=stream.tap_stream_id,
                stream=stream.tap_stream_id,
                schema=schema,
                key_properties=key_properties,
                metadata=stream_metadata,
                replication_key=None,
                is_view=None,
                database=None,
                table=None,
                row_count=None,
                stream_alias=None,
                replication_method=None,
            )
        )
    return Catalog(entries)
def load_schemas(config):
    """Load schemas from the folder named by ``config['schema_dir']``.

    Loading is best-effort: a missing folder, or a file that cannot be
    decoded or parsed as JSON, is reported with a warning and skipped.
    Entries that are not regular files are ignored.

    Args:
        config: Mapping providing the ``schema_dir`` setting.

    Returns:
        dict: ``Schema`` objects keyed by file name with ``.json`` removed.
    """
    schemas = {}
    schema_dir_path = get_abs_path(config['schema_dir'])
    if not os.path.isdir(schema_dir_path):
        LOGGER.warning(schema_dir_path + " : Is not a valid directory")
        return schemas

    for filename in os.listdir(schema_dir_path):
        # Reuse the already-resolved folder instead of resolving it per file.
        path = os.path.join(schema_dir_path, filename)
        if not os.path.isfile(path):
            continue
        file_raw = filename.replace('.json', '')
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(path, encoding='utf-8') as file:
            try:
                schemas[file_raw] = Schema.from_dict(json.load(file))
            except json.decoder.JSONDecodeError as err:
                LOGGER.warning("Schema file : " + file_raw + " is invalid or not JSON : " + err.msg)
            except UnicodeDecodeError as err:
                # A file that is not valid UTF-8 cannot be JSON either; skip
                # it like any other invalid schema file rather than aborting.
                LOGGER.warning("Schema file : " + file_raw + " is invalid or not JSON : " + err.reason)
    return schemas
Example #23
0
def load_schemas() -> dict:
    """Load schemas from schemas folder.

    Returns:
        dict -- Schemas keyed by file name with ``.json`` removed
    """
    schemas: dict = {}

    # The folder does not change between files, so resolve it only once
    abs_path: str = get_abs_path('schemas')

    # For every file in the schemas directory
    for filename in os.listdir(abs_path):
        file_raw: str = filename.replace('.json', '')

        # Open as UTF-8 (not the platform default) and load the schema
        with open(os.path.join(abs_path, filename), encoding='utf-8') as schema_file:
            schemas[file_raw] = Schema.from_dict(json.load(schema_file))
    return schemas
def discover(config):
    """Build a ``Catalog`` with one entry per object in ``STREAM_OBJECTS``.

    Each stream's raw schema is loaded by ``tap_stream_id`` and used both
    for the entry's ``Schema`` and for its standard metadata.
    """
    # NOTE(review): the client is constructed but not used below; kept in
    # case construction has side effects (e.g. config validation) -- confirm.
    client = Client(config)

    entries = []
    for stream in STREAM_OBJECTS.values():
        stream_id = stream.tap_stream_id
        raw_schema = load_schema(stream_id)
        schema = Schema.from_dict(raw_schema)
        stream_metadata = metadata.get_standard_metadata(
            schema=raw_schema,
            schema_name=stream_id,
            key_properties=stream.pk_fields,
            valid_replication_keys=stream.replication_keys,
            replication_method=stream.replication_method)
        entries.append(
            CatalogEntry(stream=stream_id,
                         tap_stream_id=stream_id,
                         key_properties=stream.pk_fields,
                         schema=schema,
                         metadata=stream_metadata))
    return Catalog(entries)
Example #25
0
 def from_dict(cls, data):
     """Build a ``Catalog`` from a dict with a ``streams`` list.

     Each stream dict is copied field by field onto a new ``CatalogEntry``;
     fields absent from the dict are set to ``None``.
     """
     # TODO: We may want to store streams as a dict where the key is a
     # tap_stream_id and the value is a CatalogEntry. This will allow
     # faster lookup based on tap_stream_id. This would be a breaking
     # change, since callers typically access the streams property
     # directly.

     # (entry attribute, key in the stream dict) for the plain copies.
     plain_fields = (
         ('tap_stream_id', 'tap_stream_id'),
         ('stream', 'stream'),
         ('replication_key', 'replication_key'),
         ('key_properties', 'key_properties'),
         ('database', 'database_name'),
         ('table', 'table_name'),
     )

     entries = []
     for stream in data['streams']:
         entry = CatalogEntry()
         for attribute, key in plain_fields:
             setattr(entry, attribute, stream.get(key))
         entry.schema = Schema.from_dict(stream.get('schema'))
         entry.is_view = stream.get('is_view')
         entries.append(entry)
     return Catalog(entries)
Example #26
0
def get_schemas():
    """Load the schema and standard metadata of every stream in ``STREAMS``.

    Returns:
        tuple: ``(schemas, schemas_metadata)``, two dicts keyed by stream
        name holding the ``Schema`` and the standard metadata respectively.
    """
    schemas = {}
    schemas_metadata = {}

    for stream_name, stream_metadata in STREAMS.items():
        schema_path = get_abs_path('schemas/{}.json'.format(stream_name))
        with open(schema_path) as file:
            raw_schema = json.load(file)

        # Settings missing from the stream definition are passed as None.
        schemas_metadata_entry = metadata.get_standard_metadata(
            schema=raw_schema,
            key_properties=stream_metadata.get('key_properties'),
            valid_replication_keys=stream_metadata.get('replication_keys'),
            replication_method=stream_metadata.get('replication_method'))

        schemas[stream_name] = Schema.from_dict(raw_schema)
        schemas_metadata[stream_name] = schemas_metadata_entry

    return schemas, schemas_metadata
Example #27
0
    def get_schema(self, sheet_name, worksheet_name=None):
        """Infer, store and return the schema dict of a worksheet.

        The schema is inferred from the worksheet's records; when there are
        none, a single placeholder record built from the worksheet headers
        (every value a string) is used instead. The result is stored in
        ``self.schema[worksheet_name]``.
        """
        data = self.get_data(sheet_name, worksheet_name)
        builder = SchemaBuilder()

        if len(data) == 0:
            # Empty spreadsheet: infer from one sample record per header.
            records = [{
                key: "some string"
                for key in self.headers[worksheet_name]
            }]
        else:
            records = data

        # Feed every record to the builder so it can infer the schema.
        for record in records:
            builder.add_object(record)

        # Round-trip through a singer Schema to get the stored dict form.
        schema_dict = Schema.from_dict(builder.to_schema()).to_dict()
        self.schema[worksheet_name] = schema_dict

        return self.schema[worksheet_name]
Example #28
0
 def test_should_output_no_records_given_no_records_available(
         self, mock_stdout, requests_mock):
     """With page 0 served EMPTY_RESPONSE only the SCHEMA message is written."""
     first_page = "https://api.nikabot.com/api/v1/users?limit=1000&page=0"
     requests_mock.get(first_page, json=json.loads(EMPTY_RESPONSE))

     config = {"access_token": "my-access-token", "page_size": 1000}
     state = {}
     users_entry = CatalogEntry(
         tap_stream_id="users",
         stream="users",
         schema=Schema.from_dict({}),
         key_properties=["id"],
         metadata=[{"breadcrumb": [], "metadata": {"selected": True}}],
     )
     catalog = Catalog(streams=[users_entry])

     sync(config, state, catalog)

     expected_output = [
         call(
             '{"type": "SCHEMA", "stream": "users", "schema": {}, "key_properties": ["id"]}\n'
         )
     ]
     assert mock_stdout.mock_calls == expected_output

     # Exactly one request is expected to be logged (page 0).
     expected_logs = [
         call("Syncing stream: %s", "users"),
         call(
             "Making %s request to %s with params %s",
             "GET",
             "https://api.nikabot.com/api/v1/users",
             {"limit": "1000", "page": "0"},
         ),
     ]
     assert LOGGER.info.mock_calls == expected_logs
Example #29
0
    def test_insights_job_params_stops(self):
        """``job_params()`` must be finite: 31 jobs for a bookmark two days back."""
        start_date = tap_facebook.TODAY.subtract(days=2)

        annotated_schema = Schema.from_dict({
            'selected': True,
            'properties': {'something': {'type': 'object'}},
        })
        bookmark_state = {
            'bookmarks': {
                'insights': {'date_start': start_date.to_date_string()},
            },
        }
        insights = AdsInsights(name='insights',
                               account=None,
                               stream_alias="insights",
                               options={},
                               annotated_schema=annotated_schema,
                               state=bookmark_state)

        # Exhausting the generator proves it terminates.
        generated_jobs = list(insights.job_params())
        self.assertEqual(31, len(generated_jobs))
Example #30
0
def load_schemas():
    """Load schemas from the ``schemas`` folder, generating them if needed.

    The folder is created when missing. When it contains no regular file,
    ``create_schemas()`` is called first. Every entry of the folder is then
    read as JSON.

    Returns:
        dict: ``singer.schema.Schema`` objects keyed by file name with
        ``.json`` removed.
    """
    schemas = {}
    schemas_path = get_abs_path('schemas')

    # If the /schemas/ folder is missing, create it.
    if not os.path.isdir(schemas_path):
        os.mkdir(schemas_path)

    # If no schemas are found in the /schemas/ folder, then generate them
    # using create_schemas.py. any() stops at the first regular file instead
    # of building a throwaway list just to test for emptiness.
    has_schema_file = any(
        os.path.isfile(os.path.join(schemas_path, name))
        for name in os.listdir(schemas_path))
    if not has_schema_file:
        create_schemas()

    # Now read the files in /schemas/.
    for filename in os.listdir(schemas_path):
        file_raw = filename.replace('.json', '')
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(os.path.join(schemas_path, filename), encoding='utf-8') as file:
            schemas[file_raw] = Schema.from_dict(json.load(file))
    return schemas