Ejemplo n.º 1
0
def create(name, filepath):
    """Create a dataset called *name* from the CSV file at *filepath*.

    The file is uploaded into a new revision, the revision is applied with
    the resulting output schema, and the revision is opened in a browser.
    """
    client = Socrata(auth)
    with open(filepath, 'rb') as handle:
        pending = client.create(name=name)
        revision, output_schema = pending.csv(handle)

        # The job returned by apply() is not inspected.
        revision.apply(output_schema=output_schema)
        revision.open_in_browser()
def main(args):
    """Export a dataset to a file and optionally upload it to Socrata.

    Args:
        args: Parsed command-line arguments. ``DataSet`` names the dataset,
            ``FileName`` is the path the dataframe is written to (and later
            read back from for the upload), and ``Upload`` enables the upload
            step.

    Raises:
        AssertionError: If a Socrata call reports failure (``ok`` is falsy).
    """
    # Get the dataset name from the command line; nothing to do without one
    if args.DataSet == "":
        return

    # Get the dataset info from the XML
    ods = getDatasetInfo(args.DataSet)
    # Create the connection to the database
    connection = ods.getConnection()
    try:
        # Create the dataframe
        df = pd.read_sql(ods.query, con=connection)
    finally:
        # Close the database connection, even when the query fails
        connection.close()
    # Save the dataframe to a file
    writeFile(df, args.FileName)

    # Post the file to SCGC?
    if args.Upload == True:
        # Authenticate to the portal
        auth = Authorization('austin-aph.data.socrata.com',
                             'Your_Socrata_Username',
                             'Your_Socrata_Password')
        socrata = Socrata(auth)
        # Find the view for the dataset
        (ok, view) = socrata.views.lookup(ods.view)
        assert ok, view
        # Open the file
        with open(args.FileName, 'rb') as my_file:
            # Get the config file for the view
            (ok, job) = socrata.using_config(ods.config, view).csv(my_file)
            assert ok, job
            # Write out the progress of the job
            (ok, job) = job.wait_for_finish(progress=lambda job: print(
                'Job progress:', job.attributes['status']))
            # Check the final outcome instead of silently dropping it
            assert ok, job
Ejemplo n.º 3
0
 def setUp(self):
     """Create a 'test-view' revision and look up the view behind it."""
     client = Socrata(auth)
     self.pub = client
     (ok, revision) = client.new({'name': 'test-view'})
     assert ok, revision
     self.rev = revision
     fourfour = revision.attributes['fourfour']
     (ok, found) = client.views.lookup(fourfour)
     assert ok, found
     self.view = found
Ejemplo n.º 4
0
    def test_upload_to_config(self):
        # Create a "replace" config, then upload a CSV fixture through it.
        creator = Socrata(auth)
        config_name = "some_config %s" % uuid.uuid4()
        (ok, config) = creator.configs.create(config_name, "replace")
        self.assertTrue(ok, config)

        uploader = Socrata(auth)
        with open('test/fixtures/simple.csv', 'rb') as fixture:
            configured = uploader.using_config(config_name, self.view)
            (rev, job) = configured.csv(fixture)
            action = rev.attributes['action']
            self.assertEqual(action['type'], 'replace')
            self.assertTrue(job.attributes['created_at'])
Ejemplo n.º 5
0
 def wrapper(slf):
     # Create a dataset from a fixture file, hand the upload result to the
     # wrapped test method, and delete the view afterwards.
     label = str(method.__qualname__)
     client = Socrata(auth)
     with open('test/fixtures/%s' % filename, 'rb') as fixture:
         pending = client.create(name="test for %s" % label,
                                 description="a description")
         upload = getattr(pending, kind)
         (revision, output) = upload(fixture)
         try:
             method(slf, output)
         finally:
             # Runs whether or not the wrapped test raised.
             (ok, view) = client.views.lookup(revision.view_id())
             view.delete()
Ejemplo n.º 6
0
def create(name, filepath):
    """Create a dataset named *name* from the CSV at *filepath*, then update it.

    The initial revision is applied and waited on; afterwards the result of
    looking up the revision's 'fourfour' is passed to ``update``.
    """
    client = Socrata(auth)
    with open(filepath, 'rb') as handle:
        pending = client.create(name=name)
        (first_revision, output_schema) = pending.csv(handle)

        apply_job = first_revision.apply(output_schema=output_schema)
        apply_job.wait_for_finish()

        fourfour = first_revision.attributes['fourfour']
        update(client.views.lookup(fourfour))
    def __init__(self, uuid=None, mq_fp=None, auth_param=auth_param):
        """Store the constructor arguments, build the client, load metadata.

        Args:
            uuid: Dataset identifier; ``existing_dataset`` is its truthiness.
            mq_fp: Optional value passed to ``ITSMetadataQuestionnaire``.
            auth_param: Positional arguments unpacked into ``Authorization``.
        """
        self.uuid = uuid
        self.existing_dataset = bool(uuid)
        self.mq_fp = mq_fp
        self.auth_param = auth_param

        credentials = Authorization(*auth_param)
        self.socrata = Socrata(credentials)

        # The questionnaire is only built when a value was supplied.
        self.mq = ITSMetadataQuestionnaire(mq_fp) if mq_fp else None
        self.metadata = None
        self.retrieve_metadata()
Ejemplo n.º 8
0
    def __init__(self, env: Literal["test", "impl", "prod"]) -> None:
        """Initialize this class instance with default values.

        Args:
            env: Environment name; it is logged and stored on ``self.env``.
        """
        logger.info(f"Initializing Loader for env {env}")
        # Set env
        self.env = env

        # Load lookup tables
        self.load_files_table()
        self.load_schemas_table()
        self.load_tracker_table()

        # Initialize Socrata client
        # NOTE(review): the ``Tuple[str]`` annotation looks inaccurate for the
        # value returned by ``Authorization(...)`` -- confirm and correct it.
        auth: Tuple[str] = Authorization(SOCRATA_DOMAIN, *SOCRATA_CREDENTIALS)
        self.client = Socrata(auth)
Ejemplo n.º 9
0
    def test_source_to_config(self):
        # Create a uniquely named config with the "replace" action.
        p = Socrata(auth)
        name = "some_config %s" % str(uuid.uuid4())
        (ok, config) = p.configs.create(name, "replace")
        self.assertTrue(ok, config)

        # Upload inline CSV text through that config. The literal's embedded
        # whitespace is part of the uploaded data, so it is left untouched.
        p = Socrata(auth)
        (rev, job) = p.using_config(name, self.view).csv("""a,b,c
                1,2,3
                4,5,6
                7,8,9
            """,
                                                         filename="abc.csv")
        # The revision carries the config's action and the job has a creation time.
        self.assertEqual(rev.attributes['action']['type'], 'replace')
        self.assertTrue(job.attributes['created_at'])
Ejemplo n.º 10
0
    def test_create_new_csv_from_str(self):
        # Inline CSV text; the indentation inside the literal is part of the data.
        string = """a,b,c
        1,2,3
        4,5,6
        7,8,9
        """

        # Create a dataset from the string, uploaded under the name "foo.csv".
        (revision, output) = Socrata(auth).create(
            name="cool dataset",
            description="a description").csv(string, filename="foo.csv")
        try:
            self.assertIsNotNone(output.attributes['completed_at'])
        finally:
            # Delete the view whether or not the assertion passed.
            (ok, view) = Socrata(auth).views.lookup(revision.view_id())
            view.delete()
Ejemplo n.º 11
0
 def test_put_source_in_revision(self):
     # Create an upload source, feed it a dataframe, then attach the source
     # to a freshly created revision. Return values are not inspected.
     upload = Socrata(auth).sources.create_upload('foo.csv')
     frame = pd.read_csv('test/fixtures/simple.csv')
     upload.df(frame)
     revision = self.create_rev()
     upload.add_to_revision(revision)
Ejemplo n.º 12
0
    def test_source_change_on_existing_upload(self):
        # Upload a CSV, change two parse options on the existing source, and
        # check the resulting parse options and output column names.
        client = Socrata(auth)
        (ok, source) = client.sources.create_upload('foo.csv')
        self.assertTrue(ok, source)

        with open('test/fixtures/skip-header.csv', 'rb') as fixture:
            (ok, source) = source.csv(fixture)
            self.assertTrue(ok, source)

        change = source.change_parse_option('header_count').to(2)
        change = change.change_parse_option('column_header').to(2)
        (ok, source) = change.run()

        self.assertTrue(ok, source)

        parse_options = source.attributes['parse_options']
        for option in ('header_count', 'column_header'):
            self.assertEqual(parse_options[option], 2)

        input_schema = source.get_latest_input_schema()
        # NOTE(review): `ok` is still the flag returned by run() above.
        self.assertTrue(ok, input_schema)
        (ok, output_schema) = input_schema.latest_output()
        self.assertTrue(ok, output_schema)

        # Exactly three output columns are expected.
        first, second, third = output_schema.attributes['output_columns']

        self.assertEqual(first['field_name'], 'a')
        self.assertEqual(second['field_name'], 'b')
        self.assertEqual(third['field_name'], 'c')
Ejemplo n.º 13
0
def create_new_dataset(client: Socrata, dataframe: DataFrame, name: str,
                       description: str):
    """Create a new Socrata dataset from a dataframe and apply its revision.

    Args:
        client: Socrata client used to create the dataset.
        dataframe: Data to upload; a 'geometry' column, when present, selects
            the geometry handling below.
        name: Name given to the new dataset.
        description: Description given to the new dataset.

    Returns:
        The revision created for the new dataset.
    """
    revision: Revision
    output_schema: OutputSchema
    draft = client.create(name=name,
                          description=description,
                          attributionLink='https://api.census.gov')
    revision, output_schema = draft.df(dataframe)
    output_schema = prepare_output_schema(output_schema)

    # Handle geometry column type
    if 'geometry' in dataframe.columns:
        geometry: Optional[Literal['points', 'polygons']] = None
        wkt = dataframe['geometry'].fillna('')
        if len(dataframe.loc[wkt.str.match('^POINT')]):
            geometry = 'points'
        elif len(dataframe.loc[wkt.str.match('^MULTIPOLYGON')]):
            geometry = 'polygons'
        output_schema = add_geometry_to_output_schema(output_schema, geometry)

    # Handle pre-1.x versions of Socrata-py
    if isinstance(output_schema, tuple):
        output_schema = output_schema[1]

    output_schema.wait_for_finish()
    revision.apply(output_schema=output_schema)
    return revision
Ejemplo n.º 14
0
def to_socrata(
    domain: Union[URL, str],
    dataframe: DataFrame,
    dataset_id: str = None,
    name: str = None,
    description: str = None,
    auth: Tuple[str, str] = None,
    open_in_browser: bool = True,
) -> URL:
    """Publish an autocensus dataframe to Socrata.

    Args:
        domain: Socrata domain to publish to.
        dataframe: Data to publish. Its 'geometry' column, when present, is
            replaced in place with the serialized values.
        dataset_id: Identifier of an existing dataset to update; a new
            dataset is created when this is ``None``.
        name: Name for a new dataset; a default is used when ``None``.
        description: Description for a new dataset; empty when ``None``.
        auth: Credentials handed to ``look_up_socrata_credentials``.
        open_in_browser: Open the revision in a browser when ``True``.

    Returns:
        URL built from the revision's ``ui_url()``.
    """
    # Serialize geometry to WKT. Only the column lookup is guarded, so a
    # KeyError raised while serializing a value is not silently swallowed.
    try:
        geometry = dataframe['geometry']
    except KeyError:
        pass
    else:
        dataframe['geometry'] = geometry.map(serialize_to_wkt)

    # Initialize client
    client = Socrata(
        Authorization(str(domain), *look_up_socrata_credentials(auth)))

    # If no 4x4 was supplied, create a new dataset
    if dataset_id is None:
        name = name if name is not None else 'American Community Survey Data'
        description = description if description is not None else ''
        revision = create_new_dataset(client, dataframe, name, description)
    else:
        revision = update_existing_dataset(client, dataframe, dataset_id)

    # Return URL
    if open_in_browser is True:
        revision.open_in_browser()
    return URL(revision.ui_url())
Ejemplo n.º 15
0
    def test_show_config(self):
        # Create a config, then fetch it again through show().
        client = Socrata(auth)
        config_name = "some_config %s" % uuid.uuid4()
        (ok, created) = client.configs.create(config_name, "replace")
        self.assertTrue(ok, created)

        (ok, shown) = created.show()
        self.assertTrue(ok, shown)
Ejemplo n.º 16
0
    def test_lookup_config(self):
        # Create a config and make sure it can be looked up by name.
        client = Socrata(auth)
        config_name = "some_config %s" % uuid.uuid4()
        client.configs.create(config_name, "replace")

        found = client.configs.lookup(config_name)

        self.assertEqual(found.attributes['name'], config_name)
Ejemplo n.º 17
0
    def test_create_new_csv(self):
        # Create a dataset from the CSV fixture and check the upload result.
        with open('test/fixtures/simple.csv', 'rb') as fixture:
            pending = Socrata(auth).create(name="cool dataset",
                                           description="a description")
            (revision, output) = pending.csv(fixture)

            self.assertEqual(output.attributes['error_count'], 0)
            self.assertIsNotNone(output.attributes['completed_at'])
Ejemplo n.º 18
0
    def test_create_new_shapefile(self):
        # Create a dataset from the zipped fixture via shapefile().
        with open('test/fixtures/zillow.zip', 'rb') as fixture:
            pending = Socrata(auth).create(name="zillow",
                                           description="a description")
            (revision, output) = pending.shapefile(fixture)

            self.assertEqual(output.attributes['error_count'], 0)
            self.assertIsNotNone(output.attributes['completed_at'])
Ejemplo n.º 19
0
    def test_create_source_outside_rev(self):
        # Create an upload source directly on the client and check its
        # filename and advertised operations.
        upload = Socrata(auth).sources.create_upload('foo.csv')

        source_type = upload.attributes['source_type']
        self.assertEqual(source_type['filename'], 'foo.csv')

        for operation in ('show', 'bytes'):
            assert operation in upload.list_operations()
Ejemplo n.º 20
0
    def test_put_source_in_revision(self):
        # Upload a CSV into a standalone source, then attach that source to
        # a new revision. Return values are not inspected.
        upload = Socrata(auth).sources.create_upload('foo.csv')

        with open('test/fixtures/simple.csv', 'rb') as fixture:
            uploaded = upload.csv(fixture)
            uploaded.get_latest_input_schema()
            revision = self.create_rev()
            uploaded.add_to_revision(revision)
Ejemplo n.º 21
0
    def test_delete_config(self):
        # Delete a freshly created config; showing it afterwards must raise.
        client = Socrata(auth)
        config_name = "some_config %s" % uuid.uuid4()
        doomed = client.configs.create(config_name, "replace")

        doomed.delete()

        # TODO exception
        with self.assertRaises(UnexpectedResponseException):
            doomed.show()
Ejemplo n.º 22
0
    def test_source_csv_outside_rev(self):
        # Feed a dataframe into a standalone source and check the input
        # column names of its latest input schema.
        upload = Socrata(auth).sources.create_upload('foo.csv')

        frame = pd.read_csv('test/fixtures/simple.csv')
        upload = upload.df(frame)
        schema = upload.get_latest_input_schema()
        columns = schema.attributes['input_columns']
        field_names = sorted(column['field_name'] for column in columns)
        self.assertEqual(['a', 'b', 'c'], field_names)
Ejemplo n.º 23
0
    def test_list_operations(self):
        # Create a config, then check that listing configs includes it.
        client = Socrata(auth)
        wanted = "some_config %s" % uuid.uuid4()
        client.configs.create(wanted, "replace")

        listed = client.configs.list()

        # Assert there's some config on this domain where the
        # name is what we want
        matches = [entry.attributes['name'] == wanted for entry in listed]
        self.assertTrue(any(matches))
Ejemplo n.º 24
0
    def test_source_change_header_rows(self):
        # Change two parse options on a new source and check they are stored.
        upload = Socrata(auth).sources.create_upload('foo.csv')

        pending = upload.change_parse_option('header_count').to(2)
        pending = pending.change_parse_option('column_header').to(2)
        upload = pending.run()

        parse_options = upload.attributes['parse_options']
        for option in ('header_count', 'column_header'):
            self.assertEqual(parse_options[option], 2)
Ejemplo n.º 25
0
    def test_create_new_csv_from_str(self):
        # Inline CSV text; the indentation inside the literal is part of the data.
        string = """a,b,c
        1,2,3
        4,5,6
        7,8,9
        """

        # Create a dataset from the string, uploaded under the name "foo.csv".
        (revision, output) = Socrata(auth).create(
            name = "cool dataset",
            description = "a description"
        ).csv(string, filename = "foo.csv")

        self.assertIsNotNone(output.attributes['completed_at'])
Ejemplo n.º 26
0
    def test_put_source_in_revision(self):
        # Create a standalone source, feed it a dataframe, then attach the
        # source to a new revision, checking the flag after every step.
        client = Socrata(auth)

        (ok, upload) = client.sources.create_upload('foo.csv')
        self.assertTrue(ok, upload)

        frame = pd.read_csv('test/fixtures/simple.csv')
        (ok, schema) = upload.df(frame)
        self.assertTrue(ok, schema)

        revision = self.create_rev()

        (ok, upload) = upload.add_to_revision(revision)
        self.assertTrue(ok, upload)
Ejemplo n.º 27
0
class TestCase(unittest.TestCase):
    """Base test case that creates a 'test-view' revision and view per test."""

    def create_rev(self):
        """Create an update revision on the test view and remember it.

        Returns:
            The revision returned by ``create_update_revision``.
        """
        (ok, r) = self.view.revisions.create_update_revision()
        assert ok, r
        self.rev = r
        return r

    def create_input_schema(self, rev=None, filename='simple.csv'):
        """Upload a fixture file into a revision and return its input schema.

        Args:
            rev: Revision to upload into; a new one is created when falsy.
            filename: Fixture file name under ``test/fixtures/``.

        Returns:
            The latest input schema of the uploaded source.
        """
        if not rev:
            rev = self.create_rev()
        (ok, source) = rev.create_upload('foo.csv')
        assert ok, source
        with open('test/fixtures/%s' % filename, 'rb') as f:
            (ok, source) = source.csv(f)
            assert ok, source
            return source.get_latest_input_schema()

    def create_output_schema(self, input_schema=None):
        """Transform an input schema into a single-column output schema.

        Args:
            input_schema: Schema to transform; a new one is created when falsy.

        Returns:
            The output schema produced by the transform.
        """
        if not input_schema:
            input_schema = self.create_input_schema()

        (ok, output_schema) = input_schema.transform({
            'output_columns': [{
                "field_name": "b",
                "display_name": "b, but as a number",
                "position": 0,
                "description": "b but with a bunch of errors",
                "transform": {
                    "transform_expr": "to_number(b)"
                }
            }]
        })
        assert ok, output_schema
        return output_schema

    def setUp(self):
        """Create a 'test-view' revision and look up the view behind it."""
        self.pub = Socrata(auth)
        (ok, rev) = self.pub.new({'name': 'test-view'})
        assert ok, rev
        self.rev = rev
        (ok, view) = self.pub.views.lookup(rev.attributes['fourfour'])
        assert ok, view
        self.view = view

    def tearDown(self):
        """Discard the current revision, if any, and always delete the view."""
        try:
            if getattr(self, 'rev', False):
                self.rev.discard()
        finally:
            # Delete the view even if discarding the revision raised, so a
            # failed discard does not leave the view behind.
            self.view.delete()
Ejemplo n.º 28
0
    def test_upload_csv_outside_rev(self):
        # Upload a CSV into a standalone source and check the input column
        # names of its latest input schema.
        client = Socrata(auth)

        (ok, upload) = client.sources.create_upload('foo.csv')
        self.assertTrue(ok, upload)

        with open('test/fixtures/simple.csv', 'rb') as fixture:
            (ok, upload) = upload.csv(fixture)
            schema = upload.get_latest_input_schema()
            # NOTE(review): `ok` is the flag returned by csv() above.
            self.assertTrue(ok, schema)
            columns = schema.attributes['input_columns']
            field_names = sorted(column['field_name'] for column in columns)
            self.assertEqual(['a', 'b', 'c'], field_names)
Ejemplo n.º 29
0
    def test_update_config(self):
        # Create a "replace" config, switch it to "update" with one column,
        # and check that both changes are reflected in its attributes.
        client = Socrata(auth)
        config_name = "some_config %s" % uuid.uuid4()
        created = client.configs.create(config_name, "replace")

        foo_column = {
            "field_name": "foo",
            "display_name": "Foo is the display name",
            "transform_expr": "to_number(`foo`)",
            "format": {},
            "description": "",
            "is_primary_key": None,
            "flags": []
        }
        columns = [foo_column]

        changes = {'data_action': 'update', 'columns': columns}
        updated = created.update(changes)

        self.assertEqual(updated.attributes["data_action"], "update")
        self.assertEqual(updated.attributes["columns"], columns)
Ejemplo n.º 30
0
    def test_create_config_with_non_defaults(self):
        # Create a config with explicit parse options and one column, then
        # compare the stored attributes against the expected full values.
        config_name = "some_config %s" % uuid.uuid4()
        client = Socrata(auth)

        requested_parse_options = {
            "encoding": "utf8",
            "header_count": 2,
            "column_header": 2
        }
        requested_columns = [{
            "field_name": "foo",
            "display_name": "Foo is the display name",
            "transform_expr": "to_number(`foo`)"
        }]
        (ok, config) = client.configs.create(
            config_name,
            "replace",
            parse_options=requested_parse_options,
            columns=requested_columns)
        self.assertTrue(ok, config)
        self.assertEqual(config.attributes['name'], config_name)

        expected_parse_options = {
            "encoding": "utf8",
            "header_count": 2,
            "column_header": 2,
            "quote_char": '"',
            "parse_source": True,
            "column_separator": ",",
            "remove_empty_rows": True,
            "trim_whitespace": True
        }
        self.assertEqual(config.attributes['parse_options'],
                         expected_parse_options)

        expected_columns = [{
            "field_name": "foo",
            "display_name": "Foo is the display name",
            "transform_expr": "to_number(`foo`)",
            "format": {},
            "description": "",
            "is_primary_key": None,
            "flags": []
        }]
        self.assertEqual(config.attributes['columns'], expected_columns)