Example #1
0
def run_job(job):
    """Worker function to index each row in each table in a SQL Server database.

    For every table returned by ``get_tables`` this:
      1. builds and sends a schema entry (primary/foreign keys, indexed
         columns, column types, row count),
      2. resolves the list of columns to index (honouring the job's
         ``fields_to_keep`` / ``fields_to_skip`` patterns),
      3. when a geometry column exists, prepends X/Y (points) or
         WKT + extent (other geometry types) expressions to the select list,
      4. streams each row to ZMQ as an index entry, emitting related-table
         records as separate "link" entries when relates are configured.

    NOTE(review): all SQL here is assembled with str.format on table/column
    names -- acceptable only because those names come from the job
    configuration and database metadata, not from end users.
    """
    job.connect_to_zmq()
    job.connect_to_database()
    tables = get_tables(job)

    for tbl in set(tables):
        geo = {}
        has_shape = False
        is_point = False
        shape_field_name = ''

        # --------------------------------------------------------------------------------------------------
        # Get the table schema.
        # --------------------------------------------------------------------------------------------------
        schema = {}
        schema['name'] = tbl

        # Get the primary key.
        qry = "SELECT K.COLUMN_NAME FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS C JOIN " \
              "INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS K ON " \
              "C.TABLE_NAME = K.TABLE_NAME AND C.CONSTRAINT_NAME = K.CONSTRAINT_NAME WHERE " \
              "C.CONSTRAINT_TYPE = 'PRIMARY KEY' AND K.TABLE_NAME = '{0}'".format(tbl)
        cols = job.execute_query(qry).fetchall()
        primary_key = ''
        if cols:
            primary_key = cols[0][0]

        # Get the foreign key.
        qry = "SELECT K.COLUMN_NAME FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS C JOIN " \
              "INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS K ON " \
              "C.TABLE_NAME = K.TABLE_NAME AND C.CONSTRAINT_NAME = K.CONSTRAINT_NAME WHERE " \
              "C.CONSTRAINT_TYPE = 'FOREIGN KEY' AND K.TABLE_NAME = '{0}'".format(tbl)
        cols = job.execute_query(qry).fetchall()
        foreign_key = ''
        if cols:
            foreign_key = cols[0][0]

        # Get the columns that have indexes.
        qry = "SELECT COL_NAME(ic.object_id,ic.column_id) AS column_name " \
              "FROM sys.indexes AS i INNER JOIN sys.index_columns AS ic ON i.object_id = ic.object_id " \
              "AND i.index_id = ic.index_id WHERE i.object_id = OBJECT_ID('{0}')".format(tbl)
        cols = job.execute_query(qry).fetchall()
        indexed_cols = []
        if cols:
            for col in cols:
                indexed_cols.append(col[0])

        # Describe every column: name, type, key/index/nullability properties.
        schema_columns = []
        for col in job.db_cursor.columns(table=tbl).fetchall():
            column = {}
            props = []
            column['name'] = col.column_name
            column['type'] = col.type_name
            if col.type_name == 'geometry':
                column['isGeo'] = True
                column['crs'] = job.db_cursor.execute("select {0}.STSrid from {1}".format(col.column_name, tbl)).fetchone()[0]
            if col.column_name == primary_key:
                props.append('PRIMARY KEY')
            if col.column_name == foreign_key:
                props.append('FOREIGN KEY')
            if col.column_name in indexed_cols:
                props.append('INDEXED')
            if col.is_nullable == 'YES':
                props.append('NULLABLE')
            else:
                props.append('NOTNULLABLE')
            column['properties'] = props
            schema_columns.append(column)
        schema['fields'] = schema_columns

        # --------------------------------
        # Get the list of columns to keep.
        # --------------------------------
        if not job.fields_to_keep == ['*']:
            columns = []
            column_types = {}
            for col in job.fields_to_keep:
                qry = "select column_name, data_type from INFORMATION_SCHEMA.columns where table_name = '{0}' and column_name like '{1}'".format(tbl, col)
                for c in job.execute_query(qry).fetchall():
                    # BUG FIX: this query only returns column_name and
                    # data_type; the previous `c.type_name` attribute access
                    # raised AttributeError on every row.
                    if not c.data_type == 'geometry':
                        columns.append("{0}.{1}".format(tbl, c[0]))
                        column_types[c[0]] = c[1]
                    else:
                        shape_field_name = c.column_name
        else:
            columns = []
            column_types = {}
            for c in job.db_cursor.columns(table=tbl).fetchall():
                if not c.type_name == 'geometry':
                    columns.append("{0}.{1}".format(tbl, c.column_name))
                    column_types[c.column_name] = c.type_name
                else:
                    shape_field_name = c.column_name

        if job.fields_to_skip:
            for col in job.fields_to_skip:
                qry = "select column_name from INFORMATION_SCHEMA.columns where table_name = '{0}' and column_name like '{1}'".format(tbl, col)
                [columns.remove("{0}.{1}".format(tbl, c[0])) for c in job.execute_query(qry).fetchall()]

        # --------------------------------------------------------------------------------------------------------
        # Get the column names and types from the related tables.
        # --------------------------------------------------------------------------------------------------------
        related_columns = []
        if job.related_tables:
            for related_table in job.related_tables:
                for c in job.db_cursor.columns(table=related_table):
                    if not c.type_name == 'geometry':
                        related_columns.append("{0}.{1}".format(related_table, c.column_name))

        # --------------------------------------------------------------------------------------------------------
        # Check for a geometry column and pull out X,Y for points and extent coordinates for other geometry types.
        # --------------------------------------------------------------------------------------------------------
        geom_type = ''
        if shape_field_name:
            has_shape = True
            srid = job.db_cursor.execute("select {0}.STSrid from {1}".format(shape_field_name, tbl)).fetchone()[0]
            geo['code'] = srid
            geom_type = job.db_cursor.execute("select {0}.STGeometryType() from {1}".format(shape_field_name, tbl)).fetchone()[0]
            if geom_type == 'Point':
                is_point = True
                # After the two inserts the select list starts [Y, X, ...],
                # which row handling below relies on (row[0]=Y, row[1]=X).
                columns.insert(0, "{0}.{1}.STPointN(1).STX as X".format(tbl, shape_field_name))
                columns.insert(0, "{0}.{1}.STPointN(1).STY as Y".format(tbl, shape_field_name))
            else:
                # Successive insert(0, ...) calls leave the select list as
                # [WKT, XMIN, YMIN, XMAX, YMAX, ...].
                columns.insert(0, "{0}.{1}.STEnvelope().STPointN((3)).STY as YMAX".format(tbl, shape_field_name))
                columns.insert(0, "{0}.{1}.STEnvelope().STPointN((3)).STX as XMAX".format(tbl, shape_field_name))
                columns.insert(0, "{0}.{1}.STEnvelope().STPointN((1)).STY as YMIN".format(tbl, shape_field_name))
                columns.insert(0, "{0}.{1}.STEnvelope().STPointN((1)).STX as XMIN".format(tbl, shape_field_name))
                columns.insert(0, "{0}.{1}.STAsText() as WKT".format(tbl, shape_field_name))

        # -----------------------------
        # Query the table for the rows.
        # -----------------------------
        sql_query = job.get_table_query(tbl)
        if not sql_query:
            row_count = float(job.db_cursor.execute("select Count(*) from {0}".format(tbl)).fetchone()[0])
            rows = job.db_cursor.execute("select {0} from {1}".format(','.join(columns), tbl))
        else:
            # Reuse everything from FROM onward so the count matches the
            # configured query; strip ORDER BY if Count(*) rejects it.
            q = re.search('FROM(.*)', sql_query, re.IGNORECASE).group(0)
            try:
                row_count = float(job.db_cursor.execute("select Count(*) {0}".format(q)).fetchone()[0])
            except Exception:
                row_count = float(job.db_cursor.execute("select Count(*) {0}".format(q.split('ORDER BY')[0])).fetchone()[0])
            rows = job.execute_query("select {0} {1}".format(','.join(columns + related_columns), q))

        # -----------------------------------------------------------------------------
        # Index each row in the table. If there are relates, index the related records.
        # -----------------------------------------------------------------------------
        cur_id = -1
        entry = {}
        link = {}
        wkt_col = -1
        action_type = job.action_type
        discovery_id = job.discovery_id
        location_id = job.location_id
        # Reduce "table.column" (and geometry expressions) to the bare name
        # after the first '.' for field mapping.
        columns = [c.split('.')[1] for c in columns]
        mapped_fields = job.map_fields(tbl, columns, column_types)
        increment = job.get_increment(row_count)
        if 'WKT' in columns:
            has_shape = True
            try:
                wkt_col = mapped_fields.index('fs_WKT')
            except ValueError:
                wkt_col = mapped_fields.index('WKT')
        geometry_ops = worker_utils.GeometryOps()
        generalize_value = job.generalize_value


        # -----------------------------------------------
        # Add an entry for the table itself with schema.
        # -----------------------------------------------
        mapped_cols = {}
        schema['rows'] = row_count
        table_entry = {}
        table_entry['id'] = '{0}_{1}'.format(location_id, tbl)
        table_entry['location'] = location_id
        table_entry['action'] = action_type
        table_entry['format_type'] = 'Schema'
        table_entry['entry'] = {'fields': {'_discoveryID': discovery_id, 'name': tbl, 'path': job.sql_server_connection_str, 'format_type': 'Schema'}}
        table_entry['entry']['fields']['schema'] = schema
        job.send_entry(table_entry)

        for i, row in enumerate(rows):
            # A new parent record starts when the leading id changes (or on
            # every row when there are no relates).
            if not cur_id == row[0] or not job.related_tables:
                if entry:
                    # Flush the previous entry; on failure drop it and move on.
                    try:
                        job.send_entry(entry)
                    except Exception as ex:
                        entry = {}
                        continue
                    entry = {}
                if has_shape:
                    if is_point:
                        # Select list was [Y, X, ...] -- see the insert order above.
                        geo['lon'] = row[1]
                        geo['lat'] = row[0]
                        mapped_cols = dict(zip(mapped_fields[2:], row[2:]))
                        mapped_cols['geometry_type'] = 'Point'
                    else:
                        if generalize_value == 0 or generalize_value == 0.0:
                            if wkt_col >= 0:
                                geo['wkt'] = row[wkt_col]
                                mapped_cols = dict(zip(mapped_fields, row))
                            else:
                                geo['wkt'] = row[0]
                        elif generalize_value > 0.9:
                            # Heavy generalization: fall back to the extent only.
                            if wkt_col >= 0:
                                geo['wkt'] = row[wkt_col]
                                mapped_cols = dict(zip(mapped_fields, row))
                            else:
                                geo['xmin'] = row[1]
                                geo['ymin'] = row[2]
                                geo['xmax'] = row[3]
                                geo['ymax'] = row[4]
                        else:
                            if wkt_col >= 0:
                                geo['wkt'] = geometry_ops.generalize_geometry(str(row[wkt_col]), generalize_value)
                                mapped_cols = dict(zip(mapped_fields, row))
                            else:
                                geo['wkt'] = geometry_ops.generalize_geometry(str(row[0]), generalize_value)
                        if not mapped_cols:
                            # Skip the five geometry expression columns.
                            mapped_cols = dict(zip(mapped_fields[5:], row[5:]))
                        if 'Polygon' in geom_type:
                            mapped_cols['geometry_type'] = 'Polygon'
                        elif 'Polyline' in geom_type:
                            mapped_cols['geometry_type'] = 'Polyline'
                        else:
                            mapped_cols['geometry_type'] = 'Point'
                else:
                    mapped_cols = dict(zip(mapped_fields, row))

                # Create an entry to send to ZMQ for indexing.
                mapped_cols['format_type'] = 'Record'
                mapped_cols['format'] = 'application/vnd.sqlserver.record'
                # Prefix a random number so ids stay unique across tables/runs.
                if 'id' in mapped_cols:
                    mapped_cols['id'] = '{0}{1}'.format(random.randint(0, 1000000), mapped_cols['id'])
                else:
                    mapped_cols['id'] = "{0}{1}".format(random.randint(0, 1000000), i)
                entry['id'] = '{0}_{1}_{2}'.format(location_id, tbl, i)
                entry['location'] = location_id
                entry['action'] = action_type

                # If the table supports relates/joins, handle them and add them as links.
                if job.related_tables:
                    links = []
                    # Columns past len(columns) in the row belong to the relate.
                    related_field_names = [d[0] for d in row.cursor_description[len(columns):]]
                    related_field_types = dict(zip(related_field_names, [d[1] for d in row.cursor_description[len(columns):]]))
                    mapped_related_fields = []
                    for related_table in job.related_tables:
                        mapped_related_fields += job.map_fields(related_table, related_field_names, related_field_types)
                    # NOTE(review): this 'relation' value is discarded -- the
                    # very next line rebinds `link` to a new dict. Presumably
                    # it was meant to be set on the new dict; confirm before
                    # changing what downstream consumers receive.
                    link['relation'] = 'contains'
                    link = dict(zip(mapped_related_fields, row[len(columns):]))
                    try:
                        link['id'] = "{0}{1}".format(random.randint(0, 1000000), link['id'])
                    except KeyError:
                        link['id'] = "{0}{1}".format(random.randint(0, 1000000), i)

                    # Send this link as an entry and set extract to true.
                    link_entry = {}
                    link_entry['id'] = "{0}{1}".format(link['id'], location_id)
                    link_entry['action'] = action_type
                    link_entry['entry'] = {"fields": link}
                    if job.format:
                        link_entry['entry']['fields']['__to_extract'] = True
                    job.send_entry(link_entry)
                    # Append the link to a list that will be part of the main entry.
                    links.append(link)
                    if geo:
                        entry['entry'] = {'geo': geo, 'fields': mapped_cols, 'links': links}
                    else:
                        entry['entry'] = {'fields': mapped_cols, 'links': links}
                else:
                    if geo:
                        entry['entry'] = {'geo': geo, 'fields': mapped_cols}
                    else:
                        entry['entry'] = {'fields': mapped_cols}
                # BUG FIX: _discoveryID was previously assigned twice (once
                # inside the no-relates branch and once here); the single
                # assignment below covers both branches.
                entry['entry']['fields']['_discoveryID'] = discovery_id
                cur_id = row[0]
            else:
                # Same parent id as the previous row: this row only carries a
                # new related record for the current entry.
                # NOTE(review): as above, this 'relation' value is immediately
                # discarded by the rebind on the next line.
                link['relation'] = 'contains'
                link = dict(zip(mapped_related_fields, row[len(columns):]))
                try:
                    link['id'] = "{0}{1}".format(random.randint(0, 1000000), link['id'])
                except KeyError:
                    link['id'] = "{0}{1}".format('0000', i)
                link_entry = {}
                link_entry['id'] = "{0}{1}".format(link['id'], location_id)
                link_entry['action'] = action_type
                link_entry['entry'] = {"fields": link}
                if job.format:
                    link_entry['entry']['fields']['__to_extract'] = True
                job.send_entry(link_entry)

                links.append(link)
                entry['entry']['links'] = entry['entry'].pop('links', links)

            # Report status percentage.
            if (i % increment) == 0:
                status_writer.send_percent(i / row_count, '{0}: {1:%}'.format(tbl, i / row_count), 'sql_server')

        # Send final entry.
        job.send_entry(entry)
        status_writer.send_percent(1, '{0}: {1:%}'.format(tbl, 1), 'sql_server')
Example #2
0
def run_job(mongodb_job):
    """Worker function to index each document in each collection in the database.

    For every collection returned by ``get_collections`` this streams each
    document to ZMQ as an index entry -- converting any 'loc' value (GeoJSON
    object, [lon, lat] pair, or coordinate-pair bounding box) into geo fields
    -- and finally sends a schema entry describing the collection.

    NOTE(review): this module is written in Python 2 style (``iteritems``,
    ``dict.items()`` concatenation, list-returning ``keys()``/``values()``);
    kept as-is for compatibility with the rest of the file.
    """
    job = mongodb_job
    job.connect_to_zmq()
    job.connect_to_database()
    collection_names = get_collections(job)

    grid_fs = None
    for collection_name in collection_names:
        if job.has_gridfs:
            # '*.files' collections are the GridFS metadata side; open a
            # GridFS handle on the bucket prefix (the part before the '.').
            if collection_name.find('.files') > 0:
                grid_fs = gridfs.GridFS(job.db_connection, collection_name.split('.')[0])
        # NOTE(review): grid_fs is never reset between collections, so a
        # non-GridFS collection processed after a '*.files' one reuses the
        # stale handle -- confirm the configured collections are homogeneous.

        col = job.db_connection[collection_name]
        query = job.get_table_query(col)
        if query:
            # NOTE(review): eval of a configured query string -- only
            # acceptable because it comes from trusted job configuration.
            documents = col.find(eval(query))
        else:
            documents = col.find()

        # Index each document -- get a suitable base 10 increment for reporting percentage.
        # Count once and reuse; the same cursor/query is counted for the
        # increment, the progress reports, and the schema row count.
        doc_count = documents.count()
        increment = job.get_increment(doc_count)
        geometry_ops = worker_utils.GeometryOps()
        generalize_value = job.generalize_value
        # Converter is stateless here; build it once instead of per document.
        geo_json_converter = worker_utils.GeoJSONConverter()
        for i, doc in enumerate(documents):
            fields = doc.keys()
            field_types = dict((k, type(v)) for k, v in doc.iteritems())
            # Default to the raw document values. (Previously `values` was
            # only assigned in the non-GridFS branch or the metadata branch,
            # so it could be unbound when grid_fs was set but the file object
            # had no metadata attribute.)
            values = doc.values()
            if grid_fs:
                grid_out = grid_fs.get(doc['_id'])
                if hasattr(grid_out, 'metadata'):
                    #TODO: Determine how to ingest files stored in the database.
                    #with open(r"c:\temp\{0}".format(grid_out.filename), "wb") as fp:
                        #fp.write(grid_out.read())
                    # Merge the file's metadata keys/values into the indexed
                    # fields, replacing the nested 'metadata' entry itself.
                    fields += grid_out.metadata.keys()
                    field_types = dict(field_types.items() + dict((k, type(v)) for k, v in grid_out.metadata.iteritems()).items())
                    values = [doc[k] for k in doc.keys() if not k == 'metadata']
                    values += grid_out.metadata.values()
                    fields.remove('metadata')
            entry = {}
            geo = {}
            if 'loc' in doc:
                if 'type' in doc['loc']:
                    # GeoJSON-style object.
                    if 'bbox' in doc['loc']:
                        if job.include_wkt:
                            wkt = geo_json_converter.convert_to_wkt(doc['loc'], 3)
                            if generalize_value == 0:
                                geo['wkt'] = wkt
                            else:
                                geo['wkt'] = geometry_ops.generalize_geometry(wkt, generalize_value)
                        geo['xmin'] = doc['loc']['bbox'][0]
                        geo['ymin'] = doc['loc']['bbox'][1]
                        geo['xmax'] = doc['loc']['bbox'][2]
                        geo['ymax'] = doc['loc']['bbox'][3]
                    elif 'Point' in doc['loc']['type']:
                        if job.include_wkt:
                            geo['wkt'] = geo_json_converter.convert_to_wkt(doc['loc'], 3)
                        geo['lon'] = doc['loc']['coordinates'][0]
                        geo['lat'] = doc['loc']['coordinates'][1]
                    else:
                        status_writer.send_state(status.STAT_WARNING, 'No bbox information for {0}.'.format(doc['_id']))
                elif isinstance(doc['loc'][0], float):
                    # Legacy [lon, lat] coordinate pair.
                    geo['lon'] = doc['loc'][0]
                    geo['lat'] = doc['loc'][1]
                else:
                    # Legacy [[xmin, xmax], [ymin, ymax]] bounding box.
                    geo['xmin'] = doc['loc'][0][0]
                    geo['xmax'] = doc['loc'][0][1]
                    geo['ymin'] = doc['loc'][1][0]
                    geo['ymax'] = doc['loc'][1][1]
                # 'loc' is represented by the geo block, not as a field.
                fields.remove('loc')
            mapped_fields = job.map_fields(col.name, fields, field_types)
            mapped_fields = dict(zip(mapped_fields, values))
            mapped_fields['_discoveryID'] = job.discovery_id
            mapped_fields['title'] = col.name
            mapped_fields['format_type'] = 'Record'
            mapped_fields['format'] = 'application/vnd.mongodb.record'
            entry['id'] = str(doc['_id'])
            entry['location'] = job.location_id
            entry['action'] = job.action_type
            entry['entry'] = {'geo': geo, 'fields': mapped_fields}
            job.send_entry(entry)
            if (i % increment) == 0:
                status_writer.send_percent(float(i) / doc_count,
                                           '{0}: {1:%}'.format(collection_name, float(i) / doc_count),
                                           'MongoDB')

        # NOTE(review): the schema below is derived from the *last* document's
        # fields/field_types/geo; an empty collection leaves those names
        # unbound (or stale from the previous collection) -- confirm empty
        # collections are filtered out upstream.
        schema = {}
        # BUG FIX: removed the dead `schema['name'] = col.full_name`
        # assignment that was immediately overwritten by the line below.
        schema['name'] = collection_name
        schema['rows'] = doc_count
        schema_columns = []
        index_info = col.index_information()
        indexes = []
        for v in index_info.values():
            indexes.append(v['key'][0][0])
        for n, t in dict(zip(fields, field_types.values())).items():
            column = {}
            props = []
            column['name'] = n
            column['type'] = t
            if n in indexes:
                props.append('INDEXED')
            if 'ObjectId' in str(t) or n == '_id':
                props.append('NOTNULLABLE')
                props.append('PRIMARY KEY')
            else:
                props.append('NULLABLE')
            column['properties'] = props
            schema_columns.append(column)
        if geo:
            props = []
            if 'loc' in indexes:
                props.append('INDEXED')
            schema_columns.append({'name': 'loc', 'isGeo': True, 'properties': props})
        schema['fields'] = schema_columns

        # Add an entry for the table itself with schema.
        table_entry = {}
        table_entry['id'] = '{0}_{1}'.format(job.location_id, collection_name)
        table_entry['location'] = job.location_id
        table_entry['action'] = job.action_type
        table_entry['format_type'] = 'Schema'
        table_entry['entry'] = {'fields': {'_discoveryID': job.discovery_id, 'name': collection_name, 'path': job.mongodb_client_info}}
        table_entry['entry']['fields']['schema'] = schema
        job.send_entry(table_entry)
Example #3
0
def run_job(oracle_job):
    """Worker function to do the indexing."""
    job = oracle_job
    job.connect_to_zmq()
    job.connect_to_database()
    job.db_cursor.arraysize = 250

    all_tables = []
    all_tables += get_tables(job)
    all_tables += get_layers(job)
    all_tables += get_views(job)

    if not all_tables:
        status_writer.send_state(
            status.STAT_FAILED,
            "No tables, views or layers found. Check the configuration.")
        return

    # Begin indexing.
    for tbl in set(all_tables):
        geo = {}
        columns = []
        column_types = {}
        is_point = False
        has_shape = False
        geometry_field = None
        shape_type = None

        # ----------------------------------------------------------------------------
        # Check if the table is a layer/view and set name as "owner.table".
        # ----------------------------------------------------------------------------
        if isinstance(tbl, tuple):
            column_query = "select column_name, data_type from all_tab_cols where " \
                           "table_name = '{0}' and column_name like".format(tbl[0])
            query = job.get_table_query(tbl[0])
            tbl = "{0}.{1}".format(tbl[1], tbl[0])
        else:
            query = job.get_table_query(tbl)
            column_query = "select column_name, data_type from all_tab_cols where " \
                           "table_name = '{0}' and column_name like".format(tbl)

        # -----------------------------------------------------------------------------------------------------
        # Get the table schema.
        # -----------------------------------------------------------------------------------------------------
        table_schema = {}
        schema_columns = []
        table_schema['name'] = tbl

        # Get primary key and foreign keys
        primary_key_col = ''
        foreign_key_col = ''
        primary_key_qry = "select c.table_name, c.column_name from all_constraints cons, all_cons_columns c " \
            "where c.table_name = '{0}' and cons.constraint_type like 'P%' " \
            "and cons.constraint_name = c.constraint_name".format(tbl)
        primary_cols = job.execute_query(primary_key_qry).fetchone()
        if primary_cols:
            primary_key_col = primary_cols[1]
        foreign_key_qry = "select c.table_name, c.column_name from all_constraints cons, all_cons_columns c " \
            "where c.table_name = '{0}' and cons.constraint_type like 'F%' " \
            "and cons.constraint_name = c.constraint_name".format(tbl)
        foreign_cols = job.execute_query(foreign_key_qry).fetchone()
        if foreign_cols:
            foreign_key_col = foreign_cols[1]

        # Get columns that are indexed.
        if "." in tbl:
            owner, table = tbl.split('.')
            index_query = "select column_name from all_ind_columns where table_name = '{0}' and index_owner = '{1}'".format(
                table, owner)
        else:
            index_query = "select column_name from all_ind_columns where table_name = '{0}'".format(
                tbl)
        index_columns = [
            c[0] for c in job.execute_query(index_query).fetchall()
        ]

        for i, c in enumerate(
                job.execute_query(
                    "select * from {0}".format(tbl)).description):
            schema_col = {}
            schema_props = []
            schema_col['name'] = c[0]
            try:
                schema_col['type'] = field_types[c[1]]
            except (AttributeError, KeyError):
                schema_col['type'] = 'OBJECTVAR'
            try:
                if c[1] in ('SDO_GEOMETRY', 'ST_GEOMETRY'):
                    schema_col['isGeo'] = True
                elif job.db_cursor.fetchvars[i].type.name == 'ST_GEOMETRY':
                    schema_col['isGeo'] = True
                elif job.db_cursor.fetchvars[i].type.name == 'SDO_GEOMETRY':
                    schema_col['isGeo'] = True
            except AttributeError:
                pass
            if c[6] == 1:
                schema_props.append('NULLABLE')
            else:
                schema_props.append('NOTNULLABLE')
            if c[0] == primary_key_col:
                schema_props.append('PRIMARY KEY')
            if c[0] == foreign_key_col:
                schema_props.append('FOREIGN KEY')
            if c[0] in index_columns:
                schema_props.append('INDEXED')
            if schema_props:
                schema_col['properties'] = schema_props
            schema_columns.append(schema_col)
        table_schema['fields'] = schema_columns

        # ---------------------------------------------------------------------------------------
        # Create the list of columns and column types to include in the index.
        # ---------------------------------------------------------------------------------------
        if not job.fields_to_keep == ['*']:
            for col in job.fields_to_keep:
                for c in job.db_cursor.execute("{0} '{1}'".format(
                        column_query, col)).fetchall():
                    columns.append(c[0])
                    column_types[c[0]] = c[1]
                    if c[1] in ('SDO_GEOMETRY', 'ST_GEOMETRY'):
                        has_shape = True
                        geometry_field = c[0]
                        geometry_type = c[1]
        else:
            for i, c in enumerate(
                    job.db_cursor.execute(
                        "select * from {0}".format(tbl)).description):
                columns.append(c[0])
                column_types[c[0]] = c[1]
                try:
                    if c[1] in ('SDO_GEOMETRY', 'ST_GEOMETRY'):
                        has_shape = True
                        geometry_field = c[0]
                        geometry_type = c[1]
                    elif job.db_cursor.fetchvars[i].type.name == 'ST_GEOMETRY':
                        has_shape = True
                        geometry_field = c[0]
                        geometry_type = 'ST_GEOMETRY'
                    elif job.db_cursor.fetchvars[
                            i].type.name == 'SDO_GEOMETRY':
                        has_shape = True
                        geometry_field = c[0]
                        geometry_type = 'SDO_GEOMETRY'
                except AttributeError:
                    continue

        # -----------------------------------
        # Remove fields meant to be excluded.
        # -----------------------------------
        if job.fields_to_skip:
            for col in job.fields_to_skip:
                [
                    columns.remove(c[0])
                    for c in job.execute_query("{0} '{1}'".format(
                        column_query, col)).fetchall()
                ]

        # -----------------------------------------------------------
        # If there is a shape column, get the geographic information.
        # -----------------------------------------------------------
        if geometry_field:
            columns.remove(geometry_field)

            if job.db_cursor.execute("select {0} from {1}".format(
                    geometry_field, tbl)).fetchone() is None:
                status_writer.send_status(
                    "Skipping {0} - no records.".format(tbl))
                continue
            else:
                schema = job.db_cursor.execute("select {0} from {1}".format(
                    geometry_field, tbl)).fetchone()[0].type.schema

            # Figure out if geometry type is ST or SDO.
            if geometry_type == 'SDO_GEOMETRY':
                geo['code'] = job.db_cursor.execute(
                    "select c.{0}.SDO_SRID from {1} c".format(
                        geometry_field, tbl)).fetchone()[0]
                # dimension = job.db_cursor.execute("select c.shape.Get_Dims() from {0} c".format(tbl)).fetchone()[0]
                if not job.db_cursor.execute(
                        "select c.{0}.SDO_POINT from {1} c".format(
                            geometry_field, tbl)).fetchone()[0] is None:
                    is_point = True
                    if geo['code'] == 4326:
                        columns.insert(
                            0, '{0}.{1}.SDO_POINT.Y'.format(
                                schema, geometry_field))
                        columns.insert(
                            0, '{0}.{1}.SDO_POINT.X'.format(
                                schema, geometry_field))
                    else:
                        job.db_cursor.execute(
                            "SDO_CS.TRANSFORM({0}.{1}, 4326)".format(
                                schema, geometry_field))
                else:
                    columns.insert(
                        0, "sdo_geom.sdo_mbr({0}).sdo_ordinates".format(
                            geometry_field))
            else:  # ST_GEOMETRY
                shape_type = job.db_cursor.execute(
                    "select {0}.ST_GEOMETRYTYPE({1}) from {2}".format(
                        schema, geometry_field, tbl)).fetchone()[0]
                geo['code'] = int(
                    job.db_cursor.execute(
                        "select {0}.ST_SRID({1}) from {2}".format(
                            schema, geometry_field, tbl)).fetchone()[0])
                if 'POINT' in shape_type:
                    is_point = True
                    if geo['code'] == 4326 or geo['code'] == 3:
                        for x in ('y', 'x', 'astext'):
                            columns.insert(
                                0, '{0}.st_{1}({2})'.format(
                                    schema, x, geometry_field))
                    else:
                        for x in ('y', 'x', 'astext'):
                            columns.insert(
                                0, '{0}.st_{1}({0}.st_transform({2}, 4326))'.
                                format(schema, x, geometry_field))
                else:
                    if geo['code'] == 4326:
                        for x in ('maxy', 'maxx', 'miny', 'minx', 'astext'):
                            columns.insert(
                                0, '{0}.st_{1}({2})'.format(
                                    schema, x, geometry_field))
                    else:
                        try:
                            job.db_cursor.execute(
                                "select {0}.st_maxy(SDE.st_transform({1}, 4326)) from {2}"
                                .format(schema, geometry_field, tbl))
                            for x in ('maxy', 'maxx', 'miny', 'minx',
                                      'astext'):
                                columns.insert(
                                    0,
                                    '{0}.st_{1}({0}.st_transform({2}, 4326))'.
                                    format(schema, x, geometry_field))
                        except Exception:
                            for x in ('maxy', 'maxx', 'miny', 'minx',
                                      'astext'):
                                columns.insert(
                                    0, '{0}.st_{1}({2})'.format(
                                        schema, x, geometry_field))

        # -------------------------------------------------
        # Drop astext from columns if WKT is not requested.
        # -------------------------------------------------
        # include_wkt = job.include_wkt
        # if not include_wkt and not geometry_type == 'SDO_GEOMETRY':
        #     columns.pop(0)

        # ------------------------------------------------------------
        # Get the count of all the rows to use for reporting progress.
        # ------------------------------------------------------------
        if query:
            row_count = job.db_cursor.execute(
                "select count(*) from {0} where {1}".format(
                    tbl, query)).fetchall()[0][0]
        else:
            row_count = job.db_cursor.execute(
                "select count(*) from {0}".format(tbl)).fetchall()[0][0]
        if row_count == 0 or row_count is None:
            continue
        else:
            row_count = float(row_count)

        # ---------------------------
        # Get the rows to be indexed.
        # ---------------------------
        try:
            if geometry_type == 'SDO_GEOMETRY':
                rows = job.db_cursor.execute("select {0} from {1} {2}".format(
                    ','.join(columns), tbl, schema))
            else:
                # Quick check to ensure ST_GEOMETRY operations are supported.
                row = job.db_cursor.execute("select {0} from {1}".format(
                    ','.join(columns), tbl)).fetchone()
                del row
                if query:
                    rows = job.db_cursor.execute(
                        "select {0} from {1} where {2}".format(
                            ','.join(columns), tbl, query))
                else:
                    rows = job.db_cursor.execute("select {0} from {1}".format(
                        ','.join(columns), tbl))
        except Exception:
            # This can occur for ST_GEOMETRY when spatial operators are un-available (See: http://tinyurl.com/lvvhwyl)
            columns.pop(0)
            geo['wkt'] = None
            if query:
                rows = job.db_cursor.execute(
                    "select {0} from {1} where {2}".format(
                        ','.join(columns), tbl, query))
            else:
                rows = job.db_cursor.execute("select {0} from {1}".format(
                    ','.join(columns), tbl))

        # Continue if the table has zero records.
        if not rows:
            status_writer.send_status("Skipping {0} - no records.".format(tbl))
            continue

        # ---------------------------------------------------------
        # Index each row.
        # ---------------------------------------------------------
        mapped_fields = job.map_fields(tbl, columns, column_types)
        increment = job.get_increment(row_count)
        location_id = job.location_id
        action_type = job.action_type
        discovery_id = job.discovery_id
        entry = {}

        # First, add an entry for the table itself with schema.
        table_schema['rows'] = row_count
        table_entry = {}
        table_entry['id'] = '{0}_{1}'.format(location_id, tbl)
        table_entry['location'] = location_id
        table_entry['action'] = action_type
        table_entry['format_type'] = 'Schema'
        table_entry['entry'] = {
            'fields': {
                '_discoveryID': discovery_id,
                'name': tbl,
                'path': rows.connection.dsn
            }
        }
        table_entry['entry']['fields']['schema'] = table_schema
        job.send_entry(table_entry)

        if not has_shape:
            for i, row in enumerate(rows):
                try:
                    # Map column names to Voyager fields.
                    mapped_cols = dict(izip(mapped_fields, row))
                    if has_shape:
                        [mapped_cols.pop(name) for name in geom_fields]
                    mapped_cols['_discoveryID'] = discovery_id
                    mapped_cols['meta_table_name'] = tbl
                    mapped_cols['format_type'] = 'Record'
                    mapped_cols['format'] = 'application/vnd.oracle.record'
                    entry['id'] = '{0}_{1}_{2}'.format(location_id, tbl, i)
                    entry['location'] = location_id
                    entry['action'] = action_type
                    entry['entry'] = {'fields': mapped_cols}
                    job.send_entry(entry)
                    if (i % increment) == 0:
                        status_writer.send_percent(
                            i / row_count,
                            "{0}: {1:%}".format(tbl, i / row_count),
                            'oracle_worker')
                except Exception as ex:
                    status_writer.send_status(ex)
                    continue
        else:
            geom_fields = [
                name for name in mapped_fields
                if '{0}'.format(geometry_field) in name
            ]
            geometry_ops = worker_utils.GeometryOps()
            generalize_value = job.generalize_value
            for i, row in enumerate(rows):
                try:
                    # if include_wkt:
                    if is_point:
                        geo['lon'] = row[1]
                        geo['lat'] = row[2]
                    else:
                        if generalize_value == 0 or generalize_value == 0.0:
                            geo['wkt'] = row[0]
                        elif generalize_value > 0.9:
                            geo['xmin'] = row[1]
                            geo['ymin'] = row[2]
                            geo['xmax'] = row[3]
                            geo['ymax'] = row[4]
                        else:
                            geo['wkt'] = geometry_ops.generalize_geometry(
                                str(row[0]), generalize_value)

                # else:
                #     if is_point:
                #         geo['lon'] = row[0]
                #         geo['lat'] = row[1]
                #     else:
                #         if geometry_type == 'SDO_GEOMETRY':
                #             if dimension == 3:
                #                 geo['xmin'] = row[0][0]
                #                 geo['ymin'] = row[0][1]
                #                 geo['xmax'] = row[0][3]
                #                 geo['ymax'] = row[0][4]
                #             elif dimension == 2:
                #                 geo['xmin'] = row[0][0]
                #                 geo['ymin'] = row[0][1]
                #                 geo['xmax'] = row[0][2]
                #                 geo['ymax'] = row[0][3]
                #         else:
                #             geo['xmin'] = row[0]
                #             geo['ymin'] = row[1]
                #             geo['xmax'] = row[2]
                #             geo['ymax'] = row[3]

                # Map column names to Voyager fields.
                    mapped_cols = dict(izip(mapped_fields, row))
                    [mapped_cols.pop(name) for name in geom_fields]
                    mapped_cols['_discoveryID'] = discovery_id
                    mapped_cols['meta_table_name'] = tbl
                    mapped_cols['format_type'] = 'Record'
                    mapped_cols['format'] = 'application/vnd.oracle.record'
                    entry['id'] = '{0}_{1}_{2}'.format(location_id, tbl, i)
                    entry['location'] = location_id
                    entry['action'] = action_type
                    entry['entry'] = {'geo': geo, 'fields': mapped_cols}
                    job.send_entry(entry)
                    if (i % increment) == 0:
                        status_writer.send_percent(
                            i / row_count,
                            "{0}: {1:%}".format(tbl, i / row_count),
                            'oracle_worker')
                except Exception as ex:
                    status_writer.send_status(ex)
                    continue
Example #4
0
def run_job(mysql_job):
    """Worker function to index each row in each table in the MySQL database.

    For each table returned by get_tables() this builds a schema document
    (primary key, indexed columns, field names/types), selects the rows to
    index (applying any configured query/constraint), extracts point
    coordinates or envelope extents when the table has a geometry column,
    and sends one schema entry plus one entry per row to ZMQ for indexing.

    :param mysql_job: job object providing the database cursor, ZMQ
        connection and per-table configuration
    """
    job = mysql_job
    job.connect_to_zmq()
    job.connect_to_database()
    tables = get_tables(job)
    processed = 0
    for table in tables:
        geo = {}
        is_point = False
        has_shape = False
        # Name of the geometry column, captured when one is found below.
        geom_column = None

        # --------------------------------------------------------------------------------------------------
        # Get the table schema.
        # --------------------------------------------------------------------------------------------------
        schema = {}
        # Get the primary key.
        qry = "SELECT column_name FROM information_schema.table_constraints JOIN information_schema.key_column_usage USING (CONSTRAINT_NAME, TABLE_NAME) WHERE CONSTRAINT_TYPE = 'PRIMARY KEY' AND table_name = '{0}'".format(
            table)
        cols = job.execute_query(qry).fetchall()
        primary_key = ''
        if cols:
            primary_key = cols[0][0]

        # Get the columns which are indexed.
        qry = "SELECT *  FROM information_schema.statistics WHERE table_name = '{0}'".format(
            table)
        cols = job.execute_query(qry).fetchall()
        indexed_cols = []
        if cols:
            for col in cols:
                indexed_cols.append(col.COLUMN_NAME)

        # Describe every column: name, type and key/index/null properties.
        schema_columns = []
        for col in job.db_cursor.columns(table=table).fetchall():
            column = {}
            props = []
            column['name'] = col.column_name
            column['type'] = col.type_name
            if col.column_name == primary_key:
                props.append('PRIMARY KEY')
            if col.column_name in indexed_cols:
                props.append('INDEXED')
            if col.is_nullable == 'YES':
                props.append('NULLABLE')
            else:
                props.append('NOTNULLABLE')
            column['properties'] = props
            schema_columns.append(column)
        schema['fields'] = schema_columns
        schema['name'] = table

        # --------------------------------------------------------------------------------------------------
        # Set up the fields to index.
        # --------------------------------------------------------------------------------------------------
        columns = []
        column_types = {}
        if not job.fields_to_keep == ['*']:
            # Only keep columns matching the configured LIKE patterns.
            for col in job.fields_to_keep:
                qry = "select column_name, data_type from information_schema.columns where table_name = '{0}' and column_name like '{1}'".format(
                    table, col)
                for col in job.execute_query(qry).fetchall():
                    columns.append(col[0])
                    column_types[col[0]] = col[1]
        else:
            for col in job.db_cursor.columns(table=table).fetchall():
                columns.append(col.column_name)
                column_types[col.column_name] = col.type_name

        if job.fields_to_skip:
            # Drop columns matching the configured skip patterns.
            for col in job.fields_to_skip:
                qry = "select column_name from information_schema.columns where table_name = '{0}' AND column_name like '{1}'".format(
                    table, col)
                for col in job.execute_query(qry).fetchall():
                    columns.remove(col[0])

        # ------------------------------------------------------------------------------------------------
        # Get the query information.
        # ------------------------------------------------------------------------------------------------
        query = job.get_table_query(table)
        constraint = job.get_table_constraint(table)
        if query and constraint:
            expression = """{0} AND {1}""".format(query, constraint)
        else:
            if query:
                expression = query
            else:
                expression = constraint

        # --------------------------------------------------------------------------------------------------------
        # Check for a geometry column and pull out X,Y for points and extent coordinates for other geometry types.
        # --------------------------------------------------------------------------------------------------------
        for col in job.db_cursor.columns(table=table).fetchall():
            if col.type_name == 'geometry':
                has_shape = True
                geom_column = col.column_name
                srid = job.db_cursor.execute(
                    "select SRID({0}) from {1}".format(col.column_name,
                                                       table)).fetchone()[0]
                geo['code'] = srid
                geom_type = job.db_cursor.execute(
                    "select GeometryType({0}) from {1}".format(
                        col.column_name, table)).fetchone()[0]
                if geom_type == 'POINT':
                    is_point = True
                    # Each insert goes to position 0, so the final select
                    # order is: AsText, Y, X, then the remaining columns.
                    columns.insert(0, "X({0})".format(col.column_name))
                    columns.insert(0, "Y({0})".format(col.column_name))
                    columns.insert(0, "AsText({0})".format(col.column_name))
                else:
                    columns.insert(
                        0, "AsText(Envelope({0}))".format(col.column_name))
                columns.remove(col.column_name)
                column_types.pop(col.column_name)
                break

        # ------------------------------
        # Query the table for the rows.
        # ------------------------------
        if not expression:
            rows = job.db_cursor.execute("select {0} from {1}".format(
                ','.join(columns), table))
        else:
            rows = job.db_cursor.execute(
                "select {0} from {1} where {2}".format(','.join(columns),
                                                       table, expression))

        # --------------------------------------
        # Remove shape columns from field list.
        # --------------------------------------
        # Fix: the original referenced the leaked loop variable
        # `col.column_name`, which only happened to be the geometry column
        # because of the `break` above; use the explicitly captured name.
        if geom_column:
            for x in ("X({0})".format(geom_column),
                      "Y({0})".format(geom_column),
                      "AsText({0})".format(geom_column)):
                try:
                    columns.remove(x)
                except ValueError:
                    continue

        # -----------------------------
        # Index each row in the table.
        # -----------------------------
        entry = {}
        location_id = job.location_id
        discovery_id = job.discovery_id
        action_type = job.action_type
        mapped_fields = job.map_fields(table, columns, column_types)
        new_fields = job.new_fields
        row_count = float(rows.rowcount)
        schema['rows'] = row_count
        increment = job.get_increment(row_count)
        geometry_ops = worker_utils.GeometryOps()
        generalize_value = job.generalize_value

        # Add an entry for the table itself with schema.
        table_entry = {}
        table_entry['id'] = '{0}_{1}'.format(location_id, table)
        table_entry['location'] = location_id
        table_entry['action'] = action_type
        table_entry['format_type'] = 'Schema'
        table_entry['entry'] = {
            'fields': {
                '_discoveryID': discovery_id,
                'name': table,
                'path': job.sql_server_connection_str
            }
        }
        table_entry['entry']['fields']['schema'] = schema
        job.send_entry(table_entry)

        # Per-table counter. Fix: the original did `processed += i` after
        # the loop, which raised UnboundLocalError for empty tables (the
        # loop never bound `i`) and under-counted by one otherwise.
        rows_sent = 0
        for i, row in enumerate(rows):
            if has_shape:
                if is_point:
                    if job.include_wkt:
                        geo['wkt'] = row[0]
                    # Select order is AsText, Y, X: lon is X at index 2.
                    geo['lon'] = row[2]
                    geo['lat'] = row[1]
                    # NOTE(review): mapped_fields was built after the three
                    # point expressions were removed from `columns`, so
                    # mapped_fields[3:] may skip three real fields — confirm
                    # against job.map_fields before changing.
                    mapped_cols = dict(zip(mapped_fields[3:], row[3:]))
                    mapped_cols['geometry_type'] = 'Point'
                    for nf in new_fields:
                        if nf['name'] == '*' or nf['name'] == table:
                            # Fix: new field values belong on the record
                            # dict (mapped_cols); the original assigned
                            # string keys into the mapped_fields list.
                            for k, v in nf['new_fields'].iteritems():
                                mapped_cols[k] = v
                else:
                    if job.include_wkt:
                        if generalize_value == 0:
                            geo['wkt'] = row[0]
                        else:
                            geo['wkt'] = geometry_ops.generalize_geometry(
                                str(row[0]), generalize_value)
                    # Parse the envelope WKT ring (minx miny, maxx miny,
                    # maxx maxy, minx maxy, ...): ordinates 0, 1, 4 and 5
                    # give the extent.
                    nums = re.findall(r"-?(?:\.\d+|\d+(?:\.\d*)?)",
                                      row[0].rpartition(',')[0])
                    geo['xmin'] = float(nums[0])
                    geo['ymin'] = float(nums[1])
                    geo['xmax'] = float(nums[4])
                    geo['ymax'] = float(nums[5])
                    mapped_cols = dict(zip(mapped_fields[1:], row[1:]))
                    for nf in new_fields:
                        if nf['name'] == '*' or nf['name'] == table:
                            # Fix: assign onto mapped_cols (see above).
                            for k, v in nf['new_fields'].iteritems():
                                mapped_cols[k] = v
                    if 'POLYGON' in geom_type:
                        mapped_cols['geometry_type'] = 'Polygon'
                    else:
                        mapped_cols['geometry_type'] = 'Polyline'
            else:
                mapped_cols = dict(zip(mapped_fields, row))
                for nf in new_fields:
                    if nf['name'] == '*' or nf['name'] == table:
                        # Fix: assign onto mapped_cols (see above).
                        for k, v in nf['new_fields'].iteritems():
                            mapped_cols[k] = v

            # Create an entry to send to ZMQ for indexing.
            mapped_cols['title'] = table
            mapped_cols['format_type'] = 'Record'
            mapped_cols['format'] = 'application/vnd.mysql.record'
            entry['id'] = '{0}_{1}_{2}'.format(location_id, table, i)
            entry['location'] = location_id
            entry['action'] = action_type
            entry['entry'] = {'geo': geo, 'fields': mapped_cols}
            entry['entry']['fields']['_discoveryID'] = discovery_id
            job.send_entry(entry)
            rows_sent += 1
            if (i % increment) == 0:
                status_writer.send_percent(
                    i / row_count, '{0}: {1:%}'.format(table, i / row_count),
                    'MySql')
        processed += rows_sent
    status_writer.send_status("Processed: {0}".format(processed))
Example #5
0
def worker(data_path, esri_service=False):
    """The worker function to index feature data and tabular data.

    Describes the dataset at data_path with arcpy and sends one ZMQ entry
    per row/feature, followed by a schema entry for the dataset itself.
    Tables are read with a plain search cursor; feature classes are read
    with a SHAPE@ token and indexed as lon/lat (points) or WKT/extent
    (other shape types). Relies on module-level `job` and `status_writer`
    objects (defined elsewhere in this file).

    :param data_path: path to the table or feature class to index
    :param esri_service: when True, delegate to index_service() for an
        ArcGIS service instead of describing a local dataset
    """
    if esri_service:
        index_service(job.service_connection)
    else:
        job.connect_to_zmq()
        geo = {}
        entry = {}
        schema = {}
        dsc = arcpy.Describe(data_path)

        # Geometry generalization is optional; fall back to extents when
        # the helper module is unavailable.
        try:
            from utils import worker_utils
            geometry_ops = worker_utils.GeometryOps()
        except ImportError:
            geometry_ops = None

        # Not every dataset type exposes these describe properties.
        try:
            global_id_field = dsc.globalIDFieldName
        except AttributeError:
            global_id_field = None

        try:
            shape_field_name = dsc.shapeFieldName
        except AttributeError:
            shape_field_name = None

        # Get the table schema.
        table_entry = {}
        schema['name'] = dsc.name
        try:
            alias = dsc.aliasName
        except AttributeError:
            alias = dsc.name
        # Only record the alias when it differs from the dataset name.
        if not dsc.name == alias:
            schema['alias'] = alias
        schema['OIDFieldName'] = dsc.OIDFieldName
        if shape_field_name:
            schema['shapeFieldName'] = shape_field_name
            schema['wkid'] = dsc.spatialReference.factoryCode
        if global_id_field:
            schema['globalIDField'] = global_id_field
        # Describe each field: name, alias, type, domain and properties.
        schema_fields = []
        for fld in dsc.fields:
            field = {}
            props = []
            field['name'] = fld.name
            field['alias'] = fld.aliasName
            field['type'] = fld.type
            field['domain'] = fld.domain
            if fld.isNullable:
                props.append('nullable')
            else:
                props.append('notnullable')
            indexes = dsc.indexes
            # NOTE(review): both branches break after the first iteration,
            # so only the first index in dsc.indexes is consulted for each
            # field — confirm whether all indexes should be checked.
            if indexes:
                for index in indexes:
                    if fld.name in [f.name for f in index.fields]:
                        props.append('indexed')
                        break
                    else:
                        props.append('notindexed')
                        break
            field['properties'] = props
            schema_fields.append(field)
        schema['fields'] = schema_fields

        if dsc.dataType == 'Table':
            # Get join information.
            table_join = job.get_join(dsc.name)
            if table_join:
                table_view = arcpy.MakeTableView_management(
                    dsc.catalogPath, 'view')
                arcpy.AddJoin_management(
                    table_view, table_join['field'],
                    os.path.join(job.path, table_join['table']),
                    table_join['field'], 'KEEP_COMMON')
            else:
                table_view = dsc.catalogPath

            # Get any query or constraint.
            query = job.get_table_query(dsc.name)
            constraint = job.get_table_constraint(dsc.name)
            if query and constraint:
                expression = """{0} AND {1}""".format(query, constraint)
            else:
                if query:
                    expression = query
                else:
                    expression = constraint

            field_types = job.search_fields(table_view)
            fields = field_types.keys()
            row_count = float(
                arcpy.GetCount_management(table_view).getOutput(0))
            # Nothing to index: note this also skips the trailing schema
            # entry at the bottom of the function.
            if row_count == 0.0:
                return

            with arcpy.da.SearchCursor(table_view, fields, expression) as rows:
                mapped_fields = job.map_fields(dsc.name, fields, field_types)
                new_fields = job.new_fields
                # Preserve mapped-field order for zipping with each row.
                ordered_fields = OrderedDict()
                for f in mapped_fields:
                    ordered_fields[f] = None
                increment = job.get_increment(row_count)
                for i, row in enumerate(rows, 1):
                    try:
                        # Optionally translate coded domain values.
                        if job.domains:
                            row = update_row(dsc.fields, rows, list(row))
                        mapped_fields = dict(zip(ordered_fields.keys(), row))
                        mapped_fields['_discoveryID'] = job.discovery_id
                        mapped_fields['meta_table_name'] = dsc.name
                        mapped_fields['format_category'] = 'GIS'
                        mapped_fields['format_type'] = "Record"
                        mapped_fields[
                            'format'] = "application/vnd.esri.{0}.record".format(
                                dsc.dataType.lower())
                        for nf in new_fields:
                            if nf['name'] == '*' or nf['name'] == dsc.name:
                                for k, v in nf['new_fields'].iteritems():
                                    mapped_fields[k] = v
                        # Prefer a real OID value for the entry id.
                        oid_field = filter(
                            lambda x: x in ('FID', 'OID', 'OBJECTID'),
                            rows.fields)
                        if oid_field:
                            fld_index = rows.fields.index(oid_field[0])
                        else:
                            # NOTE(review): falls back to the 1-based row
                            # number, which is then used as an index into
                            # `row` below and may exceed its length —
                            # confirm intended behavior.
                            fld_index = i
                        if global_id_field:
                            mapped_fields['meta_{0}'.format(
                                global_id_field)] = mapped_fields.pop(
                                    'fi_{0}'.format(global_id_field))
                        entry['id'] = '{0}_{1}_{2}'.format(
                            job.location_id, os.path.basename(data_path),
                            row[fld_index])
                        entry['location'] = job.location_id
                        entry['action'] = job.action_type
                        entry['entry'] = {'fields': mapped_fields}
                        job.send_entry(entry)
                        if (i % increment) == 0:
                            status_writer.send_percent(
                                i / row_count,
                                "{0} {1:%}".format(dsc.name, i / row_count),
                                'esri_worker')
                    except (AttributeError, RuntimeError):
                        # Skip rows that fail to map or send.
                        continue
        else:
            generalize_value = job.generalize_value
            # WGS84; passed to the search cursors below as the output
            # spatial reference.
            sr = arcpy.SpatialReference(4326)
            geo['spatialReference'] = dsc.spatialReference.name
            geo['code'] = dsc.spatialReference.factoryCode

            # Get join information.
            table_join = job.get_join(dsc.name)
            if table_join:
                lyr = arcpy.MakeFeatureLayer_management(dsc.catalogPath, 'lyr')
                arcpy.AddJoin_management(
                    lyr, table_join['input_join_field'],
                    os.path.join(job.path, table_join['table']),
                    table_join['output_join_field'], 'KEEP_COMMON')
            else:
                lyr = dsc.catalogPath

            field_types = job.search_fields(lyr)
            fields = field_types.keys()
            query = job.get_table_query(dsc.name)
            constraint = job.get_table_constraint(dsc.name)
            if query and constraint:
                expression = """{0} AND {1}""".format(query, constraint)
            else:
                if query:
                    expression = query
                else:
                    expression = constraint
            # The geometry is read via the SHAPE@ token instead of the
            # shape field itself.
            if dsc.shapeFieldName in fields:
                fields.remove(dsc.shapeFieldName)
                field_types.pop(dsc.shapeFieldName)
            elif table_join:
                fields.remove(arcpy.Describe(lyr).shapeFieldName)
                field_types.pop(arcpy.Describe(lyr).shapeFieldName)
            row_count = float(arcpy.GetCount_management(lyr).getOutput(0))
            if row_count == 0.0:
                return
            if dsc.shapeType == 'Point':
                with arcpy.da.SearchCursor(lyr, ['SHAPE@'] + fields,
                                           expression, sr) as rows:
                    # rows.fields[0] is SHAPE@; map the rest.
                    mapped_fields = job.map_fields(dsc.name,
                                                   list(rows.fields[1:]),
                                                   field_types)
                    new_fields = job.new_fields
                    ordered_fields = OrderedDict()
                    for f in mapped_fields:
                        ordered_fields[f] = None
                    increment = job.get_increment(row_count)
                    for i, row in enumerate(rows):
                        try:
                            if job.domains:
                                row = update_row(dsc.fields, rows, list(row))
                            # Points are indexed directly as lon/lat.
                            if row[0]:
                                geo['lon'] = row[0].firstPoint.X
                                geo['lat'] = row[0].firstPoint.Y
                            mapped_fields = dict(
                                zip(ordered_fields.keys(), row[1:]))
                            mapped_fields['_discoveryID'] = job.discovery_id
                            mapped_fields['meta_table_name'] = dsc.name
                            mapped_fields['format_category'] = 'GIS'
                            mapped_fields['geometry_type'] = 'Point'
                            mapped_fields['format_type'] = 'Feature'
                            mapped_fields[
                                'format'] = "application/vnd.esri.{0}.feature".format(
                                    dsc.dataType.lower())
                            for nf in new_fields:
                                if nf['name'] == '*' or nf['name'] == dsc.name:
                                    for k, v in nf['new_fields'].iteritems():
                                        mapped_fields[k] = v
                            if global_id_field:
                                mapped_fields['meta_{0}'.format(
                                    global_id_field)] = mapped_fields.pop(
                                        'fi_{0}'.format(global_id_field))
                            entry['id'] = '{0}_{1}_{2}'.format(
                                job.location_id, os.path.basename(data_path),
                                i)
                            entry['location'] = job.location_id
                            entry['action'] = job.action_type
                            entry['entry'] = {
                                'geo': geo,
                                'fields': mapped_fields
                            }
                            job.send_entry(entry)
                            if (i % increment) == 0:
                                status_writer.send_percent(
                                    i / row_count,
                                    "{0} {1:%}".format(dsc.name,
                                                       i / row_count),
                                    'esri_worker')
                        except (AttributeError, RuntimeError):
                            continue
            else:
                # Polylines/polygons: index WKT (possibly generalized) or
                # just the extent when no generalization helper exists.
                with arcpy.da.SearchCursor(lyr, ['SHAPE@'] + fields,
                                           expression, sr) as rows:
                    increment = job.get_increment(row_count)
                    mapped_fields = job.map_fields(dsc.name,
                                                   list(rows.fields[1:]),
                                                   field_types)
                    new_fields = job.new_fields
                    ordered_fields = OrderedDict()
                    for f in mapped_fields:
                        ordered_fields[f] = None
                    for i, row in enumerate(rows):
                        try:
                            if job.domains:
                                row = update_row(dsc.fields, rows, list(row))
                            if row[0]:
                                if generalize_value == 0 or generalize_value == 0.0:
                                    geo['wkt'] = row[0].WKT
                                else:
                                    if geometry_ops:
                                        geo['wkt'] = geometry_ops.generalize_geometry(
                                            row[0].WKT, generalize_value)
                                    else:
                                        geo['xmin'] = row[0].extent.XMin
                                        geo['xmax'] = row[0].extent.XMax
                                        geo['ymin'] = row[0].extent.YMin
                                        geo['ymax'] = row[0].extent.YMax
                            mapped_fields = dict(
                                zip(ordered_fields.keys(), row[1:]))
                            mapped_fields['_discoveryID'] = job.discovery_id
                            mapped_fields['meta_table_name'] = dsc.name
                            for nf in new_fields:
                                if nf['name'] == '*' or nf['name'] == dsc.name:
                                    for k, v in nf['new_fields'].iteritems():
                                        mapped_fields[k] = v
                            if global_id_field:
                                mapped_fields['meta_{0}'.format(
                                    global_id_field)] = mapped_fields.pop(
                                        'fi_{0}'.format(global_id_field))
                            mapped_fields['geometry_type'] = dsc.shapeType
                            mapped_fields['format_category'] = 'GIS'
                            mapped_fields['format_type'] = 'Feature'
                            mapped_fields[
                                'format'] = "application/vnd.esri.{0}.feature".format(
                                    dsc.dataType.lower())
                            entry['id'] = '{0}_{1}_{2}'.format(
                                job.location_id,
                                os.path.splitext(
                                    os.path.basename(data_path))[0], i)
                            entry['location'] = job.location_id
                            entry['action'] = job.action_type
                            entry['entry'] = {
                                'geo': geo,
                                'fields': mapped_fields
                            }
                            job.send_entry(entry)
                            if (i % increment) == 0:
                                status_writer.send_percent(
                                    i / row_count,
                                    "{0} {1:%}".format(dsc.name,
                                                       i / row_count),
                                    'esri_worker')
                        except (AttributeError, RuntimeError):
                            continue

        # Add and entry for the table and it's schema.
        schema['rows'] = row_count
        table_entry['id'] = '{0}_{1}'.format(job.location_id, dsc.name)
        table_entry['location'] = job.location_id
        table_entry['action'] = job.action_type
        table_entry['format_type'] = 'Schema'
        table_entry['entry'] = {
            'fields': {
                '_discoveryID': job.discovery_id,
                'name': dsc.name,
                'path': dsc.catalogPath,
                'format': 'schema'
            }
        }
        table_entry['entry']['fields']['schema'] = schema
        job.send_entry(table_entry)
Example #6
0
def index_service(connection_info):
    """Index the records in Map and Feature Services.

    Looks up the requested service on an ArcGIS server/portal, filters its
    layers and tables against the job's keep list (supporting ``*``
    wildcards), maps each record's fields per the job's field mapping, and
    sends one entry per row/feature to the indexer through the
    module-level ``job`` ZMQ connection.

    :param connection_info: dict of connection details; expects
        'server_url' (or 'portal_url'), 'user_name', 'password', 'token',
        'generate_token', 'service_name', 'service_type' and 'folder_name',
        with an optional 'instance' (defaults to 'arcgis').
    """
    from utils import worker_utils
    geometry_ops = worker_utils.GeometryOps()
    job.connect_to_zmq()
    entry = {}
    items = {}
    url = ''

    # Prefer the portal url when one is given; otherwise use the server url.
    if 'portal_url' in connection_info:
        connection_url = connection_info['portal_url']
    else:
        connection_url = connection_info['server_url']
    user_name = connection_info['user_name']
    password = connection_info['password']
    token = connection_info['token']
    generate_token = connection_info['generate_token']
    service_name = connection_info['service_name']
    service_type = connection_info['service_type']
    folder_name = connection_info['folder_name']
    instance = connection_info.get('instance', 'arcgis')

    # Create the ArcGIS service helper and get the service url and the
    # service items (layers/tables).
    ags_helper = worker_utils.ArcGISServiceHelper(connection_url,
                                                  user_name,
                                                  password,
                                                  instance=instance)
    try:
        if token == '' and generate_token == 'false':
            url, items = ags_helper.find_item_url(service_name, service_type,
                                                  folder_name)
        elif token and folder_name:
            url, items = ags_helper.find_item_url(service_name,
                                                  service_type,
                                                  folder_name,
                                                  token=token)
        elif token:
            url, items = ags_helper.find_item_url(service_name,
                                                  service_type,
                                                  token=token)
        elif generate_token and folder_name:
            url, items = ags_helper.find_item_url(service_name,
                                                  service_type,
                                                  folder_name,
                                                  token=token)
        elif generate_token == 'true':
            url, items = ags_helper.find_item_url(service_name,
                                                  service_type,
                                                  token=ags_helper.token)
    except IndexError:
        status_writer.send_state(status.STAT_FAILED,
                                 "Cannot locate {0}.".format(service_name))
        return
    except worker_utils.InvalidToken as invalid_token:
        # str(exc) instead of exc.message: the .message attribute was
        # removed from exceptions in Python 3.
        status_writer.send_state(status.STAT_FAILED, str(invalid_token))
        return
    except Exception as ex:
        status_writer.send_state(status.STAT_FAILED, str(ex))
        return

    # Support wildcards for filtering layers and views in the service.
    layers = items['layers'] + items['tables']
    layers_to_keep = job.tables_to_keep()
    for layer in layers_to_keep:
        lk = layer.split('*')
        if len(lk) == 3 and layer.startswith('*') and layer.endswith('*'):
            # BUG FIX: keep the full layer dicts -- the original kept only
            # l['id'], which broke the loop below that reads layer['name'].
            layers = [l for l in layers if lk[1] in l['name']]
        elif layer.endswith('*'):
            layers = [l for l in layers if lk[0] in l['name']]
        elif layer.startswith('*'):
            layers = [l for l in layers if lk[1] in l['name']]
        else:
            layers = [l for l in layers if lk[0] == l['name']]

    # Index the records for each layer and table within a feature or map service.
    for layer in layers:
        i = 0.
        geo = {}
        layer_id = layer['id']
        layer_name = layer['name']
        mapped_attributes = OrderedDict()

        status_writer.send_status('Indexing {0}...'.format((url, layer_name)))

        # Get the list of fields and field types.
        fields_types = {}
        try:
            fields = ags_helper.get_item_fields(url, layer_id,
                                                ags_helper.token)
        except KeyError:
            status_writer.send_status(
                "Layer {0} has no fields.".format(layer_name))
            continue
        for f in fields:
            fields_types[f['name']] = f['type']

        # Check if the layer is empty and ensure to get all features, not
        # just the first 1000 (esri default).
        objectid_groups, row_count = ags_helper.get_item_row_count(
            url, layer_id, ags_helper.token)
        oid_field_name = ags_helper.oid_field_name
        if not row_count:
            status_writer.send_status(
                "Layer {0} has no features.".format(layer_name))
            continue
        increment = float(job.get_increment(row_count))

        for group in objectid_groups:
            group = [oid for oid in group if oid is not None]
            # Build the IN list manually: formatting tuple(group) renders a
            # one-element group as "(123,)", which is not a valid where
            # clause.
            oid_list = ','.join(str(oid) for oid in group)
            rows = ags_helper.get_item_rows(url,
                                            layer_id,
                                            ags_helper.token,
                                            where='{0} IN ({1})'.format(
                                                oid_field_name, oid_list))
            features = None
            if 'features' in rows:
                features = rows['features']
            if not features:
                status_writer.send_status(
                    "Layer {0} has no features.".format(layer_name))
                continue
            if 'attributes' in features[0]:
                attributes = OrderedDict(features[0]['attributes'])
            else:
                status_writer.send_status(
                    "Layer {0} has no attributes.".format(layer_name))

            if 'geometryType' in rows:
                geometry_type = rows['geometryType']
            else:
                geometry_type = 'Table'
            if 'spatialReference' in rows:
                geo['srid'] = rows['spatialReference']['wkid']

            # Map each field name to its value, honoring keep/skip filters.
            if not job.fields_to_keep == ['*']:
                for fk in job.fields_to_keep:
                    mapped_fields = dict((name, val)
                                         for name, val in attributes.items()
                                         if fk in name)
                    if job.fields_to_skip:
                        for fs in job.fields_to_skip:
                            for name in attributes:
                                if name in fs:
                                    # pop with a default: the name may have
                                    # already been filtered out above.
                                    mapped_fields.pop(name, None)
            else:
                mapped_fields = copy.deepcopy(attributes)

            # This will generate the field mapping dictionary.
            job.tables_to_keep()

            # Find the field map for this layer, falling back to '*'.
            date_fields = set()
            field_map = None
            for mapping in job.field_mapping:
                if mapping['name'] == layer_name:
                    field_map = mapping['map']
                    break

            if not field_map:
                for mapping in job.field_mapping:
                    if mapping['name'] == '*':
                        field_map = mapping['map']
                        break

            # Iterate over a snapshot: the loop body pops from
            # mapped_fields, which raises RuntimeError on a live view in
            # Python 3.
            if field_map:
                for k, v in list(mapped_fields.items()):
                    if k in field_map:
                        new_field = field_map[k]
                        mapped_attributes[new_field] = mapped_fields.pop(k)
                    else:
                        field_type = job.default_mapping(fields_types[k])
                        if field_type == 'fd_':
                            # Because dates are being returned as longs.
                            mapped_attributes[field_type + k] = v
                            date_fields.add(field_type + k)
                        else:
                            mapped_attributes[field_type +
                                              k] = mapped_fields.pop(k)
            else:
                for k, v in list(mapped_fields.items()):
                    field_type = job.default_mapping(fields_types[k])
                    if field_type == 'fd_':
                        # Because dates are being returned as longs.
                        mapped_attributes[field_type + k] = v
                        date_fields.add(field_type + k)
                    else:
                        mapped_attributes[field_type +
                                          k] = mapped_fields.pop(k)

            i += len(features)
            # One geometry-type check up front is cheaper than testing
            # inside every iteration.
            if geometry_type == 'Table':
                for x, row in enumerate(features):
                    entry['id'] = '{0}_{1}_{2}_{3}'.format(
                        job.location_id, layer_name, i, x)
                    entry['location'] = job.location_id
                    entry['action'] = job.action_type
                    mapped_fields = dict(
                        zip(mapped_attributes.keys(),
                            row['attributes'].values()))
                    # Convert longs to datetime.
                    for df in date_fields:
                        mapped_fields[df] = get_date(mapped_fields[df])
                    mapped_fields['title'] = layer_name
                    mapped_fields['meta_table_name'] = layer_name
                    mapped_fields['_discoveryID'] = job.discovery_id
                    mapped_fields['format_category'] = 'GIS'
                    mapped_fields['format_type'] = 'Service Layer Record'
                    mapped_fields[
                        'format'] = 'application/vnd.esri.service.layer.record'
                    entry['entry'] = {'fields': mapped_fields}
                    job.send_entry(entry)
                    if (i % increment) == 0:
                        status_writer.send_percent(
                            i / row_count,
                            "{0} {1:%}".format(layer_name,
                                               i / row_count), 'esri_worker')
            elif geometry_type == 'esriGeometryPoint':
                for x, feature in enumerate(features):
                    pt = feature['geometry']
                    geo['lon'] = pt['x']
                    geo['lat'] = pt['y']
                    entry['id'] = '{0}_{1}_{2}_{3}'.format(
                        job.location_id, layer_name, int(i), x)
                    entry['location'] = job.location_id
                    entry['action'] = job.action_type
                    mapped_fields = dict(
                        zip(mapped_attributes.keys(),
                            feature['attributes'].values()))
                    # Convert longs to datetime.
                    for df in date_fields:
                        mapped_fields[df] = get_date(mapped_fields[df])
                    mapped_fields['_discoveryID'] = job.discovery_id
                    mapped_fields['title'] = layer_name
                    mapped_fields['geometry_type'] = 'Point'
                    mapped_fields['meta_table_name'] = layer_name
                    mapped_fields['format_category'] = 'GIS'
                    mapped_fields['format_type'] = 'Service Layer Feature'
                    mapped_fields[
                        'format'] = 'application/vnd.esri.service.layer.record'
                    entry['entry'] = {'geo': geo, 'fields': mapped_fields}
                    job.send_entry(entry)
                    if (i % increment) == 0:
                        status_writer.send_percent(
                            i / row_count,
                            "{0} {1:%}".format(layer_name,
                                               int(i) / row_count),
                            'esri_worker')
            else:
                generalize_value = job.generalize_value
                for x, feature in enumerate(features):
                    try:
                        # Catch possible null geometries.
                        geometry = make_feature(feature)
                    except RuntimeError:
                        continue

                    if generalize_value > 0.9:
                        geo['xmin'], geo[
                            'xmax'] = geometry.extent.XMin, geometry.extent.XMax
                        geo['ymin'], geo[
                            'ymax'] = geometry.extent.YMin, geometry.extent.YMax
                    elif generalize_value == 0 or generalize_value == 0.0:
                        geo['wkt'] = geometry.WKT
                    elif geometry_ops:
                        geo['wkt'] = geometry_ops.generalize_geometry(
                            geometry.WKT, generalize_value)
                    else:
                        geo['xmin'], geo[
                            'xmax'] = geometry.extent.XMin, geometry.extent.XMax
                        geo['ymin'], geo[
                            'ymax'] = geometry.extent.YMin, geometry.extent.YMax

                    entry['id'] = '{0}_{1}_{2}_{3}'.format(
                        job.location_id, layer_name, int(i), x)
                    entry['location'] = job.location_id
                    entry['action'] = job.action_type
                    mapped_fields = dict(
                        zip(mapped_attributes.keys(),
                            OrderedDict(feature['attributes']).values()))
                    try:
                        # Convert longs to datetime.
                        for df in date_fields:
                            mapped_fields[df] = get_date(mapped_fields[df])
                        mapped_fields['title'] = layer_name
                        mapped_fields['geometry_type'] = geometry.type
                        mapped_fields['meta_table_name'] = layer_name
                        mapped_fields['meta_table_path'] = layer['path']
                        mapped_fields[
                            'meta_table_location'] = os.path.dirname(
                                layer['path'])
                        mapped_fields['format_category'] = 'GIS'
                        mapped_fields[
                            'format_type'] = 'Service Layer Feature'
                        mapped_fields[
                            'format'] = 'application/vnd.esri.service.layer.record'
                        mapped_fields['_discoveryID'] = job.discovery_id
                        entry['entry'] = {
                            'geo': geo,
                            'fields': mapped_fields
                        }
                    except KeyError:
                        # A layer dict may lack 'path' (or a date field may
                        # be missing); keep whatever was built so far.
                        pass
                    # BUG FIX: the original called job.send_entry only from
                    # the KeyError handler, silently dropping every
                    # successfully built feature entry.
                    job.send_entry(entry)
                    if (i % increment) == 0:
                        status_writer.send_percent(
                            i / row_count,
                            "{0} {1:%}".format(layer_name, i / row_count),
                            'esri_worker')
# Example #7
# 0
def index_service(connection_info):
    """Index the records in Map and Feature Services."""
    from utils import worker_utils
    geometry_ops = worker_utils.GeometryOps()
    geo_json_converter = worker_utils.GeoJSONConverter()
    job.connect_to_zmq()
    entry = {}
    items = {}
    url = ''

    if 'portal_url' in connection_info:
        connection_url = connection_info['portal_url']
    else:
        connection_url = connection_info['server_url']
    user_name = connection_info['user_name']
    password = connection_info['password']
    token = connection_info['token']
    generate_token = connection_info['generate_token']
    service_name = connection_info['service_name']
    service_type = connection_info['service_type']
    folder_name = connection_info['folder_name']

    if 'instance' in connection_info:
        instance = connection_info['instance']
    else:
        instance = 'arcgis'
    if 'verify_ssl' in connection_info and connection_info['verify_ssl'].lower(
    ) in ['false', 'n']:
        verify_ssl = False
    else:
        verify_ssl = True

    # Create the ArcGIS service helper and get the service url and the service items (layers/tables).
    ags_helper = worker_utils.ArcGISServiceHelper(connection_url,
                                                  user_name,
                                                  password,
                                                  verify_ssl,
                                                  instance=instance)
    try:
        if token == '' and generate_token == 'false':
            url, items = ags_helper.find_item_url(service_name, service_type,
                                                  folder_name)
        elif token and folder_name:
            url, items = ags_helper.find_item_url(service_name,
                                                  service_type,
                                                  folder_name,
                                                  token=token)
        elif token:
            url, items = ags_helper.find_item_url(service_name,
                                                  service_type,
                                                  token=token)
        elif generate_token and folder_name:
            url, items = ags_helper.find_item_url(service_name,
                                                  service_type,
                                                  folder_name,
                                                  token=token)
        elif generate_token == 'true':
            url, items = ags_helper.find_item_url(service_name,
                                                  service_type,
                                                  token=ags_helper.token)
    except IndexError:
        status_writer.send_state(status.STAT_FAILED,
                                 "Cannot locate {0}.".format(service_name))
        return
    except worker_utils.InvalidToken as invalid_token:
        status_writer.send_state(status.STAT_FAILED, invalid_token.message)
        return
    except Exception as ex:
        status_writer.send_state(status.STAT_FAILED, ex.message)
        return

    # Support wildcards for filtering layers and views in the service.
    layers = items['layers'] + items['tables']
    layers_to_keep = job.tables_to_keep()
    layers_process = []
    for layer in layers_to_keep:
        lk = layer.split('*')
        if len(lk) == 3 and layer.startswith('*') and layer.endswith('*'):
            layers_process += [l['id'] for l in layers if lk[1] in l['name']]
        elif layer.endswith('*'):
            layers_process += [l for l in layers if lk[0] in l['name']]
        elif layer.startswith('*'):
            layers_process += [l for l in layers if lk[1] in l['name']]
        else:
            layers_process += [l for l in layers if lk[0] == l['name']]

    # Index the records for each layer and table within a feature or map service.
    skip_tables = job.tables_to_skip()
    for layer in layers_process:
        if layer['name'] in skip_tables:
            continue
        i = 0.
        geo = {}
        layer_id = layer['id']
        layer_name = layer['name']
        mapped_attributes = OrderedDict()

        status_writer.send_status('Indexing {0}...'.format((url, layer_name)))

        # Get the list of fields and field types.
        fields_types = {}
        try:
            fields = ags_helper.get_item_fields(url, layer_id,
                                                ags_helper.token)
        except KeyError:
            status_writer.send_status(
                "Layer {0} has no fields.".format(layer_name))
            continue
        for f in fields:
            fields_types[f['name']] = f['type']

        # Check if the layer is empty and ensure to get all features, not just first 1000 (esri default).
        query = job.get_table_query(layer_name)
        if query:
            where_clause = query
        elif 'query' in connection_info:
            where_clause = connection_info['query']
        else:
            where_clause = "1=1"

        try:
            groups, row_count = ags_helper.get_item_row_count(
                url, layer_id, ags_helper.token, where_clause)
        except Exception:
            where_clause = "1=1"
            groups, row_count = ags_helper.get_item_row_count(
                url, layer_id, ags_helper.token, where_clause)

        oid_field_name = ags_helper.oid_field_name
        if not row_count:
            status_writer.send_status(
                "Layer {0} has no features.".format(layer_name))
            continue
        else:
            increment = float(job.get_increment(row_count))

        for group in groups:
            rows = ags_helper.get_item_rows(url,
                                            layer_id,
                                            ags_helper.token,
                                            where=where_clause,
                                            resultOffset=group,
                                            resultRecordCount=1000,
                                            response_format='geojson')
            features = None
            if 'features' in rows:
                features = rows['features']
            if not features:
                status_writer.send_status(
                    "Layer {0} has no features.".format(layer_name))
                continue
            if 'properties' in features[0]:
                attributes = OrderedDict(features[0]['properties'])
            elif 'attributes' in features[0]:
                attributes = OrderedDict(features[0]['attributes'])
            else:
                status_writer.send_status(
                    "Layer {0} has no attributes.".format(layer_name))

            if 'geometry' in features[0] and features[0]['geometry']:
                if 'type' in features[0]['geometry']:
                    geometry_type = features[0]['geometry']['type']
                elif 'rings' in features[0]['geometry']:
                    geometry_type = 'Rings'
                elif 'paths' in features[0]['geometry']:
                    geometry_type = 'Paths'
                elif 'x' in features[0]['geometry'] and 'y' in features[0][
                        'geometry']:
                    geometry_type = 'Point'
            else:
                geometry_type = 'Table'
            if 'crs' in rows:
                if ':' in rows['crs']['properties']['name']:
                    geo['srid'] = rows['crs']['properties']['name'].split(
                        ':')[1]
                else:
                    geo['srid'] = rows['crs']['properties']['name']
            elif 'spatialReference' in rows:
                geo['srid'] = rows['spatialReference']['wkid']
            # Map the field and it's value.
            if not job.fields_to_keep == ['*']:
                for fk in job.fields_to_keep:
                    mapped_fields = dict((name, val)
                                         for name, val in attributes.items()
                                         if fk in name)
                    if job.fields_to_skip:
                        for fs in job.fields_to_skip:
                            [
                                mapped_fields.pop(name) for name in attributes
                                if name in fs
                            ]
            else:
                mapped_fields = copy.deepcopy(attributes)

            # This will generate the field mapping dictionary.
            job.tables_to_keep()

            date_fields = set()
            field_map = None
            for mapping in job.field_mapping:
                if mapping['name'] == layer_name:
                    field_map = mapping['map']
                    break

            if not field_map:
                for mapping in job.field_mapping:
                    if mapping['name'] == '*':
                        field_map = mapping['map']
                        break

            if field_map:
                for k, v in mapped_fields.items():
                    if k in field_map:
                        new_field = field_map[k]
                        mapped_attributes[new_field] = mapped_fields.pop(k)
                    else:
                        field_type = job.default_mapping(fields_types[k])
                        if field_type == 'fd_':
                            # Because dates are being returned as longs.
                            mapped_attributes[field_type + k] = v
                            date_fields.add(field_type + k)
                        else:
                            mapped_attributes[field_type +
                                              k] = mapped_fields.pop(k)
            else:
                for k, v in mapped_fields.items():
                    field_type = job.default_mapping(fields_types[k])
                    if field_type == 'fd_':
                        # Because dates are being returned as longs.
                        mapped_attributes[field_type + k] = v
                        date_fields.add(field_type + k)
                    else:
                        mapped_attributes[field_type +
                                          k] = mapped_fields.pop(k)

            i += len(features)
            if geometry_type == 'Table':
                for x, row in enumerate(features):
                    hash_obj = hashlib.md5(
                        os.path.join(
                            url,
                            '{0}_{1}_{2}_{3}'.format(job.location_id,
                                                     layer_name, i, x)))
                    entry['id'] = hash_obj.hexdigest()
                    entry['location'] = job.location_id
                    entry['action'] = job.action_type
                    if 'properties' in row:
                        mapped_fields = dict(
                            zip(mapped_attributes.keys(),
                                row['properties'].values()))
                    else:
                        mapped_fields = dict(
                            zip(mapped_attributes.keys(),
                                row['attributes'].values()))
                    # Convert longs to datetime.
                    for df in date_fields:
                        mapped_fields[df] = get_date(mapped_fields[df])
                    mapped_fields['id'] = entry['id']
                    mapped_fields['meta_table_name'] = layer_name
                    mapped_fields['_discoveryID'] = job.discovery_id
                    mapped_fields['format_category'] = 'GIS'
                    mapped_fields['format_type'] = 'Service Layer Record'
                    mapped_fields[
                        'format'] = 'application/vnd.esri.service.layer.record'
                    entry['entry'] = {'fields': mapped_fields}
                    job.send_entry(entry)
                    if (x % increment) == 0:
                        status_writer.send_percent(
                            i / row_count,
                            "{0} {1:%}".format(layer_name,
                                               i / row_count), 'agol_worker')
            else:
                # Faster to do one if check for geometry type then to condense code and check in every iteration.
                if geometry_type == 'Point':
                    for x, feature in enumerate(features, 1):
                        if 'type' not in feature['geometry']:
                            pt = feature['geometry']
                            geo['lon'] = pt['x']
                            geo['lat'] = pt['y']
                        else:
                            geo['wkt'] = geo_json_converter.convert_to_wkt(
                                feature['geometry'], 14)
                        hash_obj = hashlib.md5(
                            os.path.join(
                                url, '{0}_{1}_{2}_{3}'.format(
                                    job.location_id, layer_name, int(i), x)))
                        entry['id'] = hash_obj.hexdigest()
                        entry['location'] = job.location_id
                        entry['action'] = job.action_type
                        if 'properties' in feature:
                            mapped_fields = dict(
                                zip(mapped_attributes.keys(),
                                    feature['properties'].values()))
                        else:
                            mapped_fields = dict(
                                zip(mapped_attributes.keys(),
                                    feature['attributes'].values()))
                        # Convert longs to datetime.
                        for df in date_fields:
                            mapped_fields[df] = get_date(mapped_fields[df])
                        mapped_fields['id'] = entry['id']
                        mapped_fields['_discoveryID'] = job.discovery_id
                        mapped_fields['geometry_type'] = 'Point'
                        mapped_fields['meta_table_name'] = layer_name
                        mapped_fields['format_category'] = 'GIS'
                        mapped_fields['format_type'] = 'Service Layer Feature'
                        mapped_fields[
                            'format'] = 'application/vnd.esri.service.layer.record'
                        entry['entry'] = {'geo': geo, 'fields': mapped_fields}
                        job.send_entry(entry)
                        if (x % increment) == 0:
                            status_writer.send_percent(
                                i / row_count,
                                "{0} {1:%}".format(layer_name, i / row_count),
                                'agol_worker')
                else:
                    # Fallback path: iterate the raw feature dicts one by one,
                    # build a geometry per feature, map attributes, and send one
                    # index entry per feature via job.send_entry().
                    generalize_value = job.generalize_value
                    # x is the 1-based feature counter within this page; the
                    # outer counter i (row progress) is managed above this view.
                    for x, feature in enumerate(features, 1):
                        try:
                            # geometry = make_feature(feature)  # Catch possible null geometries.
                            # Build geometry according to the service's reported
                            # geometry type. Esri JSON uses 'rings'/'paths';
                            # GeoJSON uses 'coordinates'. Only the first ring/path
                            # is used — multipart features lose their other parts.
                            if geometry_type == 'Rings':
                                geometry = geo_json_converter.create_polygon(
                                    feature['geometry']['rings'][0])
                            elif geometry_type == 'Paths':
                                geometry = geo_json_converter.create_polyline(
                                    feature['geometry']['paths'][0])
                            elif geometry_type == 'LineString':
                                geometry = geo_json_converter.create_polyline(
                                    feature['geometry']['coordinates'])
                            else:
                                # Points and anything else: convert straight to
                                # WKT with 14 decimal places of precision.
                                geometry = geo_json_converter.convert_to_wkt(
                                    feature['geometry'], 14)
                        except RuntimeError:
                            # Unbuildable/null geometry: skip this feature entirely.
                            continue

                        # if generalize_value > 0.9:
                        #     geo['xmin'], geo['xmax'] = geometry.extent.XMin, geometry.extent.XMax
                        #     geo['ymin'], geo['ymax'] = geometry.extent.YMin, geometry.extent.YMax
                        # NOTE(review): geo is a dict from the enclosing scope.
                        # When geometry is falsy, geo['wkt'] keeps the PREVIOUS
                        # feature's value, so this feature is indexed with stale
                        # geometry — confirm whether that is intended.
                        if geometry:
                            # A generalize value of 0 means "index the exact shape".
                            if generalize_value == 0 or generalize_value == 0.0:
                                # convert_to_wkt returns a str; the polygon/polyline
                                # builders return objects exposing a .WKT property.
                                if isinstance(geometry, str):
                                    geo['wkt'] = geometry
                                else:
                                    geo['wkt'] = geometry.WKT
                            else:
                                # Simplify the shape before indexing when the
                                # geometry_ops helper is available; otherwise fall
                                # back to the full-resolution WKT.
                                if geometry_ops:
                                    geo['wkt'] = geometry_ops.generalize_geometry(
                                        geometry, generalize_value)
                                else:
                                    geo['wkt'] = geometry.WKT
                                # geo['xmin'], geo['xmax'] = geometry.extent.XMin, geometry.extent.XMax
                                # geo['ymin'], geo['ymax'] = geometry.extent.YMin, geometry.extent.YMax

                        # Stable per-feature document id: MD5 of the service URL
                        # joined with location/layer/row/feature counters.
                        # NOTE(review): hashlib.md5() requires bytes on Python 3 —
                        # passing this str raises TypeError. This code presumably
                        # runs under Python 2; confirm, or .encode('utf-8') first.
                        hash_obj = hashlib.md5(
                            os.path.join(
                                url, '{0}_{1}_{2}_{3}'.format(
                                    job.location_id, layer_name, int(i), x)))
                        entry['id'] = hash_obj.hexdigest()
                        entry['location'] = job.location_id
                        entry['action'] = job.action_type
                        # Pair the pre-computed mapped field names with this
                        # feature's attribute values. GeoJSON features carry
                        # 'properties'; Esri JSON features carry 'attributes'.
                        # NOTE(review): this zip relies on mapped_attributes and
                        # the feature dict iterating in the same field order —
                        # verify that assumption holds for this service.
                        if 'properties' in feature:
                            mapped_fields = dict(
                                zip(
                                    mapped_attributes.keys(),
                                    OrderedDict(
                                        feature['properties']).values()))
                        else:
                            mapped_fields = dict(
                                zip(
                                    mapped_attributes.keys(),
                                    OrderedDict(
                                        feature['attributes']).values()))
                        try:
                            # Convert longs to datetime.
                            for df in date_fields:
                                mapped_fields[df] = get_date(mapped_fields[df])
                            # Normalize the service geometry type to the names
                            # the index expects.
                            if geometry_type in ('Paths', 'LineString'):
                                mapped_fields['geometry_type'] = 'Polyline'
                            elif geometry_type == 'Rings':
                                mapped_fields['geometry_type'] = 'Polygon'
                            else:
                                mapped_fields['geometry_type'] = geometry_type
                            mapped_fields['meta_table_name'] = layer_name
                            # Lazily derive and cache the layer path if the layer
                            # metadata did not include one.
                            try:
                                mapped_fields['meta_table_path'] = layer[
                                    'path']
                            except KeyError:
                                layer['path'] = url + '/' + str(layer_id)
                                mapped_fields['meta_table_path'] = layer[
                                    'path']
                            mapped_fields['id'] = entry['id']
                            mapped_fields[
                                'meta_table_location'] = os.path.dirname(
                                    layer['path'])
                            mapped_fields['format_category'] = 'GIS'
                            mapped_fields[
                                'format_type'] = 'Service Layer Feature'
                            mapped_fields[
                                'format'] = 'application/vnd.esri.service.layer.record'
                            mapped_fields['_discoveryID'] = job.discovery_id
                            entry['entry'] = {
                                'geo': geo,
                                'fields': mapped_fields
                            }
                            job.send_entry(entry)
                        except KeyError:
                            # Best-effort: a missing expected field (e.g. a date
                            # field absent from mapped_fields) still sends the
                            # entry. NOTE(review): at this point entry may lack
                            # 'entry' (geo/fields) or carry a previous feature's
                            # payload — confirm the indexer tolerates that.
                            job.send_entry(entry)
                        # Emit a progress update every `increment` rows, based on
                        # the outer row counter i (not the per-page counter x).
                        if (i % increment) == 0:
                            status_writer.send_percent(
                                i / row_count,
                                "{0} {1:%}".format(layer_name, i / row_count),
                                'agol_worker')