Beispiel #1
0
def to_json_schema(obj, mode: UpdateMode, schema: Optional[Schema] = None):
    """Derive a JSON schema from ``obj``, optionally extending ``schema``.

    Arguments:
        obj -- Object the schema is derived from.
        mode {UpdateMode} -- ``UpdateMode.GEN`` infers a schema with a
            ``SchemaBuilder``; any other mode produces a ``oneOf`` wrapper.

    Keyword Arguments:
        schema -- Existing schema, if any. It is passed through
            ``convert_from_openapi`` before being used. (default: {None})

    Returns:
        [dict] -- In GEN mode, the builder output for the (converted) existing
        schema merged with ``obj``. Otherwise ``{"oneOf": [...]}`` holding
        ``to_const(obj)`` followed by the converted existing schema when one
        was supplied.
    """
    base = None if schema is None else convert_from_openapi(schema)

    if mode == UpdateMode.GEN:
        generator = SchemaBuilder()
        if base is not None:
            generator.add_schema(base)
        generator.add_object(obj)
        return generator.to_schema()

    alternatives = [to_const(obj)]
    if base is not None:
        alternatives.append(base)
    return {"oneOf": alternatives}
Beispiel #2
0
def infer_schema(samples: List[Dict[str, Any]]) -> Schema:
    """Infer a draft-07 schema from ``samples`` and merge in ``extension``.

    Every sample is fed to a ``SchemaBuilder``; the module-level ``extension``
    schema is added last, after all samples have been processed.
    """
    schema_builder = SchemaBuilder("http://json-schema.org/draft-07/schema#")
    for record in samples:
        schema_builder.add_object(record)

    # The extension is merged only after every sample has been seen.
    schema_builder.add_schema(extension)
    inferred = schema_builder.to_schema()
    return inferred
Beispiel #3
0
    def guessSchema(self, dba, kind, existingSchema=None):
        '''
        Guess a JSONSchema for a model kind from stored examples.

        All rows of ``dba.table(kind)`` are fed to a schema builder, seeded
        with ``existingSchema`` when one is given, and the result is stored
        through the "local" segment.

        :param DatabaseAccess dba: the database to search.
        :param string kind: the model kind to guess.
        :param JSONobject existingSchema: starting schema, if any.
        :returns: True if the schema was guessed and created; False when there
            is neither a starting schema nor any example to learn from.
        :rtype: boolean
        '''
        examples = dba.table(kind).all()

        # Nothing to start from and nothing to learn from: give up early.
        if existingSchema is None and len(examples) == 0:
            logging.error(
                "Can't guess with no schema and no examples of kind {0}".
                format(kind))
            return False

        guesser = SchemaBuilder()
        if existingSchema is not None:
            guesser.add_schema(existingSchema)
        for example in examples:
            guesser.add_object(example)

        guessed = guesser.to_schema()
        self.loadSegment("local", dba).createSchema(kind, guessed)
        return True
Beispiel #4
0
    def __create_shema(attnames, atttypes, catname):
        """Write a JSON schema for a category's attributes and return its file name.

        The schema declares one property per entry of ``attnames`` whose type
        is the entry at the same index of ``atttypes``, and lists every
        attribute name as required. The file is written to
        ``attSchemas/<n>.json`` where ``<n>`` is the SHA-1 of ``catname``
        reduced modulo 10**8.

        NOTE(review): there is no ``self`` parameter; presumably this is used
        as a static helper -- confirm against the enclosing class.

        :returns: the bare file name ``'<n>.json'`` (without the directory).
        """
        digest = hashlib.sha1(catname.encode('utf-8')).hexdigest()
        shema_name = int(digest, 16) % (10**8)

        builder = SchemaBuilder()
        for index, name in enumerate(attnames):
            attribute = {"type": "{0}".format(atttypes[index])}
            builder.add_schema({
                "type": "object",
                "properties": {"{0}".format(name): attribute},
            })
        builder.add_schema({"required": attnames})

        # Serialize before opening the file so a failure leaves no empty file.
        serialized = str(builder.to_json())

        with open('attSchemas/{0}.json'.format(shema_name),
                  'w',
                  encoding='utf8') as out:
            out.write(serialized)
        return '{0}.json'.format(shema_name)
Beispiel #5
0
    def generate_json_schema(self, data):
        '''
        Introspect JSON-like data with a SchemaBuilder and return the skeleton
        of a JSON Schema document as a JSON string.

        :param data: a dictionary, a list of dictionaries, or a JSON string
        that decodes to one of those
        :return: JSON string with the generated schema, or a string starting
        with "Error:" when the input is empty, does not start with a
        dictionary, or the builder raises
        '''
        records = json.loads(data) if isinstance(data, str) else data
        if isinstance(records, dict):
            # A single object is handled as a one-element list.
            records = [records]

        if len(records) == 0:
            return "Error: your list of objects (dictionaries) must contain at least one object to process"
        if not isinstance(records[0], dict):
            return "Error: your list must contain a dictionary type object"

        try:
            builder = SchemaBuilder()
            builder.add_schema({"type": "object", "properties": {}})
            # Each key/value pair is added as its own single-property object.
            for record in records:
                for key, value in record.items():
                    builder.add_object({key: value})
        except Exception as e:
            return f"Error: {e}"
        else:
            return builder.to_json()
Beispiel #6
0
def generate_schema(items):
    """Build a JSON schema describing every element of ``items``.

    The builder is seeded with an empty object schema before the items are
    added, so that seed is part of the result even when ``items`` is empty.
    """
    seed = {"type": "object", "properties": {}}
    schema_builder = SchemaBuilder()
    schema_builder.add_schema(seed)
    for entry in items:
        schema_builder.add_object(entry)
    return schema_builder.to_schema()
Beispiel #7
0
def infer_schema(samples):
    """Infer a draft-07 schema from ``samples`` and merge in ``extension``.

    Each sample is added to a ``SchemaBuilder``; the module-level
    ``extension`` schema is merged afterwards.
    """
    schema_builder = SchemaBuilder("http://json-schema.org/draft-07/schema#")
    for sample in samples:
        schema_builder.add_object(sample)

    # Merge the extension last, once all samples have been processed.
    schema_builder.add_schema(extension)
    inferred = schema_builder.to_schema()
    return inferred
Beispiel #8
0
class JsonSchemaBuilder:
    '''
        Fluent wrapper around ``SchemaBuilder`` that produces a ``JsonSchema``.

        Every ``schema``/``string``/``object`` call returns the builder itself
        so calls can be chained; ``build`` produces the final result.

        Keyword Arguments:
            schema_uri: URI to be set for '$schema' key. A falsy value leaves
                the underlying builder at its default.
    '''

    def __init__(self, *, schema_uri=None):
        # Only forward the URI when one was actually supplied.
        options = {'schema_uri': schema_uri} if schema_uri else {}
        self.__builder = SchemaBuilder(**options)

    def schema(self, schema):
        '''
            Merge an existing schema into the overall schema.

            May be called repeatedly to merge several schemas.

            Arguments:
                schema: Json schema dict
        '''
        self.__builder.add_schema(schema)
        return self

    def string(self, jstr):
        '''
            Parse a string into an object and merge its schema.

            May be called repeatedly to add more objects.

            Arguments:
                jstr: A string representing a Python object; surrounding
                    whitespace is stripped before parsing.
        '''
        parsed = Json.from_str(jstr.strip(), allow_any=True)
        self.object(parsed)
        return self

    def object(self, jobj):
        '''
            Merge the schema inferred from an object into the overall schema.

            May be called repeatedly to add more objects.

            Arguments:
                jobj: JsonDict/JsonList (their ``raw_object`` is used) or any
                    other object, which is passed to the builder unchanged.
        '''
        # Exact type match on purpose: only JsonDict/JsonList are unwrapped.
        unwrap = type(jobj) in (JsonDict, JsonList)
        self.__builder.add_object(jobj.raw_object if unwrap else jobj)
        return self

    def build(self):
        '''
            Return a `JsonSchema` for everything added to the builder so far.
        '''
        built = self.__builder.to_schema()
        return JsonSchema(built)
 def generate_schema(self) -> dict:
     """
     Infer a json-schema from ``self.to_dict()``.

     :return: a dict json-schema based on the current data, seeded with an
         empty object schema
     """
     seed = {"type": "object", "properties": {}}
     schema_builder = SchemaBuilder()
     schema_builder.add_schema(seed)
     schema_builder.add_object(self.to_dict())
     generated = schema_builder.to_schema()
     return generated
Beispiel #10
0
def get_json_schemas(json_data):
    """Return the JSON schema inferred for ``json_data``.

    The builder is seeded with an empty object schema before ``json_data``
    is added.
    """
    seed = {"type": "object", "properties": {}}
    schema_builder = SchemaBuilder()
    schema_builder.add_schema(seed)
    schema_builder.add_object(json_data)
    return schema_builder.to_schema()
Beispiel #11
0
    def build_schemas(self):
        """Do a pass over the files and use GenSon to generate their schemas.

        For each entry of ``self.directories`` a ``SchemaBuilder`` is seeded
        with the schema stored under ``self.state['schemas'][dirname]`` (an
        empty object schema is stored and used when none exists yet) and is
        then fed every record of every file in ``d['files']``.  How a file is
        read depends on ``self.file_format`` (``'jsonl'``, ``'csv'`` or
        ``'log'``); for any other value the files contribute nothing.

        The resulting schema is written to both
        ``self.directories[dirpath]['schema']`` and
        ``self.state['schemas'][dirname]``.
        """

        # TODO add sampling so that we don't have to pass over every single record

        LOGGER.info('Building schemas')

        if not self.state.get('schemas'):
            self.state['schemas'] = {}

        for dirpath, d in self.directories.items():
            dirname = d['dirname']
            LOGGER.info('Building schema for `{}`'.format(dirname))
            schema_builder = SchemaBuilder()

            if not self.state['schemas'].get(dirname):
                self.state['schemas'][dirname] = {
                    "type": "object",
                    "properties": {}
                }
            else:
                LOGGER.info(
                    "Existing schema for `{}` will be used as seed schema".
                    format(dirname))

            schema_builder.add_schema(self.state['schemas'][dirname])

            for f in d['files']:
                if self.file_format == 'jsonl':
                    # Context manager so the handle is closed even if a line
                    # fails to parse (the original never closed it).
                    with open(f['absolute_path'], 'r') as handle:
                        for line in handle:
                            parsed_line = json.loads(line)
                            parsed_line = self._add_key_to_rec(
                                parsed_line, line)
                            schema_builder.add_object(parsed_line)
                elif self.file_format == 'csv':
                    # Note: parsing dates is pointless until date formatting support in GenSon
                    for df in pd.read_csv(f['absolute_path'],
                                          parse_dates=False,
                                          chunksize=1):
                        rec = df.to_dict('records')[0]
                        rec = self._add_key_to_rec(rec)
                        schema_builder.add_object(rec)
                elif self.file_format == 'log':
                    # TODO Use pattern per table and get it not from config
                    grok = Grok(CONFIG['grok_pattern'])
                    with open(f['absolute_path'], 'r') as handle:
                        for line in handle:
                            parsed_line = grok.match(line)
                            # Unmatched lines still contribute the raw-line
                            # field below.
                            if not parsed_line:
                                parsed_line = {}
                            parsed_line['_sdc_raw_log_line'] = line
                            schema_builder.add_object(parsed_line)

            self.directories[dirpath]['schema'] = schema_builder.to_schema()
            self.state['schemas'][dirname] = self.directories[dirpath][
                'schema']

        LOGGER.info('Done building schemas')
Beispiel #12
0
 def save_common_schemas():
     """Write every model in ``COMMON_MODELS`` to disk as a JSON schema file.

     Each model is passed through a ``SchemaBuilder``.  For names ending in
     ``resource_list.json`` the ``next`` and ``previous`` properties are made
     nullable strings.  The file path is the model name made relative to its
     own root, and the JSON is written with a 4-space indent and sorted keys.
     """
     for name, model in COMMON_MODELS.items():
         builder = SchemaBuilder()
         builder.add_schema(model)
         schema = builder.to_schema()

         if name.endswith("resource_list.json"):
             for link in ("next", "previous"):
                 schema["properties"][link]["type"] = ["null", "string"]

         target = Path(name)
         with target.relative_to(target.root).open("w") as f:
             f.write(json.dumps(schema, indent=4, sort_keys=True))
Beispiel #13
0
def infer_schema(data_array):
    """Infer a JSON schema covering every element of ``data_array``.

    An empty object schema is merged in before each element is added, so
    that seed is only present when ``data_array`` is non-empty.
    """
    schema_builder = SchemaBuilder()
    for sample in data_array:
        seed = {"type": "object", "properties": {}}
        schema_builder.add_schema(seed)
        schema_builder.add_object(sample)
    return schema_builder.to_schema()
def main():
    """Write a ``conf.json`` simulation configuration built from ``sys.argv``.

    Positional arguments read from ``sys.argv``:
        1: base directory
        2: cloth mesh path
        3: data sub-directory (joined onto the base directory)
        4: material file name (looked up under ``<base>/materials/``)
        5: frame time (float)
        6: frame steps (int)
        7: end time (int)

    The configuration is passed through a ``SchemaBuilder`` and its JSON
    output is written to ``<base>/<data sub-directory>/conf.json``.
    """
    print(str(sys.argv))

    mesh = sys.argv[2]
    dataLocation = os.path.join(sys.argv[1], sys.argv[3])
    material = os.path.join(sys.argv[1], "materials/" + sys.argv[4])

    builder = SchemaBuilder()
    builder.add_schema({
        "frame_time":
        float(sys.argv[5]),
        "frame_steps":
        int(sys.argv[6]),
        "end_time":
        int(sys.argv[7]),
        "cloths": [{
            "mesh": mesh,
            "materials": [{
                "data": material,
                "thicken": 2
            }],
            "remeshing": {
                "refine_angle": 0.3,
                "refine_compression": 0.005,
                "refine_velocity": 0.5,
                "size": [10e-3, 200e-3],
                "aspect_min": 0.2
            }
        }],
        "handles": [{
            "end_time": 7
        }],
        "motions": [[{
            "time": 0,
            "transform": {
                "scale": 1.0
            },
        }]],
        "obstacles": [{
            "mesh": dataLocation + "/body.obj",
            "motion": 0
        }],
        "gravity": [0, 0, -9.8],
        "disable": ["popfilter", "remeshing"],
        "magic": {
            "repulsion_thickness": 5e-3,
            "collision_stiffness": 1e6
        }
    })

    # The original also called builder.to_schema() here and discarded the
    # result; that call is dropped because only the to_json() output is used.
    # The context manager guarantees the file is closed even if serialization
    # or the write fails.
    with open(os.path.join(str(dataLocation), 'conf.json'), 'w') as conf_file:
        conf_file.write(builder.to_json(indent=1))
Beispiel #15
0
 def expect_request(self, schema, merge=False):
     """Set (or merge into) the expected request schema and return it.

     ``schema`` is first normalized with ``self._input_object``; when the
     result has no ``"properties"`` key it is wrapped as
     ``{"properties": schema}``.  With a truthy ``merge`` (as interpreted by
     ``self._input_boolean``) the new schema is merged into the current
     request schema with a ``SchemaBuilder``; otherwise it replaces it.
     """
     request_schema = self._input_object(schema)
     if "properties" not in request_schema:
         request_schema = {"properties": request_schema}

     if not self._input_boolean(merge):
         self.schema['properties']['request'] = request_schema
     else:
         merger = SchemaBuilder(schema_uri=False)
         merger.add_schema(self.schema['properties']['request'])
         merger.add_schema(request_schema)
         self.schema['properties']['request'] = merger.to_schema()
     return self.schema['properties']['request']
Beispiel #16
0
    def generate_json_schema(self,
                             data,
                             return_type="json",
                             build_definitions=False):
        '''
        Introspect JSON-like data with a SchemaBuilder and return the skeleton
        of a JSON Schema document.

        :param data: a dictionary, a list of dictionaries, or a JSON string
        that decodes to one of those
        :param return_type: "json" (the default) returns a JSON string; any
        other value returns the schema as a dictionary
        :param build_definitions: when true, prompt on stdin for a title and
        description for the schema and for each property; each property is
        then reduced to its type, title and description
        :return: the generated schema, or a string starting with "Error:"
        when the input is empty, does not start with a dictionary, or the
        builder raises
        '''
        records = json.loads(data) if isinstance(data, str) else data
        if isinstance(records, dict):
            # A single object is handled as a one-element list.
            records = [records]

        if len(records) == 0:
            return "Error: your list of objects (dictionaries) must contain at least one object to process"
        if not isinstance(records[0], dict):
            return "Error: your list must contain a dictionary type object"

        try:
            builder = SchemaBuilder()
            builder.add_schema({"type": "object", "properties": {}})
            # Each key/value pair is added as its own single-property object.
            for record in records:
                for key, value in record.items():
                    builder.add_object({key: value})
        except Exception as e:
            return f"Error: {e}"

        schema = json.loads(builder.to_json())

        if build_definitions:
            schema["title"] = input("schema title: ")
            schema["description"] = input("schema description: ")
            described = {}
            for prop_name, prop_schema in schema["properties"].items():
                # Dict literals evaluate in order: type, then the two prompts.
                described[prop_name] = {
                    "type": prop_schema["type"],
                    "title": input(f"title for {prop_name}: "),
                    "description": input(f"description for {prop_name}: "),
                }
            schema["properties"] = described

        return json.dumps(schema) if return_type == "json" else schema
Beispiel #17
0
class Tap:
    """Iterator that turns rows from a tap object into encoded messages.

    Each ``next()`` call asks ``tap.emit()`` for a row and returns a pair
    ``(schema_message_bytes, record_message_bytes)``.  When no ``tap_schema``
    is given, the schema is inferred incrementally from every row seen so far.
    A falsy row from ``tap.emit()`` is returned unchanged; ``StopIteration``
    is never raised here, so the consumer must detect the end itself.
    """

    @check_type
    def __init__(self, tap, tap_schema, tap_name: str, tap_key: str):
        """
        :param tap: object exposing ``emit()``.
        :param tap_schema: fixed schema dict, or None to infer it from rows.
        :param tap_name: stream name used in the emitted messages.
        :param tap_key: key that every row must contain.
        :raises ValueError: if ``tap_schema`` is neither None nor a dict.
        """
        # Fixed: the original tested ``isinstance(tap, dict)``, which rejected
        # every non-None schema because the tap itself is not a dict.
        if tap_schema is not None and not isinstance(tap_schema, dict):
            raise ValueError('tap_schema must be None or a dict')

        self.tap = tap
        self.tap_schema = tap_schema
        self.tap_name = tap_name
        self.tap_key = tap_key

        # A builder is only needed when the schema has to be inferred.
        if not self.tap_schema:
            self.builder = SchemaBuilder()
            self.builder.add_schema({'type': 'object', 'properties': {}})

    def __iter__(self):
        return self

    def __next__(self):
        """Return the encoded (schema, record) pair for the next row.

        :raises ValueError: if the row is not a dict or lacks ``tap_key``.
        :raises Exception: if ``tap_key`` is neither a string nor a list.
        """
        row = self.tap.emit()
        if row:
            if not isinstance(row, dict):
                raise ValueError('tap.emit() must returned a dict')
            if self.tap_key not in row:
                raise ValueError('tap key not exist in elements from tap')

            if not self.tap_schema:
                self.builder.add_object(row)
                schema = self.builder.to_schema()
            else:
                schema = self.tap_schema

            # Fixed: the original left ``key_properties`` unbound when the key
            # was not a string, so the intended error below was never raised.
            if isinstance(self.tap_key, (str, bytes)):
                key_properties = [self.tap_key]
            else:
                key_properties = self.tap_key
            if not isinstance(key_properties, list):
                raise Exception('tap key must be a string or list of strings')

            schema_message = SchemaMessage(
                stream=self.tap_name,
                schema=schema,
                key_properties=key_properties,
                bookmark_properties=None,
            )
            s = format_message(schema_message)
            record_message = RecordMessage(
                stream=self.tap_name, record=row, time_extracted=None
            )
            r = format_message(record_message)
            row = (s.encode(), r.encode())
        return row
Beispiel #18
0
def merge_json_schemas(
        schemas: List[Schema]) -> Union[JsonSchema, InvalidSchema]:
    """Merge several ``JsonSchema`` objects into one.

    Returns an ``InvalidSchema`` instead of raising when an element is not a
    ``JsonSchema`` or when anything in the merge raises (including an empty
    ``schemas`` list, which fails on the ``schemas[0]`` lookup).
    """
    try:
        merger = SchemaBuilder()
        for candidate in schemas:
            if not isinstance(candidate, JsonSchema):
                return InvalidSchema(
                    "merge_json_schemas Only supports JsonSchema type")
            merger.add_schema(candidate.schema)
        combined = merger.to_schema()
        # The path of the first schema is reused; it does not matter here.
        return JsonSchema(combined, schemas[0].path)
    except Exception as e:
        return InvalidSchema(f"Invalid Schema, builder error: {message(e)}")
Beispiel #19
0
class PSchemaBuilder:
    """Picklable wrapper around ``SchemaBuilder``.

    Pickling goes through ``__reduce_ex__``, which rebuilds the wrapper from
    the schema produced so far rather than from the builder object itself.
    """

    def __init__(self, schema=None):
        """Create the wrapped builder, seeded with ``schema`` when given."""
        self.builder = wrapped = SchemaBuilder()
        if schema is not None:
            wrapped.add_schema(schema)

    def __reduce_ex__(self, protocol):
        """Reconstruct as ``cls(current_schema)``; ``protocol`` is ignored."""
        snapshot = self.to_schema()
        return self.__class__, (snapshot, )

    def add_schema(self, schema):
        """Forward ``schema`` to the wrapped builder."""
        self.builder.add_schema(schema)

    def add_object(self, data):
        """Forward ``data`` to the wrapped builder."""
        self.builder.add_object(data)

    def to_schema(self):
        """Return the wrapped builder's current schema."""
        return self.builder.to_schema()
Beispiel #20
0
def autogenerate_schemas(method, arg=None, params=None):
    """Fetch an example response for ``method`` and save it with its schema.

    The response of ``dev_get`` is written to
    ``./autogen_examples/<method>_example.json`` and a schema inferred from
    its elements to ``./autogen_schemas/<method>_schema.json``.  A warning is
    printed when ``dev_get`` returns None.
    """
    # Unused afterwards, but still constructed exactly as in the original.
    mw = MicroWriter(write_key=TEST_WRITE_KEY, base_url=BASE_URL)
    obj = dev_get(method=method, arg=arg, params=params)
    if obj is None:
        print('Warning: ' + method + ' failed.')
        return

    with open('./autogen_examples/' + method + '_example.json',
              'wt') as example_file:
        json.dump(fp=example_file, obj=obj)

    builder = SchemaBuilder()
    builder.add_schema({'type': 'array', 'items': []})
    for element in obj:
        builder.add_object(obj=element)
    schema = builder.to_schema()

    with open('./autogen_schemas/' + method + '_schema.json',
              'wt') as schema_file:
        json.dump(fp=schema_file, obj=schema)
Beispiel #21
0
    def draft_schema_from(self, path, save_path=None):
        """
        Draft a jsonschema from the data stored in a json file.

        The data is read with ``self.load_json`` and added to a builder seeded
        with an empty object schema.

        :param path: path to the json file.
        :param save_path: path where to save the generated schema; nothing is
            written when it is None.

        :return: the generated schema, as returned by the builder's
            ``to_json()``.
        """
        data = self.load_json(path)

        seed = {"type": "object", "properties": {}}
        schema_builder = SchemaBuilder()
        schema_builder.add_schema(seed)
        schema_builder.add_object(data)
        draft_schema = schema_builder.to_json()

        if save_path is None:
            return draft_schema

        with open(save_path, "w") as outfile:
            outfile.write(draft_schema)
        return draft_schema
Beispiel #22
0
def json2schema(seed, payload):
    """Update the ``seed`` schema with the JSON document in ``payload``.

    :param seed: existing schema to start from; ignored when falsy.
    :param payload: JSON text. A payload ending in ``'.....'`` is treated as
        truncated and skipped.
    :returns: the updated schema, or None when the payload is truncated or is
        not valid JSON.
    """
    logging.debug("seed schema %s resp payload %s", seed, payload)

    # Validate the payload first so no builder is created (and no seed is
    # merged) for input that is going to be skipped anyway.
    if payload.endswith('.....'):
        logging.warning('skipping truncated payload')
        return None

    try:
        payload_dict = json.loads(payload)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        logging.warning('skipping unrecognized payload')
        return None

    builder = SchemaBuilder(schema_uri=None)
    if seed:
        builder.add_schema(seed)
    builder.add_object(payload_dict)
    schema = builder.to_schema()
    logging.debug('schema updated %s', schema)
    return schema
Beispiel #23
0
def add_event_to_schemas(event_str: str, json_schemas: dict):
    """Tries to add the event schema to the json_schemas dict.

    The event is decoded from ``event_str``, transformed with
    ``replace_pattern_properties_and_jsons`` and merged into the schema stored
    in ``json_schemas`` under the title returned by ``get_title`` (a new
    object schema with that title is used when none is stored yet).

    Nothing is stored, and an error is logged, when ``event_str`` is not valid
    JSON or when the decoded event is not an object carrying both
    ``event_source`` and ``event_type``.
    """

    try:
        event = json.loads(event_str)
    except (json.JSONDecodeError, TypeError):
        logger.error("Input event is not a valid JSON string")
        return
    # JSON numbers, booleans and null would make the ``in`` tests raise, and
    # a JSON string would be substring-matched; only objects carry the fields.
    if (not isinstance(event, dict) or "event_source" not in event
            or "event_type" not in event):
        logger.error("Input event is missing `event_source` or `event_type`")
        return
    event = replace_pattern_properties_and_jsons(event)

    title = get_title(event)
    builder = SchemaBuilder()
    # Retrieve the schema by title, if we have already defined it before or use a new one.
    new_schema = {"title": title, "type": "object", "properties": {}}
    builder.add_schema(json_schemas.get(title, new_schema))
    # Update the schema with the current event.
    builder.add_object(event)
    # Store the updated schema in json_schemas.
    json_schemas[title] = builder.to_schema()
Beispiel #24
0
def generate():
    """Generate the client module, JSON schemas and model classes.

    Connection settings are read from the ``UNIFI_HOST``, ``UNIFI_PORT``,
    ``UNIFI_USER``, ``UNIFI_PASS`` and ``UNIFI_SITE`` environment variables.
    The function then:

    1. renders the ``client.py`` template into ``uniman/client.py``;
    2. sends one request per member of ``Endpoints`` and infers a JSON schema
       from each response, written to ``uniman/schema/<name>.schema.json``;
    3. turns each schema into Python source written to
       ``uniman/model/<name>.py``.

    :raises KeyError: if one of the environment variables is missing.
    """
    host = os.environ['UNIFI_HOST']
    port = int(os.environ['UNIFI_PORT'])
    username = os.environ['UNIFI_USER']
    password = os.environ['UNIFI_PASS']
    site = os.environ['UNIFI_SITE']

    jinja = Environment(loader=PackageLoader('generate', 'templates'))
    template = jinja.get_template('client.py')

    with open(os.path.join('uniman', 'client.py'), 'w') as client_py:
        client_py.write(template.render(endpoints=Endpoints))

    # Only the login endpoint needs a payload; all others get an empty one.
    payloads = {
        Endpoints.LOGIN.name: {'username': username, 'password': password}
    }

    session = Session()

    for endpoint in Endpoints:
        request = endpoint.value.to_request(host, port, site, payload=payloads.get(endpoint.name, {}))
        # NOTE(review): verify=False disables TLS certificate verification
        # while credentials are sent -- confirm this is intended.
        response = session.send(session.prepare_request(request), verify=False)

        builder = SchemaBuilder()
        builder.add_object(response.json())
        builder.add_schema({'title': endpoint.name.title().replace('_', '')})
        builder.add_schema({'required': []})
        builder.add_schema({'properties': {'data': {'items': {'required': []}}}})
        schema = json.loads(builder.to_json())

        base_name = endpoint.name.lower()
        schema_path = os.path.join('uniman', 'schema', base_name + '.schema.json')
        # Context managers close the files even if a write fails; the schema
        # file must be closed before it is read back below.
        with open(schema_path, 'w') as schema_file:
            schema_file.write(json.dumps(schema, indent=4))

        json_schema = json_ref_dict.materialize(
            RefDict.from_uri(schema_path), context_labeller=statham.titles.title_labeller()
        )

        parsed_schema = statham.schema.parser.parse(json_schema)
        python_class = serialize_python(*parsed_schema)

        model_path = os.path.join('uniman', 'model', base_name + '.py')
        with open(model_path, 'w') as python_class_file:
            python_class_file.write(python_class)
Beispiel #25
0
    def start(
        self,
        transformation: Callable = None,
        asynchronous: bool = False,
        debug: bool = True,
    ):
        """
        Read every line produced by the tap and forward it to all targets.

        Parameters
        ----------
        transformation: Callable, (default=None)
            a callable variable to transform tap data, this will auto generate new data schema.
        debug: bool, (default=True)
            If True, will print every rows emitted and parsed.
        asynchronous: bool, (default=False)
            If True, emit to targets in async manner, else, loop from first target until last target.

        Raises
        ------
        Exception
            If no target has been registered in `self._targets`.
        """

        if not len(self._targets):
            raise Exception(
                'targets are empty, please add a target using `source.add()` first.'
            )
        self._pipes = []
        for target in self._targets:
            if isinstance(target, str):
                # A string target is a command line: run it as a subprocess
                # and start a helper.Check_Error on it.
                p = Popen(target.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
                t = helper.Check_Error(p)
                t.start()
            else:
                # Any other target is used as-is.
                p = target

            self._pipes.append(helper.Target(p, target))

        if isinstance(self.tap, str):
            # A string tap is a command line as well; its stdout is consumed
            # line by line until an empty bytes object is read.
            pse = Popen(self.tap.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
            t = helper.Check_Error(pse)
            t.start()

            pse = iter(pse.stdout.readline, b'')
        else:
            pse = self.tap

        if transformation:
            # The builder is only needed when rows are transformed; it is
            # seeded with an empty object schema and handed to
            # helper.transformation below.
            from genson import SchemaBuilder

            builder = SchemaBuilder()
            builder.add_schema({'type': 'object', 'properties': {}})
        else:
            builder = None

        for lines in pse:
            # A None item from the tap ends the loop.
            if lines is None:
                break
            # A single bytes line is wrapped so both shapes are iterated alike.
            if isinstance(lines, bytes):
                lines = [lines]
            if transformation:
                lines = helper.transformation(lines, builder, transformation)
            for line in lines:
                line = line.decode().strip()
                # Lines that are empty after stripping are skipped.
                if len(line):
                    if debug:
                        logger.info(line)

                    # Metrics: one count per line plus its size as reported by
                    # sys.getsizeof, divided by 1000.
                    self._tap_count.inc()
                    self._tap_data.observe(sys.getsizeof(line) / 1000)
                    self._tap_data_histogram.observe(
                        sys.getsizeof(line) / 1000)

                    if asynchronous:

                        # The coroutine is defined and called for each line;
                        # it yields the list of _sinking() results for all
                        # pipes. `r` is assigned but never used or returned.
                        @gen.coroutine
                        def loop():
                            r = yield [
                                _sinking(line, pipe) for pipe in self._pipes
                            ]

                        result = loop()
                        if debug:
                            logger.info(result.result())

                    else:
                        # Sequential mode: targets are fed in registration order.
                        for pipe in self._pipes:
                            result = _sinking(line, pipe)
                            if debug:
                                logger.info(result.result())

        # After the tap is exhausted, communicate() is called on every Popen
        # target.
        for pipe in self._pipes:
            if isinstance(pipe.target, Popen):
                try:
                    pipe.target.communicate()
                # NOTE(review): bare except swallows every error here,
                # including KeyboardInterrupt -- confirm this is intended.
                except:
                    pass
Beispiel #26
0
    def start(
        self,
        transformation: Callable = None,
        asynchronous: bool = False,
        debug: bool = True,
        ignore_null: bool = True,
        graceful_shutdown: int = 30,
    ):
        """
        Read every line produced by the tap and forward it to all targets.

        Parameters
        ----------
        transformation: Callable, (default=None)
            a callable variable to transform tap data, this will auto generate new data schema.
        asynchronous: bool, (default=False)
            If True, emit to targets in async manner, else, loop from first target until last target.
        debug: bool, (default=True)
            If True, will print every rows emitted and parsed.
        ignore_null: bool, (default=True)
            If False, if one of schema value is Null, it will throw an exception.
        graceful_shutdown: int, (default=30)
            If bigger than 0, any error happened, will automatically shutdown after sleep.

        Raises
        ------
        ValueError
            If `graceful_shutdown` is negative.
        Exception
            If no target has been registered in `self._targets`, or, when
            `graceful_shutdown` is 0, wrapping any error raised while
            processing.
        """
        if graceful_shutdown < 0:
            raise ValueError('`graceful_shutdown` must bigger than -1')
        if not len(self._targets):
            raise Exception(
                'targets are empty, please add a target using `source.add()` first.'
            )
        self._pipes = []
        for target in self._targets:
            if isinstance(target, str):
                # A string target is a command line: run it as a subprocess
                # and start a helper.Check_Error on it.
                p = Popen(target.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
                t = helper.Check_Error(p, graceful_shutdown)
                t.start()
            else:
                # Any other target is used as-is.
                p = target

            self._pipes.append(helper.Target(p, target))

        if isinstance(self.tap, str):
            # A string tap is a command line as well; its stdout is consumed
            # line by line until an empty bytes object is read.
            pse = Popen(self.tap.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
            t = helper.Check_Error(pse, graceful_shutdown)
            t.start()

            pse = iter(pse.stdout.readline, b'')
        else:
            pse = self.tap
            # Reset the record counter kept on the wrapped tap object.
            self.tap.tap.count = 0

        if transformation:
            # The builder is only needed when rows are transformed; it is
            # seeded with an empty object schema and handed to
            # helper.transformation below.
            from genson import SchemaBuilder

            builder = SchemaBuilder()
            builder.add_schema({'type': 'object', 'properties': {}})
        else:
            builder = None

        try:
            for lines in pse:
                # A None item from the tap ends the loop.
                if lines is None:
                    break
                # A single bytes line is wrapped so both shapes are iterated alike.
                if isinstance(lines, bytes):
                    lines = [lines]
                if transformation:
                    lines = helper.transformation(
                        lines,
                        builder,
                        transformation,
                        tap_schema=self.tap_schema,
                    )
                for line in lines:
                    line = line.decode().strip()
                    # Lines that are empty after stripping are skipped.
                    if len(line):
                        if debug:
                            logger.info(line)

                        # Schema messages are detected by substring match and
                        # only parsed when null types must be rejected.
                        # NOTE(review): v['type'].lower() assumes every
                        # property type is a string; a list-valued type would
                        # fail here -- confirm against the emitted schemas.
                        if '"type": "SCHEMA"' in line and not ignore_null:
                            l = json.loads(line)
                            for k, v in l['schema']['properties'].items():
                                if v['type'].lower() == 'null':
                                    raise ValueError(
                                        f'{k} is a NULL, some of database cannot accept NULL schema. To ignore this exception, simply set `ignore_null` = True.'
                                    )

                        # Metrics: one count per line plus its size as reported
                        # by sys.getsizeof, divided by 1000.
                        self._tap_count.inc()
                        self._tap_data.observe(sys.getsizeof(line) / 1000)
                        self._tap_data_histogram.observe(
                            sys.getsizeof(line) / 1000)

                        if asynchronous:

                            # The coroutine is defined and called for each
                            # line; it yields the list of _sinking() results
                            # for all pipes. `r` is assigned but never used or
                            # returned.
                            @gen.coroutine
                            def loop():
                                r = yield [
                                    _sinking(line, pipe)
                                    for pipe in self._pipes
                                ]

                            result = loop()
                            if debug:
                                logger.info(result.result())

                        else:
                            # Sequential mode: targets are fed in registration
                            # order.
                            for pipe in self._pipes:
                                result = _sinking(line, pipe)
                                if debug:
                                    logger.info(result.result())

                        # Record messages are counted on the wrapped tap, but
                        # only when the tap is an object rather than a command.
                        if '"type": "RECORD"' in line and not isinstance(
                                self.tap, str):
                            self.tap.tap.count += 1

            # After the tap is exhausted, communicate() is called on every
            # Popen target.
            for pipe in self._pipes:
                if isinstance(pipe.target, Popen):
                    try:
                        pipe.target.communicate()
                    # NOTE(review): bare except swallows every error here,
                    # including KeyboardInterrupt -- confirm this is intended.
                    except:
                        pass

        except Exception as e:
            if graceful_shutdown > 0:
                # Log, wait for the configured delay, then terminate the
                # process with os._exit(1).
                logger.error(e)
                time.sleep(graceful_shutdown)
                os._exit(1)
            else:
                # Re-raised wrapped in a plain Exception, so the original
                # exception type is not preserved for callers.
                raise Exception(e)
Beispiel #27
0
def load_jsons(path):
    """Load JSON documents from a single file or from every file in a directory.

    Arguments:
        path {str} -- Path to a JSON file, or to a directory in which every
            entry is a JSON file.

    Returns:
        [list] -- The parsed documents: one per directory entry (in
            ``os.listdir`` order), or a one-element list for a single file.
    """
    if os.path.isdir(path):
        file_paths = [os.path.join(path, name) for name in os.listdir(path)]
    else:
        file_paths = [path]

    documents = []
    for file_path in file_paths:
        # The context manager closes each handle as soon as it is parsed,
        # instead of leaving it open until garbage collection.
        with open(file_path) as handle:
            documents.append(json.load(handle))
    return documents


if __name__ == "__main__":
    # Command-line entry point: build one JSON schema from the JSON file (or
    # directory of JSON files) given as the first argument and print it.
    if len(sys.argv) < 2:
        print("args <path_json_or_dir>")
        # sys.exit rather than the site-provided exit(), which is intended for
        # the interactive shell; a non-zero status signals the usage error.
        sys.exit(1)
    path = sys.argv[1]

    builder = SchemaBuilder()
    # Seed the builder with an empty object schema before adding samples.
    builder.add_schema({"type": "object", "properties": {}})
    for sample in load_jsons(path):
        builder.add_object(sample)

    print(builder.to_json(indent=2))
Beispiel #28
0
    def ProfileDataStream(self, request_iterator, context):
        """Profile a stream of JSON records; stream back schema and report.

        Consumes every record from ``request_iterator``, infers a JSON schema
        from the decoded payloads, runs the profiler on a flattened data
        frame, and yields the results as response messages.

        Arguments:
            request_iterator -- Iterable of records exposing ``request_id``
                and ``json_data`` (a JSON string).
            context -- Not used by this method.

        Yields:
            profiler_pb2.ProfileDataStreamResponse -- On success, one message
            whose ``meta`` carries the request id, schema, record count,
            sizes and any profiler error, followed by one message per chunk
            of the minified HTML report (a single empty chunk when there is
            no report). On failure, a single message whose ``meta.error``
            describes the problem.
        """
        request_id = "none"
        builder = SchemaBuilder()
        builder.add_schema({"type": "object", "properties": {}})
        # 'UNKNOWN' doubles as the "no error" marker tested before the
        # success-path meta message is filled in.
        error = domain_pb2.ProfilerError(
            type=domain_pb2.ProfilerError.Type.Value('UNKNOWN'))

        message = profiler_pb2.ProfileDataStreamResponse()
        total_records = 0
        record_list = []

        try:
            for record in request_iterator:
                total_records += 1
                request_id = record.request_id
                if total_records == 1:
                    logging.info(
                        'started profiling for request %s with config %s' %
                        (request_id, self.config_path))
                json_data = json.loads(record.json_data)
                record_list.append(json_data)

            for jd in record_list:
                builder.add_object(jd)
            # Nested keys are flattened into '/'-separated column names.
            data_frame = pd.DataFrame(json_normalize(record_list, sep='/'))

            profile = None
            report_length = 0
            # A profiler failure is recorded in `error` but does not abort the
            # request: the schema is still returned, with an empty report.
            try:
                profile = run_profiler(data_frame)
            except FunctionTimedOut as te:
                err_msg = 'profile timeout for request_id %s after %ss data_frame shape (rows, cols): %s' % \
                          (request_id, te.timedOutAfter, data_frame.shape)
                logging.warning(err_msg)
                error = domain_pb2.ProfilerError(
                    message=err_msg,
                    type=domain_pb2.ProfilerError.Type.Value(
                        'PROFILE_EXCEPTION'))

            except Exception as e:
                # The exception needs a %s placeholder: passing it as a bare
                # extra argument makes the logging module fail to format the
                # record and the exception text is lost.
                logging.error('generic exception in timeout: %s', e)
                error = domain_pb2.ProfilerError(
                    message=str(e),
                    type=domain_pb2.ProfilerError.Type.Value(
                        'PROFILE_EXCEPTION'))

            schema = builder.to_schema()

            if profile is not None:
                html = profile.to_html()

                html = minify(html,
                              remove_all_empty_space=True,
                              remove_comments=True)

                report_length = len(html)

            schema_json = json.dumps(schema)
            schema_length = len(schema_json)
            logging.info(
                'profiling complete for request %s total_records: %s, schema_length: %s, report_length: %s'
                % (request_id, total_records, schema_length, report_length))

            # The max message size of a GRPC call in bytes is 4194304. The header includes 5 bytes, 1 for
            # the compressed flag and 4 for the unsigned integer. Therefore should be 4194299
            MAX_MESSAGE_SIZE = 4194299

            # NOTE(review): report_length and the slices below count
            # characters of `html`, not encoded bytes; if the report can
            # contain non-ASCII text a chunk may exceed the byte limit --
            # confirm against the type of the `profile` field.
            if report_length == 0:
                # `html` is only bound when a profile exists, so it must not
                # be touched here; an empty report is sent as one empty chunk.
                profile_stream = ['']
            else:
                profile_stream = [
                    html[start:start + MAX_MESSAGE_SIZE]
                    for start in range(0, report_length, MAX_MESSAGE_SIZE)
                ]

            if error is not None and error.type != domain_pb2.ProfilerError.Type.Value(
                    'UNKNOWN'):
                message.meta.error.message = error.message
                message.meta.error.type = error.type

            message.meta.request_id = request_id
            message.meta.schema = schema_json
            message.meta.total_records = total_records
            message.meta.service_version = os.getenv(
                'SDM_PROFILER_SERVICE_VERSION', 'default')
            message.meta.schema_byte_size = schema_length
            message.meta.profile_byte_size = report_length

            # The meta message goes first, then the report chunks in order.
            yield message

            for profile_portion in profile_stream:
                message = profiler_pb2.ProfileDataStreamResponse()
                message.profile = profile_portion
                yield message
            return

        except json.decoder.JSONDecodeError as e:
            # Only json.loads inside the record loop raises this, so `record`
            # is the offending record; include the start of its payload.
            first_chars = '><'
            if record is not None and record.json_data is not None:
                first_chars = '>' + record.json_data[0:10] + '<'
            err_msg = 'profiling failed for request %s with error %s %s, record nr: %s first 10 chars %s' % \
                      (request_id, type(e), e, total_records, first_chars)
            logging.error(err_msg)
            error = domain_pb2.ProfilerError(
                message=err_msg,
                type=domain_pb2.ProfilerError.Type.Value('UNKNOWN_ENCODING'))

        except Exception as e:
            logging.error('profiling failed for request %s with error %s' %
                          (request_id, e))
            error = domain_pb2.ProfilerError(
                message=str(e),
                type=domain_pb2.ProfilerError.Type.Value('NO_DATA'))

        # Reached only after one of the outer handlers ran: report the
        # failure in a single message.
        message.meta.request_id = request_id
        message.meta.error.message = error.message
        message.meta.error.type = error.type
        yield message
def build_schema(json_sample):
    """Infer a JSON schema from one sample object.

    Arguments:
        json_sample -- The sample object handed to the schema builder.

    Returns:
        The schema produced by the builder after it has been seeded with an
        empty object schema and fed ``json_sample``.
    """
    seed_schema = {"type": "object", "properties": {}}
    schema_builder = SchemaBuilder()
    schema_builder.add_schema(seed_schema)
    schema_builder.add_object(json_sample)
    inferred_schema = schema_builder.to_schema()
    return inferred_schema