def merge(self):
    self._load()
    merger = Merger(self.schema)
    self.merged = self.jsons[0]
    for i in range(1, len(self.jsons)):
        self.merged = merger.merge(self.merged, self.jsons[i])
    return self

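# The loop in merge() above is a left fold. A hedged equivalent as a
# standalone helper, assuming jsons is a non-empty list of loaded documents:
import functools
from jsonmerge import Merger

def merge_all(schema, jsons):
    """Fold a list of JSON documents into one with jsonmerge."""
    merger = Merger(schema)
    return functools.reduce(merger.merge, jsons)
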
def __init__(self):
    self.extendAmass()
    for domain in self.DOMAINS:
        self.extendDomains(domain)
    # merge brute-forced subdomains into existing entries by name
    for entry2 in self.subDomainsBrute():
        duplicate = False
        for i, entry in enumerate(self.data):
            if entry['name'] == entry2['name']:
                duplicate = True
                merger = Merger(self.SCHEMA)
                self.data[i] = merger.merge(entry, entry2)
        if not duplicate:
            self.data.append(entry2)
    # add IP-based entries, skipping addresses already present
    for ip_addr in self.IPS:
        for entry2 in self.ip(ip_addr):
            duplicate = False
            for i, entry in enumerate(self.data):
                if 'addresses' in entry:
                    for address in entry['addresses']:
                        if address['ip'] == entry2['addresses'][0]['ip']:
                            duplicate = True
                            break
                if duplicate:
                    break
            if not duplicate:
                self.data.append(entry2)

def merge_data_sources(path_from: str, path_to: str):
    from_dirs = os.listdir(path_from)
    to_dirs = os.listdir(path_to)
    schema = {'properties': {'submissions': {'mergeStrategy': 'append'}}}
    merger = Merger(schema)
    for rd in from_dirs:
        from_dir = OPJ(path_from, rd)
        to_dir = OPJ(path_to, rd)
        if rd not in to_dirs:
            shutil.copytree(from_dir, to_dir, dirs_exist_ok=True)
            continue
        with open(OPJ(to_dir, 'meta.json'), 'r', encoding='utf-8') as fp:
            lhs_meta_json = json.load(fp)
        with open(OPJ(from_dir, 'meta.json'), 'r', encoding='utf-8') as fp:
            rhs_meta_json = json.load(fp)
        result = merger.merge(lhs_meta_json, rhs_meta_json)
        # copy the whole tree over, then restore the merged meta.json
        shutil.copytree(from_dir, to_dir, dirs_exist_ok=True)
        with open(OPJ(to_dir, 'meta.json'), 'w', encoding='utf-8') as fp:
            json.dump(result, fp, indent=4)

def merge_clinical_trial_metadata(patch: dict, target: dict) -> Tuple[dict, List[str]]:
    """
    Merges two clinical trial metadata objects together.

    Args:
        patch: the metadata object to add
        target: the existing metadata object

    Returns:
        arg1: the merged metadata object
        arg2: list of validation errors
    """
    validator: _Validator = load_and_validate_schema(
        "clinical_trial.json", return_validator=True)

    # uncomment to assert the original object is valid
    # try:
    #     validator.validate(target)
    # except jsonschema.ValidationError as e:
    #     raise InvalidMergeTargetException(
    #         f"Merge target is invalid: {target}\n{e}"
    #     ) from e

    # assert the immutable fields are equal; these fields are required
    # in the schema, so previous validation asserts they exist
    if patch.get(PROTOCOL_ID_FIELD_NAME) != target.get(PROTOCOL_ID_FIELD_NAME):
        raise InvalidMergeTargetException(
            "Unable to merge trials with different " + PROTOCOL_ID_FIELD_NAME)

    # merge the two documents
    merger = Merger(validator.schema, strategies=PRISM_MERGE_STRATEGIES)
    merged = merger.merge(target, patch)

    return merged, list(validator.iter_error_messages(merged))

def get_data_zomato_api(*args, **kwargs):
    api_hook = HttpHook(http_conn_id="zomato_api", method='GET')
    data_dict = {}
    schema = {"properties": {"restaurants": {"mergeStrategy": "append"}}}
    merger = Merger(schema)
    # page through the results 20 at a time, appending "restaurants"
    for i in range(0, 100, 20):
        endpoint_url = ("search?entity_id=3&entity_type=city"
                        "&start={}&count=20&sort=rating".format(i))
        resp_url = api_hook.run(endpoint=endpoint_url)
        resp = json.loads(resp_url.content)
        if i == 0:
            data_dict.update(resp)
            result = data_dict
        else:
            result = merger.merge(result, resp)
    with open("/Users/preetiyerkuntwar/documents/Zomato-test/all_restro.json",
              "w") as f:
        json.dump(result, f)

def test_overwrite_any():
    """Test that the alias for jsonmerge.strategies.Overwrite is set up properly"""
    schema = {
        "type": "object",
        "properties": {
            "a": {"type": "object", "mergeStrategy": "overwriteAny"},
            "b": {"type": "number"},
        },
    }
    merger = Merger(schema, strategies=prism_merger.PRISM_MERGE_STRATEGIES)

    # Updates to "a" should be allowed
    base = {"a": {"foo": "bar"}, "b": 1}
    head = {"a": {"foo": "buzz"}, "b": 1}
    assert merger.merge(base, head) == head

    # Updates to "b" still should not be allowed
    head["b"] = 2
    with pytest.raises(prism_merger.MergeCollisionException,
                       match="mismatch of b=1.*and b=2"):
        merger.merge(base, head)

def result():
    chat_name = request.form.get("chat_room_name")
    name_check = 1
    chatnamelist = {"main", "blue"}
    schema = {
        "properties": {
            chat_name: {
                "mergeStrategy": "append"
            }
        }
    }
    merger = Merger(schema)
    chatdat = {}
    chatdat[chat_name] = []
    with open('datas.json', 'r') as readfile:
        chatdata = json.load(readfile)
    for names in chatdata:
        if names == chat_name:
            name_check = 0
            break
        else:
            name_check = 1
    if name_check == 0:
        return render_template(
            "error.html",
            message="Chat room name has already been used. Please choose another name.")
    else:
        result = merger.merge(chatdat, chatdata)
        with open('datas.json', 'w') as outfile:
            json.dump(result, outfile, indent=4)
        return render_template("error.html", message="Chat room created successfully.")

def users_validate(jsonfile, verbose, debug):
    """Check users validation."""
    click.secho('Validate user file', fg='green')
    path = current_jsonschemas.url_to_path(get_schema_for_resource('ptrn'))
    ptrn_schema = current_jsonschemas.get_schema(path=path)
    ptrn_schema = records_state.replace_refs(ptrn_schema)
    # TODO: get user schema path programmatically
    # path = current_jsonschemas.url_to_path(get_schema_for_resource('user'))
    path = 'users/user-v0.0.1.json'
    user_schema = current_jsonschemas.get_schema(path=path)
    user_schema = records_state.replace_refs(user_schema)
    merger_schema = {"properties": {"required": {"mergeStrategy": "append"}}}
    merger = Merger(merger_schema)
    schema = merger.merge(user_schema, ptrn_schema)
    schema['required'] = [
        s for s in schema['required'] if s not in ['$schema', 'user_id']
    ]
    datas = read_json_record(jsonfile)
    for idx, data in enumerate(datas):
        if verbose:
            click.echo(f'\tTest record: {idx}')
        try:
            validate(data, schema)
        except ValidationError as err:
            click.secho(
                f'Validation error in record: {idx} pid: {data.get("pid")}',
                fg='red')
            if debug:
                click.secho(str(err))
            else:
                trace_lines = traceback.format_exc(1).split('\n')
                click.secho(trace_lines[3].strip())

def make_merger(self, stratname, typename):
    """
    return a Merger instance using a set of strategies having a name and
    a schema for a given type.

    :param stratname str: a name for the set of strategies to use.  This
                          corresponds to a set of schemas that have merge
                          strategies encoded into them.
    :param typename  str: a name for the particular type that the data
                          to be merged conform to.
    """
    stratdir = os.path.join(self.root, stratname)
    if stratname.startswith('.') or not os.path.exists(stratdir):
        raise MergeError("Strategy convention not recognized: " + stratname)

    cache = ejsl.DirectorySchemaCache(stratdir)

    schemafile = os.path.join(stratdir, "{0}-schema.json".format(typename))
    if not os.path.exists(schemafile):
        raise MergeError("Schema Type name not supported: " + typename)

    with open(schemafile) as fd:
        schema = json.load(fd)

    out = Merger(schema, self.strategies, 'OrderedDict')
    for schema in list(cache.schemas().values()):
        out.cache_schema(schema)

    return out

def merge_all_jsons(directory=r'/content/jsons/'):
    # define the schema (rules for the merge)
    schema = {"properties": {"denotations": {"mergeStrategy": "append"}}}
    # create the merge object:
    merger = Merger(schema)
    # load the first .json file; more information will be appended to it later:
    with open('/content/jsons/ner0.json', 'r') as f:
        result = json.load(f)
    # add the "parcial_text" attribute to the annotations of the first .json
    # file (above); this attribute is used later to know which block of text
    # each word should be recovered from
    for i in range(len(result['denotations'])):
        result['denotations'][i].update({'parcial_text': 0})
    # walk through the directory of all the .json files:
    for filename in os.listdir(directory):
        if (filename != 'final.json' and filename != 'ner0.json'
                and filename.endswith('.json')):
            with open(directory + filename) as f:
                jsonObject = json.load(f)  # read the file and build the json object
            flag = 'denotations' in jsonObject
            if flag:
                # walk through the annotations and add the attribute
                for i in range(len(jsonObject['denotations'])):
                    jsonObject['denotations'][i].update(
                        {'parcial_text': int(filename[3])})
                result = merger.merge(result, jsonObject)  # merge the information
    # write the final .json file:
    with open('/content/jsons/final.json', 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=4)

def create_new_style(multiple_style_data, new_style_data, multiple_style_state):
    """
    Create the new style with the new layer
    """
    # Merge the sources of the original style with those of the new style
    schema_sources = {"properties": {"sources": {"mergeStrategy": "append"}}}
    merger = Merger(schema_sources)
    sources = merger.merge(new_style_data['sources'],
                           multiple_style_data['sources'])
    multiple_style_data['sources'] = sources

    # Add the layers of the new style into the original style
    for i in range(len(new_style_data['layers'])):
        multiple_style_data['layers'].append(new_style_data['layers'][i])

    # Clean the json file: convert Python bool reprs to JSON booleans
    if multiple_style_state:
        multiple_style_data = repr(multiple_style_data).replace("True", "true")
    else:
        multiple_style_data = repr(multiple_style_data).replace("False", "false")
    remove_char = "'"
    for char in remove_char:
        multiple_style_data = repr(multiple_style_data).replace(char, '"')
    multiple_style_json = ujson.loads(multiple_style_data[1:-1])

    # Create the new multiple style file
    with open(settings.MULTIPLE_STYLE_DIR, "w") as new_style_file:
        new_style_file.write(ujson.dumps(multiple_style_json, indent=4))

def join_json_state(record_json_dir, agents, finished_at, config):
    '''Combines all of the json state files into one'''
    json_schema = {"properties": {"state": {"mergeStrategy": "append"}}}

    json_template = {
        "agents": agents,
        "finished_at": finished_at,
        "config": config,
        "state": []
    }

    merger = Merger(json_schema)
    base = merger.merge({}, json_template)

    # append each per-step state file onto the template's "state" list
    for root, dirs, files in os.walk(record_json_dir):
        for name in files:
            path = os.path.join(record_json_dir, name)
            if name.endswith('.json') and "game_state" not in name:
                with open(path) as data_file:
                    data = json.load(data_file)
                    head = {"state": [data]}
                    base = merger.merge(base, head)

    with open(os.path.join(record_json_dir, 'game_state.json'), 'w') as f:
        f.write(json.dumps(base, sort_keys=True, indent=4))

    # clean up the individual state files
    for root, dirs, files in os.walk(record_json_dir):
        for name in files:
            if "game_state" not in name:
                os.remove(os.path.join(record_json_dir, name))

def test_merge_core():

    # create aliquot
    aliquot = {"cimac_aliquot_id": "1234"}

    # create the sample.
    sample = {
        "cimac_sample_id": "S1234",
        "site_sample_id": "blank",
        "aliquots": [aliquot]
    }

    # create the participant
    participant = {
        "cimac_participant_id": "P1234",
        "trial_participant_id": "blank",
        "samples": [sample]
    }

    # create the trial
    ct1 = {
        "lead_organization_study_id": "test",
        "participants": [participant]
    }

    # create validator and assert schemas are valid.
    validator = load_and_validate_schema("clinical_trial.json",
                                         return_validator=True)
    schema = validator.schema
    validator.validate(ct1)

    # create a copy of this, modify participant id
    ct2 = copy.deepcopy(ct1)
    ct2['participants'][0]['cimac_participant_id'] = "PABCD"

    # merge them
    merger = Merger(schema)
    ct3 = merger.merge(ct1, ct2)

    # assert we have two participants and their ids are different.
    assert len(ct3['participants']) == 2
    assert ct3['participants'][0]['cimac_participant_id'] == \
        ct1['participants'][0]['cimac_participant_id']
    assert ct3['participants'][1]['cimac_participant_id'] == \
        ct2['participants'][0]['cimac_participant_id']

    # now let's add a new sample to one of the participants
    ct4 = copy.deepcopy(ct3)
    sample2 = ct4['participants'][0]['samples'][0]
    sample2['cimac_sample_id'] = 'new_id_1'

    ct5 = merger.merge(ct3, ct4)
    assert len(ct5['participants'][0]['samples']) == 2

    # now let's add a new aliquot to one of the samples.
    ct6 = copy.deepcopy(ct5)
    aliquot2 = ct6['participants'][0]['samples'][0]['aliquots'][0]
    aliquot2['cimac_aliquot_id'] = 'new_ali_id_1'

    ct7 = merger.merge(ct5, ct6)
    assert len(ct7['participants'][0]['samples'][0]['aliquots']) == 2

def person_put(person_id=''):
    """
    Update an existing person

    swagger_from_file: api_doc/person_put.yml
    """
    if request.headers.get('Content-Type') == 'application/json':
        if is_token_valid(request.headers.get('Authorization')):
            addition_person = json.loads(request.data.decode("utf-8"))
            result = persistence.get_person(person_id)
            if result:
                original_person = json.loads(result.get('wtf_json'))
                if addition_person.get('id') and \
                        addition_person.get('id') != original_person.get('id'):
                    return make_response(
                        'Conflict: The ID of the additional data already exists as "same_as"! Please check your data!',
                        409)
                else:
                    # init merger "person"
                    with open('conf/person_merger.schema.json') as data_file:
                        schema_person_merger = json.load(data_file)
                    merger = Merger(schema_person_merger)
                    # merge it!
                    merged_person = merger.merge(original_person, addition_person)
                    # load it!
                    form = PersonAdminForm.from_json(merged_person)
                    form.changed.data = timestamp()
                    doit, new_id, message = persistence.person2solr(
                        form, action='update')
                    response_json = {
                        "message": message,
                        "person": merged_person
                    }
                    return make_response(json.dumps(response_json, indent=4), 201)
            else:
                return make_response(
                    "person resource '%s' not found!" % person_id, 404)
        else:
            return make_response('Unauthorized', 401)
    else:
        return make_response('Bad request: invalid content type!', 400)

def load(files):
    schema = {"properties": {"bar": {"mergeStrategy": "append"}}}
    merger = Merger(schema)
    config = json.load(
        open(os.path.dirname(os.path.abspath(__file__)) +
             '/../resources/default-config.json'))
    for file in files:
        custom_config = json.load(open(file))
        config = merger.merge(config, custom_config)
    return config

def merge(self, *args):
    """public method that allows the addition of an arbitrary number of objects"""
    merger = Merger(self.m_schema)
    checker = FormatChecker()
    res = None
    for item in args:
        try:
            validate(item, self.v_schema, format_checker=checker)
            res = merger.merge(res, item)
        except Exception as e:
            self.log.warning(e)
    return res

def merge_head_to_base(base, head):
    schema = {
        "properties": {
            "ConnectorGallery": {
                "type": "array",
                "mergeStrategy": "arrayMergeById",
                "mergeOptions": {
                    "idRef": "Id"
                }
            }
        }
    }
    merger = Merger(schema)
    result = merger.merge(base, head)
    return result

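# Hedged usage sketch for merge_head_to_base above (data invented for
# illustration): arrayMergeById matches "ConnectorGallery" items by their
# "Id" field instead of replacing the whole array.
base = {"ConnectorGallery": [{"Id": "a", "version": 1},
                             {"Id": "b", "version": 1}]}
head = {"ConnectorGallery": [{"Id": "b", "version": 2}]}
print(merge_head_to_base(base, head))
# -> item "b" is updated in place, item "a" is preserved:
# {"ConnectorGallery": [{"Id": "a", "version": 1}, {"Id": "b", "version": 2}]}
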
def scan(self):
    bannerGrabResults = self.bannerGrab()
    nmapScriptResults = self.runNmapScripts()
    fileListingResults = self.getFiles()

    schema = {'properties': {'items': {'type': 'object'}}}
    merger = Merger(schema)
    results = None
    results = merger.merge(results, bannerGrabResults)
    results = merger.merge(results, nmapScriptResults)
    results = merger.merge(results, fileListingResults)
    return {self.port: results}

def archive():
    with open("schema2.json") as out:
        schem = json.loads(out.read())
    merger = Merger(schem)
    titre = time.strftime('%x').replace('/', '-')

    # --- Convert Var.price into a JSON array ---
    ArchStream = "["
    for x in range(1, len(Var.price["Time"]) - 1):
        ArchStream += str(nonArchiver(x)) + ","
    ArchStream = ArchStream[:-1]  # remove the trailing comma
    ArchStream += "]"
    # print("ArchStream:\n" + ArchStream)
    # -------------------------------------------

    try:
        # 'x' mode: create only, fails if the file already exists
        open("archive/" + titre + '.json', 'x').close()
    except FileExistsError as e:
        pass  # <-- warning/log

    with open("archive/" + titre + '.json', 'r+') as out:
        a = out.read()
        out.seek(0)
        if not a:  # in practice I believe this branch is never taken
            base = {
                "instrumentType": "",
                "metadata": {},
                "prices": {
                    "snapshotTime": "01/01/2017 00:00:00",
                    "snapshotTimeUTC": "01/01/2017T00:00:00",
                    "closePrice": {
                        "bid": 0,
                        "ask": 0,
                        "lastTraded": 0
                    }
                }
            }
        else:
            base = ast.literal_eval(a)
        head = json.loads(ArchStream.replace("'", '"'))
        newBase = merger.merge(base, head)
        json.dump(newBase, out)

def download_data(self):
    counter = 0
    interface.status.set("Downloading data please wait..." +
                         " \n Data sets downloaded: " + str(counter))
    interface.gui.update_idletasks()
    api_endpoint = 'https://arbeidsplassen.nav.no/public-feed/api/v1/ads?size=100'
    api_headers = {
        'accept': 'application/json',
        'Authorization': 'Bearer ' + self.token
    }
    merg_schema = {"properties": {"content": {"mergeStrategy": "append"}}}
    merger = Merger(merg_schema)
    download = requests.get(url=api_endpoint, headers=api_headers)
    if download.status_code == 200:
        self.data = download.json()
        download_active = True
        while download_active is True:
            # timestamp of the oldest ad fetched so far; page backwards from it
            latest_download = (
                self.data['content'][len(self.data['content']) - 1]['published'])[0:19]
            download = requests.get(url=api_endpoint + "&published=[*," +
                                    latest_download + ")",
                                    headers=api_headers)
            counter += 1
            interface.status.set("Downloading data please wait..." +
                                 " \n Data sets downloaded: " + str(counter) +
                                 " \n Total job ads fetched: " +
                                 str(len(self.data['content'])))
            interface.gui.update_idletasks()
            self.data = merger.merge(self.data, download.json())
            # Dev option to not download all ads, but instead a limited number;
            # makes for faster testing. Remove the '#' on the two lines below
            # to use:
            # if counter == 2:
            #     download_active = False
            if latest_download == (
                    self.data['content'][len(self.data['content']) - 1]['published'])[0:19]:
                download_active = False
        self.lift_data()
    elif download.status_code == 401:
        interface.status.set(
            "Error 401 not authorized, public token likely expired. \n"
            "Go to settings --> 'update Token' and input a new one, then press "
            "Re-download in the settings menu to update. \n"
            "Get a new token at: https://github.com/navikt/pam-public-feed")
    else:
        interface.status.set(" HTTP error: " + str(download.status_code) +
                             "\nprogram failed, try again")

def merge_object(data_dir, max_file_size):
    """Generates and returns a merge object for merging files"""
    with open(data_dir) as input_file:
        obj = json.load(input_file)
        if sizeof(obj) > max_file_size:
            raise ValueError
    return Merger(schema_builder(obj)), list(obj.keys())

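# schema_builder is not defined in merge_object above. A minimal sketch of
# what it might do, assuming the intent is to append list-valued top-level
# keys on merge (this helper is an invented illustration, not original code):
def schema_builder(obj):
    # mark every key whose value is a list with the "append" strategy;
    # everything else falls back to jsonmerge's default overwrite
    return {
        "properties": {
            key: {"mergeStrategy": "append"}
            for key, value in obj.items()
            if isinstance(value, list)
        }
    }
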
def merge_jsons(dir, clean_files=True):
    json_list = [pos_json for pos_json in os.listdir(dir)
                 if pos_json.endswith('.json')]
    if "annotations.json" in json_list:
        json_list.remove("annotations.json")
    # not necessary; comment out if the json file names don't contain numbers
    json_list = sorted(json_list, key=lambda i: int(i.split("_")[1]))
    merger = Merger(schema)
    annot = None
    for i in json_list:
        with open(os.path.join(dir, i), "r") as f:
            if annot is None:
                annot = json.load(f)
            else:
                a = json.load(f)
                annot = merger.merge(annot, a)
        if clean_files:
            os.remove(os.path.join(dir, i))
    with open(os.path.join(dir, "annotations.json"), "w") as f:
        json.dump(annot, f)

def run(self):
    Reporter.report_configuration(self)
    self.results = {"pass": 0, "fail": 0, "skip": 0, "timeout": 0}
    with open(fs.join(path.TEST_ROOT, "testsets.json")) as testsets_file:
        testsets = json.load(testsets_file, object_pairs_hook=OrderedDict)

    if self.testsets:
        with open(fs.join(path.TEST_ROOT, self.testsets)) as testsets_file:
            ext_testsets = json.load(testsets_file,
                                     object_pairs_hook=OrderedDict)
        merger = Merger(JSON_SCHEMA)
        testsets = merger.merge(testsets, ext_testsets)

    for testset, tests in testsets.items():
        self.run_testset(testset, tests)

    Reporter.report_final(self.results)

def main(args=None):
    parser = argparse.ArgumentParser()
    parser.add_argument('base',
                        type=argparse.FileType('r', encoding='utf_8'),
                        help='the document we are merging changes into')
    parser.add_argument('head',
                        nargs='+',
                        type=argparse.FileType('r', encoding='utf_8'),
                        help='changed documents')
    parser.add_argument('--schema',
                        type=argparse.FileType('r', encoding='utf_8'),
                        default='fresh-resume-schema/schema/fresh-resume-schema_1.0.0-beta.json',
                        help='JSON schema that defines merge strategies')
    namespace = parser.parse_args(args)

    schema = {}
    if namespace.schema is not None:
        schema = json.load(namespace.schema)
    merger = Merger(schema)
    result = functools.reduce(lambda x, y: merger.merge(x, json.load(y)),
                              namespace.head, json.load(namespace.base))
    json.dump(result, sys.stdout)
    sys.stdout.write('\n')
    sys.exit(0)

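# Hedged usage sketch for main() above, assuming the module is saved as
# merge_resume.py (the file name is invented for illustration):
#
#   python merge_resume.py base.json patch1.json patch2.json > merged.json
#
# The head documents are folded into the base left-to-right with
# functools.reduce, so later files take precedence wherever the schema's
# strategy overwrites.
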
def loadData(base_url, days_to_report, merge_field="Groupings", debug=False):
    moreData = True
    data = None
    data_full = dict()
    records_read = 100

    # Add date filters to the url
    date_filter_url = base_url + "&" + getStartEndFilterString(days_to_report)

    # We need to handle merging the data into one response
    # https://pypi.python.org/pypi/jsonmerge
    schema = json.loads('{ "properties": { "' + merge_field +
                        '": { "mergeStrategy": "append"}}}')
    if debug:
        log("schema is %s" % schema)
    merger = Merger(schema)

    calling_url = date_filter_url
    while moreData:
        if debug:
            log("Calling URL %s" % str(calling_url))
        try:
            response = urllib2.urlopen(calling_url)
            data = json.loads(response.read())
        except urllib2.URLError as e:
            log("ERROR: Unable to open URL %s : %s" % (date_filter_url, e.reason))

        # Merge the results...needed if the data is paginated.
        if data:
            data_full = merger.merge(data_full, data)

        # Result data could be paginated
        # See http://support.cloudcheckr.com/cloudcheckr-api-userguide/
        if data and 'HasNext' in data and data['HasNext'] == True:
            log("Read %s records. More data to read" % records_read)
            if debug:
                log("more data to read")
            calling_url = date_filter_url + "&next_token=" + data['NextToken']
            records_read = records_read + 100
        else:
            moreData = False

    return data_full

def merge(base: dict, head: dict) -> dict:
    """
    Merge base with head dict.

    :param base: dict to get merged with head.
    :type base: dict
    :param head: dict to merge in base.
    :type head: dict
    :return: The merged dict.
    :rtype: dict
    """
    merger = Merger(
        {
            "oneOf": [
                {"type": "array", "mergeStrategy": "append"},
                {"type": "object", "additionalProperties": {"$ref": "#"}},
                {"type": "string"},
                {"type": "number"},
            ]
        }
    )
    return merger.merge(base, head)

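# Hedged usage sketch for merge() above (data invented for illustration):
# the recursive {"$ref": "#"} applies the same oneOf at every nesting level,
# so arrays are appended and scalars are overwritten, however deep they sit.
base = {"tags": ["a"], "nested": {"items": [1], "name": "old"}}
head = {"tags": ["b"], "nested": {"items": [2], "name": "new"}}
print(merge(base, head))
# -> {"tags": ["a", "b"], "nested": {"items": [1, 2], "name": "new"}}
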
def vote(data):
    selection = data["selection"]
    message = data["message"]
    loginname = data["loginname"]
    chat_name = data["chat_name"]
    timestamp1 = datetime.datetime.now()
    timestamp = timestamp1.strftime("%c")
    schema = {
        "properties": {
            chat_name: {
                "mergeStrategy": "append"
            }
        }
    }
    merger = Merger(schema)
    with open('datas.json', 'r') as readfile:
        chatdata = json.load(readfile)
    # keep only the 100 most recent messages
    fullcount = len(chatdata[chat_name])
    count = 100
    while count < fullcount:
        del chatdata[chat_name][-1]
        count = count + 1
    chatdat = {}
    chatdat[chat_name] = []
    chatdat[chat_name].append({'chatter': loginname,
                               'message': message,
                               'timestamp': timestamp})
    result = merger.merge(chatdat, chatdata)
    with open('datas.json', 'w') as outfile:
        json.dump(result, outfile, indent=4)
    emit("announce vote",
         {"timestamp": timestamp, "message": message, "loginname": loginname},
         broadcast=True)

def __init__(self, folder_path, prefix='domain', output='aggregated_domains.yaml'):
    self.output = os.path.join(folder_path, output)
    # idRef "/" makes each array item's whole value act as its id, so
    # merging these lists de-duplicates rather than blindly appending
    self.schema = {
        "properties": {
            "actions": {
                "mergeStrategy": "arrayMergeById",
                "mergeOptions": {"idRef": "/"}
            },
            "intents": {
                "mergeStrategy": "arrayMergeById",
                "mergeOptions": {"idRef": "/"}
            },
            "entities": {
                "mergeStrategy": "arrayMergeById",
                "mergeOptions": {"idRef": "/"}
            }
        }
    }
    path_pattern = u'{}/{}*.y*ml'.format(folder_path, prefix)
    self.list_of_files = glob.glob(path_pattern)
    self.jsons = []
    self.merger = Merger(self.schema)
    self.merged = {}
    yaml.add_representer(OrderedDict, DomainsMerger.represent_ordereddict)

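# Hedged sketch of the idRef "/" behavior used in the schema above, with
# invented data: items are matched by their entire value, so merging lists
# of scalars behaves like a set union that preserves order.
from jsonmerge import Merger

m = Merger({"mergeStrategy": "arrayMergeById", "mergeOptions": {"idRef": "/"}})
print(m.merge(["greet", "goodbye"], ["goodbye", "help"]))
# -> ["greet", "goodbye", "help"]
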
def scan(self):
    bruteResults = self.dirBrute()
    nmapScriptResults = self.runNmapScripts()
    bannerGrabResults = self.bannerGrab()
    spiderResults = self.spider()
    htmlCommentResults = self.getHtmlComments()

    schema = {'properties': {'items': {'type': 'object'}}}
    merger = Merger(schema)
    results = None
    results = merger.merge(results, bruteResults)
    results = merger.merge(results, nmapScriptResults)
    results = merger.merge(results, bannerGrabResults)
    results = merger.merge(results, spiderResults)
    results = merger.merge(results, htmlCommentResults)
    return {self.port: results}

def __merge_scan_res(self):
    """merge of the received object with the main storage"""
    merger = Merger(self.m_schema)
    self.data = merger.merge(self.data, self.last_received)

def test_assay_merge():

    # two wes assays.
    a1 = {
        "lead_organization_study_id": "10021",
        "participants": [{
            "samples": [{
                "genomic_source": "Tumor",
                "aliquots": [{
                    "assay": {
                        "wes": {
                            "assay_creator": "Mount Sinai",
                            "assay_category": "Whole Exome Sequencing (WES)",
                            "enrichment_vendor_kit": "Twist",
                            "library_vendor_kit": "KAPA - Hyper Prep",
                            "sequencer_platform": "Illumina - NextSeq 550",
                            "paired_end_reads": "Paired",
                            "read_length": 100,
                            "records": [{
                                "library_kit_lot": "lot abc",
                                "enrichment_vendor_lot": "lot 123",
                                "library_prep_date": "2019-05-01 00:00:00",
                                "capture_date": "2019-05-02 00:00:00",
                                "input_ng": 100,
                                "library_yield_ng": 700,
                                "average_insert_size": 250
                            }]
                        }
                    },
                    "cimac_aliquot_id": "Aliquot 1"
                }],
                "cimac_sample_id": "Sample 1"
            }],
            "cimac_participant_id": "Patient 1"
        }]
    }

    # create a2 and modify ids to trigger merge behavior
    a2 = copy.deepcopy(a1)
    a2['participants'][0]['samples'][0]['cimac_sample_id'] = "something different"

    # create validator and assert schemas are valid.
    validator = load_and_validate_schema("clinical_trial.json",
                                         return_validator=True)
    schema = validator.schema

    # merge them
    merger = Merger(schema)
    a3 = merger.merge(a1, a2)
    assert len(a3['participants']) == 1
    assert len(a3['participants'][0]['samples']) == 2