def transform_gen3(item_paths, output_dir, project_id, compresslevel=0):
    """Write bcc_biomarker records for biomarker rows that map to a known case.

    The MRN -> OPTR lookup is read from ``source/bcc/bcc-cases.tsv``.  Every
    row read from ``item_paths`` whose ``MRN`` is present in that lookup is
    reshaped (identifying columns dropped, a few columns renamed to
    snake_case), passed through ``obscure_dates`` and written to the
    ``bcc_biomarker`` emitter.  Rows without a matching case are skipped.

    Args:
        item_paths: paths of biomarker files readable by ``reader``.
        output_dir: directory handed to the emitter.
        project_id: value stored in each record's ``project_id``.
        compresslevel: kept for signature compatibility; not used here.

    Returns:
        None.
    """
    case_lookup = {
        line['MRN']: line['OPTR']
        for line in reader('{}/bcc-cases.tsv'.format('source/bcc'))
    }
    # columns removed from the emitted record
    dropped_columns = (
        "MRN",
        "Participant ID",
        "_not_available_notes",
        "_not_available_reason_id",
        "cBiomarker Label dont use",
    )
    # columns re-keyed as lower-case with underscores
    renamed_columns = (
        "CA19 Values After Specimen Collection",
        "Order Proc ID",
        "assay version id",
        "biomarker level",
        "unit of measure id",
    )

    def add_case(b):
        """Reshape row ``b`` in place and return the bcc_biomarker record."""
        # ids must be derived before MRN is deleted from the row
        case_submitter_id = case_lookup[b['MRN']]
        submitter_id = '{}-{}-bcc_biomarker'.format(case_submitter_id,
                                                    b['ID_Event'])
        for p in dropped_columns:
            del b[p]
        for p in renamed_columns:
            b[p.replace(' ', '_').lower()] = b.pop(p)
        b['cbiomarker_label'] = b.pop("cBiomarker Label use this")
        biomarker = {
            'type': 'bcc_biomarker',
            'cases': {
                'submitter_id': case_submitter_id
            },
            'submitter_id': submitter_id,
            'project_id': project_id
        }
        # remaining row columns are copied over (and win on key clashes)
        biomarker.update(b)
        return biomarker

    biomarker_emitter = emitter('bcc_biomarker', output_dir=output_dir)
    for item_path in item_paths:
        biomarkers = list(reader(item_path))
        biomarkers_with_case = [
            add_case(b) for b in biomarkers if b['MRN'] in case_lookup
        ]
        print('there are', len(biomarkers_with_case),
              'biomarkers with cases, out of ', len(biomarkers), 'biomarkers')
        for b in biomarkers_with_case:
            biomarker_emitter.write(obscure_dates(b))
    biomarker_emitter.close()
def transform(item_paths, output_dir, experiment_code, compresslevel=0):
    """Emit one default aliquot for every record in ``<output_dir>/sample.json``.

    ``item_paths``, ``experiment_code`` and ``compresslevel`` are accepted
    but not used by this transform.
    """
    aliquots_emitter = emitter('aliquot', output_dir=output_dir)
    sample_path = '{}/sample.json'.format(output_dir)
    for sample in reader(sample_path):
        # every sample record must carry the id the aliquot is derived from
        assert 'submitter_id' in sample, sample
        aliquot = default_aliquot(sample['submitter_id'],
                                  project_id=DEFAULT_PROJECT_ID)
        aliquots_emitter.write(aliquot)
    aliquots_emitter.close()
def transform(item_paths, output_dir, experiment_code, compresslevel=0):
    """Write a ``participantid`` / ``DateOfBirth`` record for every input row.

    Each path in ``item_paths`` is read with ``reader``; the
    ``ParticipantID`` and ``DateOfBirth`` fields of each row are copied to
    the ``bcc_participant_dob`` emitter.  ``experiment_code`` and
    ``compresslevel`` are not used.
    """
    dob_emitter = emitter('bcc_participant_dob', output_dir=output_dir)
    for path in item_paths:
        for record in reader(path):
            participant_id = record['ParticipantID']
            date_of_birth = record['DateOfBirth']
            dob_emitter.write({
                'participantid': participant_id,
                'DateOfBirth': date_of_birth,
            })
    dob_emitter.close()
def transform(item_paths, output_dir, experiment_code, compresslevel=0):
    """Write a gen3 ``submitted_file`` record for each usable medable csv row.

    Only the first path in ``item_paths`` is read.  A row is skipped when
    ``exclude_row`` rejects it, or when its ``c_public_user._id`` or
    ``c_file.ETag`` column is empty.  Of the submitted_file fields, only
    type, cases, submitter_id, project_id, data_type, md5sum, file_size and
    file_name are populated.
    """
    file_emitter = emitter('submitted_file', output_dir=output_dir)
    with open(item_paths[0], newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            if exclude_row(row):
                continue
            case_submitter_id = row['c_public_user._id']
            # rows without an owning case or without a file checksum are dropped
            if len(case_submitter_id) == 0 or len(row['c_file.ETag']) == 0:
                continue
            submitter_id = '{}-sf'.format(row['_id'])
            file_emitter.write(dict(
                type='submitted_file',
                cases={'submitter_id': case_submitter_id},
                submitter_id=submitter_id,
                project_id=DEFAULT_PROJECT_ID,
                data_type=row['c_file.mime'],
                md5sum=row['c_file.ETag'],
                file_size=row['c_file.size'],
                file_name=row['c_file.path'],
            ))
    file_emitter.close()
def transform(item_paths, output_dir, experiment_code, compresslevel=0):
    """Read somatic variant records and write gen3 ``somatic_variant`` json.

    Only the first path in ``item_paths`` is read.  For each record the
    ``aliquot`` value is turned into a ``{'submitter_id': ...}`` link, a
    ``submitter_id`` of the form ``<aliquot>-<allele_id>-<ensembl_transcript>``
    is added, ``type`` is set, and the ``allele_id`` / ``ensembl_transcript``
    fields are removed before the record is written.

    ``experiment_code`` and ``compresslevel`` are not used.
    """
    somatic_variants_emitter = emitter('somatic_variants2',
                                       output_dir=output_dir)
    for line in reader(item_paths[0]):
        # Capture the plain aliquot id first: formatting the link dict into
        # the submitter_id would embed its repr ("{'submitter_id': ...}").
        aliquot_id = line['aliquot']
        line['aliquot'] = {'submitter_id': aliquot_id}
        line['submitter_id'] = '{}-{}-{}'.format(aliquot_id,
                                                 line['allele_id'],
                                                 line['ensembl_transcript'])
        line['type'] = 'somatic_variant'
        del line['ensembl_transcript']
        del line['allele_id']
        somatic_variants_emitter.write(line)
    somatic_variants_emitter.close()
def transform(item_paths, output_dir, experiment_code, compresslevel=0):
    """Write a gen3 ``sample`` record for every line of every input path.

    Each line yielded by ``reader`` is stripped of trailing newlines and
    used both as the linked case's ``submitter_id`` and, prefixed with
    ``sample-``, as the sample's own ``submitter_id``.
    ``experiment_code`` and ``compresslevel`` are not used.
    """
    samples_emitter = emitter('sample', output_dir=output_dir)
    for path in item_paths:
        # file stem of the input; computed here but not referenced below
        source = os.path.splitext(os.path.basename(path))[0]
        for raw_line in reader(path):
            sample_id = raw_line.rstrip('\n')
            samples_emitter.write(dict(
                type='sample',
                cases={'submitter_id': sample_id},
                submitter_id='sample-{}'.format(sample_id),
                project_id=DEFAULT_PROJECT_ID,
            ))
    samples_emitter.close()
def transform_old(item_paths, output_dir, experiment_code, compresslevel=0):
    """Merge input rows into one ``gene`` record per ``participantid``.

    Rows sharing a ``participantid`` are folded into a single dict, later
    rows overwriting earlier values.  The merged records are only held in a
    local dict: the visible code opens the ``gene`` emitter but neither
    writes to it nor closes it.
    """
    genes_emitter = emitter('gene', output_dir=output_dir)
    genes = {}
    for path in item_paths:
        for record in reader(path):
            fresh = {
                'type': 'gene',
                'experiments': {
                    'submitter_id': experiment_code
                },
                'submitter_id': record['participantid']
            }
            # reuse the record already started for this participant, if any
            genes.setdefault(record['participantid'], fresh).update(record)
def transform(item_paths, output_dir, experiment_code, compresslevel=0, callback=None):
    """Write a ``bcc_aliquot`` record for every row of every input path.

    Each row is tagged with ``source`` (the input file's stem), optionally
    passed through ``callback``, linked to the aliquot
    ``<sample_code>-aliquot``, merged over the record skeleton, run through
    ``obscure_dates`` and written.  ``experiment_code`` and
    ``compresslevel`` are not used.
    """
    bcc_aliquot_emitter = emitter('bcc_aliquot', output_dir=output_dir)
    for path in item_paths:
        source, _extension = os.path.splitext(os.path.basename(path))
        for record in reader(path):
            record['source'] = source
            if callback:
                record = callback(record)
            bcc_aliquot = dict(
                type='bcc_aliquot',
                project_id=DEFAULT_PROJECT_ID,
                aliquot={'submitter_id': '{}-aliquot'.format(record['sample_code'])},
                submitter_id=record['lsid'],
            )
            # row columns are copied on top of the skeleton
            bcc_aliquot.update(record)
            bcc_aliquot_emitter.write(
                obscure_dates(bcc_aliquot, output_dir=output_dir))
    bcc_aliquot_emitter.close()
def transform(item_paths, output_dir, experiment_code, project_id, compresslevel=0, callback=None):
    """Merge input rows into one ``allele`` record per ``lsid`` and write them.

    Rows (optionally transformed by ``callback``) that share an ``lsid`` are
    folded into the same record, later rows overwriting earlier values.
    Once all inputs are read, every merged record goes through
    ``obscure_dates`` and is written.  ``experiment_code`` and
    ``compresslevel`` are not used.
    """
    alleles_emitter = emitter('allele', output_dir=output_dir)
    alleles = {}
    for path in item_paths:
        for record in reader(path):
            if callback:
                record = callback(record)
            fresh = {
                'type': 'allele',
                'aliquots': {'submitter_id': '{}-aliquot'.format(record['sample_code'])},
                'projects': {'code': 'reference'},
                'submitter_id': record['lsid'],
            }
            # keep accumulating into the record already seen for this lsid
            allele = alleles.setdefault(record['lsid'], fresh)
            allele['project_id'] = project_id
            allele.update(record)
    for lsid in alleles:
        alleles[lsid] = obscure_dates(alleles[lsid], output_dir=output_dir)
        alleles_emitter.write(alleles[lsid])
    alleles_emitter.close()
def transform_gen3(item_paths, output_dir, project_id, compresslevel=0):
    """Write gen3 ``observation`` records and return their submitter_ids.

    Known cases are loaded from ``<output_dir>/case.json``.  For each input
    row an observation is built with ``default_observation``.  Rows whose
    case is unknown are recorded through ``missing_parent`` and skipped;
    rows whose observation id was already written are skipped.

    Args:
        item_paths: iterable of ``(path, observation_type, callback)``
            tuples; the callback element is not used by this function.
        output_dir: directory holding ``case.json`` and receiving output.
        project_id: project id passed to ``default_observation``.
        compresslevel: kept for signature compatibility; not used here.

    Returns:
        The set of observation ``submitter_id`` values that were written.

    Raises:
        AssertionError: if a row has neither ``ParticipantID`` nor
            ``participantid`` (or the value is falsy).
    """
    cases = {
        line['submitter_id']
        for line in reader('{}/case.json'.format(output_dir))
    }
    observation_emitter = emitter('observation', output_dir=output_dir)
    observation_ids = set()
    missing_cases = []
    for p, observation_type, _callback in item_paths:
        for line in reader(p):
            participantid = line.get('ParticipantID',
                                     line.get('participantid', None))
            assert participantid, 'ParticipantID not in {} {}'.format(
                p, line.keys())
            case_submitter_id = participantid
            observation = default_observation(case_submitter_id, project_id,
                                              line['date'], observation_type,
                                              line)
            observation_submitter_id = observation['submitter_id']
            if case_submitter_id not in cases:
                missing_cases.append(
                    missing_parent(parent_id=case_submitter_id,
                                   parent_type='case',
                                   child_id=observation_submitter_id,
                                   child_type='observation'))
                continue
            if observation_submitter_id in observation_ids:
                # only the first row for a given observation id is written
                continue
            observation_ids.add(observation_submitter_id)
            observation = obscure_dates(
                observation,
                output_dir=output_dir,
                participantid=observation['cases']['submitter_id'])
            observation_emitter.write(observation)
    # close the emitter like every other transform in this file does
    observation_emitter.close()
    save_missing_parents(missing_cases)
    return observation_ids
def transform_biomarker(item_paths, output_dir, project_id, observation_ids, compresslevel=0):
    """Write a ``bcc_biomarker`` record for each row with a known observation.

    ``item_paths`` holds ``(path, observation_type, callback)`` tuples.  The
    observation id of each row is recomputed with ``default_observation``;
    rows whose id is not in ``observation_ids`` are reported on stdout and
    skipped.  Remaining rows are tagged with ``source``, optionally passed
    through ``callback``, merged over the record skeleton, run through
    ``obscure_dates`` and written.
    """
    bcc_biomarker_emitter = emitter('bcc_biomarker', output_dir=output_dir)
    for path, observation_type, callback in item_paths:
        source = os.path.splitext(os.path.basename(path))[0]
        for record in reader(path):
            participant_id = record.get('ParticipantID',
                                        record.get('participantid', None))
            observation = default_observation(participant_id, project_id,
                                              record['date'],
                                              observation_type, record)
            observation_id = observation['submitter_id']
            biomarker_id = '{}-bcc_biomarker'.format(observation_id)
            if observation_id in observation_ids:
                record['source'] = source
                if callback:
                    record = callback(record)
                biomarker = dict(
                    type='bcc_biomarker',
                    project_id=project_id,
                    observation={'submitter_id': observation_id},
                    submitter_id=biomarker_id,
                )
                biomarker.update(record)
                bcc_biomarker_emitter.write(
                    obscure_dates(biomarker, output_dir=output_dir))
            else:
                print(
                    'transform_biomarker {} not in observation_ids, skipping.'.
                    format(biomarker_id))
    bcc_biomarker_emitter.close()
def transform(item_paths, output_dir, experiment_code, compresslevel=0, callback=None):
    """Write a ``wes_result`` record for every row of every input path.

    A two-column lookup is loaded from ``output/reference/gene_lookup.tsv``.
    Each row is tagged with ``source``, optionally passed through
    ``callback``, linked to the aliquot ``<participant>-sample-aliquot`` and,
    when its lower-cased ``gene_symbol`` is in the lookup, given a ``gene``
    link.  ``experiment_code`` and ``compresslevel`` are not used.
    """
    genetrails_emitter = emitter('wes_result', output_dir=output_dir)
    with open('output/reference/gene_lookup.tsv') as lookup_file:
        # each line must split into exactly two whitespace-separated fields
        gene_lookup = dict(row.split() for row in lookup_file)
    for path in item_paths:
        source = os.path.splitext(os.path.basename(path))[0]
        for record in reader(path):
            record['source'] = source
            if callback:
                record = callback(record)
            participant_id = record.get('participantid',
                                        record.get('ParticipantID', None))
            variant = dict(
                type='wes_result',
                project_id=DEFAULT_PROJECT_ID,
                aliquot={'submitter_id': '{}-sample-aliquot'.format(participant_id)},
                submitter_id=record['lsid'],
            )
            if 'gene_symbol' in record:
                symbol = record['gene_symbol'].lower()
                if symbol in gene_lookup:
                    record['gene'] = {
                        'submitter_id': gene_lookup[symbol],
                        'project_id': 'smmart-reference'
                    }
            variant.update(record)
            genetrails_emitter.write(variant)
    genetrails_emitter.close()
def transform(item_paths, output_dir, experiment_code, compresslevel=0):
    """Write one gen3 ``case`` record per distinct user id in a medable csv.

    Only the first path in ``item_paths`` is read.  Rows rejected by
    ``exclude_row`` and rows with an empty ``c_public_user._id`` are
    ignored; the remaining ids are de-duplicated and written in sorted
    order so repeated runs produce the same file.

    Args:
        item_paths: list whose first element is the csv path.
        output_dir: directory handed to the emitter.
        experiment_code: submitter_id of the experiment each case links to.
        compresslevel: kept for signature compatibility; not used here.
    """
    cases_emitter = emitter('case', output_dir=output_dir)
    cases = set()
    with open(item_paths[0], newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            if exclude_row(row):
                continue
            submitter_id = row['c_public_user._id']
            if len(submitter_id) == 0:
                continue
            cases.add(submitter_id)
    for submitter_id in sorted(cases):
        case = {
            'type': 'case',
            'experiments': {
                'submitter_id': experiment_code
            },
            'submitter_id': submitter_id,
            'project_id': DEFAULT_PROJECT_ID
        }
        cases_emitter.write(case)
    # close the emitter like the other transforms in this file do
    cases_emitter.close()
def transform_chemotherapy(item_paths, output_dir, project_id, treatment_ids, compresslevel=0):
    """Write a ``bcc_chemotherapy`` record for each row with a known treatment.

    ``item_paths`` holds ``(path, type, callback)`` tuples; the middle
    element is not used.  Each row is tagged with ``source`` and optionally
    passed through ``callback``.  Its treatment id is
    ``<ParticipantID>-diagnosis-Chemotherapy-<get_uniq(row)>``; rows whose
    treatment id is not in ``treatment_ids`` are skipped without a message.
    """
    bcc_treatment_emitter = emitter('bcc_chemotherapy', output_dir=output_dir)
    for path, _treatment_type, callback in item_paths:
        source = os.path.splitext(os.path.basename(path))[0]
        for record in reader(path):
            record['source'] = source
            if callback:
                record = callback(record)
            diagnosis_id = '{}-diagnosis'.format(record['ParticipantID'])
            treatment_id = '{}-Chemotherapy-{}'.format(diagnosis_id,
                                                       get_uniq(record))
            if treatment_id not in treatment_ids:
                continue
            bcc_treatment = dict(
                type='bcc_chemotherapy',
                project_id=project_id,
                treatment={'submitter_id': treatment_id},
                # description falls back to the agent, then to 'na'
                submitter_id='{}-{}-{}'.format(
                    treatment_id,
                    record['days'],
                    record.get('treatment_description',
                               record.get('treatment_agent', 'na'))),
            )
            bcc_treatment.update(record)
            bcc_treatment_emitter.write(
                obscure_dates(bcc_treatment, output_dir=output_dir))
    bcc_treatment_emitter.close()
def transform(item_paths, output_dir, experiment_code, compresslevel=0):
    """Read bcc labkey diagnosis json and write gen3 diagnosis / case json.

    Builds one gen3 diagnosis per ``default_diagnosis`` submitter_id and one
    merged ``bcc_diagnosis`` dict per (participant, source file), writes the
    diagnoses, then appends a default case (and writes a default diagnosis)
    for every participant that has no entry in ``<output_dir>/case.json``.

    ``experiment_code`` and ``compresslevel`` are not used in the visible
    code.

    NOTE(review): in the portion visible here the accumulated
    ``bcc_diagnosises`` are never written, and ``bcc_diagnosis_emitter`` and
    ``cases_emitter`` are never closed - confirm whether the function
    continues beyond this point.
    """
    # submitter_ids of the cases already present in <output_dir>/case.json
    cases = set([])
    for line in reader('{}/case.json'.format(output_dir)):
        cases.add(line['submitter_id'])
    diagnoses_emitter = emitter('diagnosis', output_dir=output_dir)
    bcc_diagnosis_emitter = emitter('bcc_diagnosis', output_dir=output_dir)
    # gen3 diagnosis records keyed by their submitter_id
    diagnosises = {}
    # merged bcc_diagnosis records keyed by '<participantid>-<source>'
    bcc_diagnosises = {}
    # participants that appear in the input but not in case.json
    missing_cases = set([])
    for p in item_paths:
        # file name without directory or extension
        source = os.path.splitext(os.path.basename(p))[0]
        for line in reader(p):
            case_submitter_id = line['participantid']
            bcc_submitter_id = '{}-{}'.format(case_submitter_id, source)
            diagnosis = default_diagnosis(case_submitter_id,
                                          project_id=DEFAULT_PROJECT_ID,
                                          line=line)
            submitter_id = diagnosis['submitter_id']
            bcc_diagnosis = {
                'type': 'bcc_diagnosis',
                'diagnosis': {
                    'submitter_id': submitter_id
                },
                'source': source,
                'submitter_id': bcc_submitter_id,
                'project_id': DEFAULT_PROJECT_ID
            }
            # keep merging into the record already started for this key
            if bcc_submitter_id in bcc_diagnosises:
                bcc_diagnosis = bcc_diagnosises[bcc_submitter_id]
            # we will use the name 'diagnosis' as a link back to gen3.diagnosis
            line['diagnosis_name'] = line.get('diagnosis', None)
            # raises KeyError when the row has no 'diagnosis' field
            del line['diagnosis']
            bcc_diagnosis.update(line)
            diagnosises[submitter_id] = diagnosis
            bcc_diagnosises[bcc_submitter_id] = bcc_diagnosis
            if case_submitter_id not in cases:
                print('no case for: >{}<'.format(case_submitter_id))
                missing_cases.add(case_submitter_id)
    for k in diagnosises:
        diagnosises[k] = obscure_dates(
            diagnosises[k],
            output_dir=output_dir,
            participantid=diagnosises[k]['cases']['submitter_id'])
        diagnoses_emitter.write(diagnosises[k])
    # Every id in missing_cases was added because it was absent from cases,
    # so this difference is equal to missing_cases itself.
    cases = missing_cases - cases
    print('missing diagnosis for {} cases'.format(len(cases)))
    for participantid in cases:
        diagnosis = default_diagnosis(participantid,
                                      project_id=DEFAULT_PROJECT_ID)
        diagnosis = obscure_dates(diagnosis, output_dir=output_dir)
        diagnoses_emitter.write(diagnosis)
    diagnoses_emitter.close()
    print('missing cases for {} cases'.format(len(missing_cases)))
    # opened with append=True; presumably adds to the existing case output
    # rather than replacing it - confirm against emitter
    cases_emitter = emitter('case', output_dir=output_dir, append=True)
    for participantid in missing_cases:
        case = default_case(DEFAULT_EXPERIMENT_CODE, participantid,
                            DEFAULT_PROJECT_ID)
        case = obscure_dates(case, output_dir=output_dir)
        cases_emitter.write(case)
diagnosis = default_diagnosis(participantid, project_id=DEFAULT_PROJECT_ID) diagnosis = obscure_dates(diagnosis, output_dir=output_dir) diagnoses_emitter.write(diagnosis) diagnoses_emitter.close() print('missing cases for {} cases'.format(len(missing_cases))) cases_emitter = emitter('case', output_dir=output_dir, append=True) for participantid in missing_cases: case = default_case(DEFAULT_EXPERIMENT_CODE, participantid, DEFAULT_PROJECT_ID) case = obscure_dates(case, output_dir=output_dir) cases_emitter.write(case) cases_emitter.close() bcc_diagnosises_emitter = emitter('bcc_diagnosis', output_dir=output_dir) for k in bcc_diagnosises: bcc_diagnosises[k] = obscure_dates(bcc_diagnosises[k], output_dir=output_dir) bcc_diagnosises_emitter.write(bcc_diagnosises[k]) bcc_diagnosises_emitter.close() if __name__ == "__main__": item_paths = ['source/bcc/voncologdiagnosis.json'] args = default_parser(DEFAULT_OUTPUT_DIR, DEFAULT_EXPERIMENT_CODE, DEFAULT_PROJECT_ID).parse_args() transform(item_paths, output_dir=args.output_dir, experiment_code=args.experiment_code)
def transform(item_paths, output_dir, compresslevel=0):
    """Transform the bmeg ccle input into gen3 project/experiment/case/demographic json.

    The two ``InProject`` edge files under ``source/ccle`` are read first:
    every edge contributes to the single ``ccle`` project, one experiment
    per distinct ``to`` value, and one case per ``from`` value.  Projects
    and experiments are written immediately.  Then each individual record
    in ``item_paths`` (looked up by its ``gid``) has its case written
    together with a demographic record.

    No diagnosis records are produced by this transform.

    Args:
        item_paths: paths of individual files, e.g. rows shaped like
            ``{"gid": "Individual:CCLE:ACH-001665", "data":
            {"individual_id": "...", "ccle_attributes": {"gender": "Male"}}}``.
        output_dir: directory handed to the emitters.
        compresslevel: kept for signature compatibility; not used here.

    Raises:
        KeyError: if an individual's ``gid`` has no case built from the
            edge files.
    """
    projects_emitter = emitter('project', output_dir=output_dir)
    experiments_emitter = emitter('experiment', output_dir=output_dir)
    cases_emitter = emitter('case', output_dir=output_dir)
    demographics_emitter = emitter('demographic', output_dir=output_dir)
    cases = {}
    projects = {}
    experiments = {}
    for p in [
            'source/ccle/InProject.Edge.json.gz',
            'source/ccle/maf.InProject.Edge.json.gz'
    ]:
        for line in reader(p):
            project_submitter_id = line['to']
            # all edges are filed under one project, whatever 'to' names
            project_name = 'ccle'
            projects[project_name] = {
                'type': 'project',
                "code": project_name,
                "name": project_name,
                "state": "open",
                "availability_type": "Open",
                "dbgap_accession_number": project_name
            }
            experiment_name = project_submitter_id.replace('Project:', '')
            experiment_submitter_id = "experiment-{}".format(experiment_name)
            experiments[experiment_submitter_id] = {
                "type": "experiment",
                "projects": [{
                    "code": project_name
                }],
                "submitter_id": experiment_submitter_id,
                "experimental_description": experiment_name
            }
            case_submitter_id = line['from']
            cases[case_submitter_id] = {
                'type': 'case',
                '*experiments': {
                    'submitter_id': experiment_submitter_id
                },
                'submitter_id': case_submitter_id
            }
    for project in projects.values():
        projects_emitter.write(project)
    for experiment in experiments.values():
        experiments_emitter.write(experiment)
    projects_emitter.close()
    experiments_emitter.close()

    for p in item_paths:
        for line in reader(p):
            case_submitter_id = line['gid']
            cases_emitter.write(cases[case_submitter_id])
            demographic = {
                'type': 'demographic',
                '*submitter_id': 'demographic-{}'.format(case_submitter_id),
                '*cases': {
                    'submitter_id': case_submitter_id
                }
            }
            data = line['data']
            # Individual records carry gender under data['ccle_attributes'];
            # a top-level data['gender'] is still honoured when present.
            gender = data.get('gender')
            if gender is None:
                gender = (data.get('ccle_attributes') or {}).get('gender')
            gender = gender.lower() if isinstance(gender, str) else 'unknown'
            if gender not in ('male', 'female'):
                gender = 'unknown'
            demographic['gender'] = gender
            demographics_emitter.write(demographic)
    cases_emitter.close()
    demographics_emitter.close()
def transform(item_paths, output_dir, experiment_code, compresslevel=0):
    """Read bcc labkey sample json and write gen3 ``sample`` / ``bcc_sample`` json.

    Known case and diagnosis ids are loaded from ``case.json`` and
    ``diagnosis.json`` in ``output_dir``.  For each input row a sample is
    built with ``default_sample``; rows whose case is unknown are recorded
    as missing parents and skipped, and rows whose sample id was already
    written are skipped.  A sample whose diagnosis is unknown is still
    written, with the diagnosis link's ``submitter_id`` removed.

    ``experiment_code`` and ``compresslevel`` are not used in the visible
    code.

    NOTE(review): neither emitter is closed in the portion visible here -
    confirm whether the function continues beyond this point.
    """
    cases = set([])
    for line in reader('{}/case.json'.format(output_dir)):
        cases.add(line['submitter_id'])
    diagnoses = set([])
    for line in reader('{}/diagnosis.json'.format(output_dir)):
        diagnoses.add(line['submitter_id'])
    missing_cases = set([])
    print('cases len {}'.format(len(cases)))
    # dedup
    samples = []
    samples_emitter = emitter('sample', output_dir=output_dir)
    bcc_samples_emitter = emitter('bcc_sample', output_dir=output_dir)
    # despite the name, this collects missing case parents as well as
    # missing diagnosis parents
    missing_diagnoses = []
    for p in item_paths:
        # file name without directory or extension
        source = os.path.splitext(os.path.basename(p))[0]
        for line in reader(p):
            case_submitter_id = line.get('participantid',
                                         line.get('ParticipantID'))
            sample = default_sample(case_submitter_id,
                                    line=line,
                                    project_id=DEFAULT_PROJECT_ID)
            submitter_id = sample['submitter_id']
            if case_submitter_id not in cases:
                # print('no case {} for sample {} - skipping.'.format(case_submitter_id, submitter_id))
                missing_diagnoses.append(
                    missing_parent(child_id=submitter_id,
                                   child_type='sample',
                                   parent_id=case_submitter_id,
                                   parent_type='case'))
                continue
            # only the first row for a given sample id is written
            if submitter_id in samples:
                continue
            if sample['diagnoses']['submitter_id'] not in diagnoses:
                missing_diagnoses.append(
                    missing_parent(
                        child_id=submitter_id,
                        child_type='sample',
                        parent_id=sample['diagnoses']['submitter_id'],
                        parent_type='diagnosis'))
                # drop the dangling link but still write the sample
                del sample['diagnoses']['submitter_id']
            bcc_submitter_id = '{}-{}'.format(submitter_id, source)
            samples_emitter.write(sample)
            samples.append(submitter_id)
            bcc_sample = {
                'type': 'bcc_sample',
                'sample': {
                    'submitter_id': submitter_id
                },
                'source': source,
                'submitter_id': bcc_submitter_id,
                'project_id': DEFAULT_PROJECT_ID
            }
            bcc_sample.update(line)
            # replace the numeric sample_type_id with its looked-up value
            if '_labkeyurl_sample_type_id' in bcc_sample:
                bcc_sample['sample_type'] = LOOKUPS['sample_type'][
                    bcc_sample['sample_type_id']]
                del bcc_sample['sample_type_id']
                del bcc_sample['_labkeyurl_sample_type_id']
            bcc_sample = obscure_dates(bcc_sample, output_dir=output_dir)
            bcc_samples_emitter.write(bcc_sample)
            # NOTE(review): rows with an unknown case already hit `continue`
            # above, so this branch looks unreachable - confirm
            if case_submitter_id not in cases:
                missing_cases.add(case_submitter_id)
                cases.add(case_submitter_id)
    save_missing_parents(missing_diagnoses)