def generate_donor_organism_json(data, output_dir, donor_number, bundle_uuid):
    file_name = f'donor_organism_{donor_number}.json'
    donor_organism_json = create_donor_organism_json(data=data,
                                                     file_uuid=generate_file_uuid(bundle_uuid, file_name),
                                                     i=donor_number)
    with open(f'{output_dir}/{file_name}', 'w') as f:
        f.write(json.dumps(donor_organism_json, indent=4))
    print(f'"{output_dir}/{file_name}" successfully written.')
def generate_cell_suspension_json(wb, output_dir, cell_count, bundle_uuid):
    file_name = 'cell_suspension_0.json'
    cell_suspension_data = parse_cell_suspension_data_from_xlsx(wb)
    cell_json = create_cell_suspension_jsons(data=cell_suspension_data,
                                             cell_count=cell_count,
                                             file_uuid=generate_file_uuid(bundle_uuid, file_name))
    with open(f'{output_dir}/{file_name}', 'w') as f:
        f.write(json.dumps(cell_json, indent=4))
    print(f'"{output_dir}/{file_name}" successfully written.')
def generate_library_preparation_protocol_json(wb, output_dir, bundle_uuid):
    file_name = 'library_preparation_protocol_0.json'
    library_preparation_protocol_data = parse_library_preparation_protocol_data_from_xlsx(wb)
    library_preparation_protocol_json = create_library_preparation_protocol_json(
        data=library_preparation_protocol_data,
        file_uuid=generate_file_uuid(bundle_uuid, file_name)
    )
    with open(f'{output_dir}/{file_name}', 'w') as f:
        f.write(json.dumps(library_preparation_protocol_json, indent=4))
    print(f'"{output_dir}/{file_name}" successfully written.')
def generate_sequencing_protocol_json(wb, output_dir, bundle_uuid):
    sequencing_protocol_data = parse_sequencing_protocol_data_from_xlsx(wb)
    lengths = set(map(len, sequencing_protocol_data.values()))
    for i in range(one(lengths)):
        file_name = f'sequencing_protocol_{i}.json'
        sequencing_protocol_json = create_sequencing_protocol_json(
            data=sequencing_protocol_data,
            file_uuid=generate_file_uuid(bundle_uuid, file_name),
            i=i
        )
        with open(f'{output_dir}/{file_name}', 'w') as f:
            json.dump(sequencing_protocol_json, f, indent=4)
        print(f'"{output_dir}/{file_name}" successfully written.')
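# Note on `one` above: `more_itertools.one(iterable)` returns the single
# element of an iterable and raises ValueError if there are zero or more than
# one. Applying it to the set of column lengths asserts that every value list
# in sequencing_protocol_data has the same length, i.e. the parsed spreadsheet
# columns are consistent, before one protocol file is written per row. For
# example:
#
#     one({3})        # -> 3
#     one({3, 5})     # -> ValueError: too many items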
def file_uuid_callback(file_path: str):
    file_path = Path(file_path)
    file_name = file_path.name
    file_uuid = generate_file_uuid(bundle_uuid, file_name)
    log.info('Allocated UUID %s for file %s', file_uuid, file_path)
    if file_name.endswith('.json'):
        with file_path.open('rt') as f:
            document = json.load(f)
            if file_name == 'links.json':
                pass
            elif file_name == 'project_0.json':
                assert document['provenance']['document_id'] == bundle_uuid
            else:
                assert document['provenance']['document_id'] == file_uuid
    return file_uuid
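# The callback above relies on generate_file_uuid being deterministic: the same
# (bundle_uuid, file_name) pair must always yield the same UUID, so the
# document_id written into each file's provenance can be re-derived here and
# checked. A minimal sketch of such a helper, assuming a name-based (version 5)
# UUID keyed on the bundle UUID; the actual implementation may differ:
def _file_uuid_sketch(bundle_uuid: str, file_name: str) -> str:
    import uuid
    # Hypothetical: derive a stable UUID from the bundle UUID and the file name.
    return str(uuid.uuid5(uuid.UUID(bundle_uuid), file_name))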
def generate_analysis_json(bundle_uuid, output_dir):
    file_name = 'analysis_file_0.json'
    version = timestamp()
    analysis_json = {
        "describedBy": "https://schema.humancellatlas.org/type/file/6.0.0/analysis_file",
        "file_core": {
            "file_name": "matrix.mtx.zip",
            "format": "mtx"
        },
        "schema_type": "file",
        "provenance": {
            "document_id": generate_file_uuid(bundle_uuid, file_name),
            "submission_date": version,  # TODO: Fetch from DSS if it exists
            "update_date": version
        }
    }
    with open(f'{output_dir}/{file_name}', 'w') as f:
        f.write(json.dumps(analysis_json, indent=4))
    print(f'"{output_dir}/{file_name}" successfully written.')
def generate_analysis_protocol_json(output_dir, bundle_uuid):
    # TODO: Hard-coded; not clear where this data should come from
    file_name = 'analysis_protocol_0.json'
    version = timestamp()
    analysis_protocol_json = {
        "computational_method": "SmartSeq2SingleCell",
        "describedBy": "https://schema.humancellatlas.org/type/protocol/analysis/9.0.0/analysis_protocol",
        "protocol_core": {
            "protocol_id": "smartseq2_v2.3.0"
        },
        "schema_type": "protocol",
        "type": {
            "text": "analysis"
        },
        "provenance": {
            "document_id": generate_file_uuid(bundle_uuid, file_name),
            "submission_date": version,  # TODO: Fetch from DSS if it exists
            "update_date": version
        }
    }
    with open(f'{output_dir}/{file_name}', 'w') as f:
        f.write(json.dumps(analysis_protocol_json, indent=4))
    print(f'"{output_dir}/{file_name}" successfully written.')
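# A hypothetical end-to-end driver tying the generators above together for a
# single bundle; a sketch only, not part of the original module. Assumptions:
# the workbook comes from an .xlsx file loaded with openpyxl, and the donor
# generation step is omitted because its `data` and donor count come from
# parsing code not shown here.
def generate_bundle_sketch(xlsx_path, output_dir, bundle_uuid, cell_count):
    from openpyxl import load_workbook  # assumed source of the `wb` workbook objects
    wb = load_workbook(xlsx_path, read_only=True)
    generate_cell_suspension_json(wb, output_dir, cell_count, bundle_uuid)
    generate_library_preparation_protocol_json(wb, output_dir, bundle_uuid)
    generate_sequencing_protocol_json(wb, output_dir, bundle_uuid)
    generate_analysis_json(bundle_uuid, output_dir)
    generate_analysis_protocol_json(output_dir, bundle_uuid)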