def create_descriptor():
    """Returns a ISA-Tab descriptor using a simple sample plan for
    illustration.

    Builds a hard-coded example: two sample types (liver x5, blood x3),
    a 3x3x2 full-factorial treatment design over AGENT/INTENSITY/DURATION,
    then prints the resulting ISA-Tab serialization to stdout.
    Returns None; output goes to stdout only.
    """
    investigation = Investigation(identifier='I1')
    # Sampling plan: which materials are collected and how many times each.
    plan = SampleAssayPlan()
    plan.add_sample_type('liver')
    plan.add_sample_plan_record('liver', 5)
    plan.add_sample_type('blood')
    plan.add_sample_plan_record('blood', 3)
    plan.group_size = 2
    # Study factors for the treatment design.
    # NOTE: 'pertubation agent' is a typo preserved from the original term.
    f1 = StudyFactor(name='AGENT', factor_type=OntologyAnnotation(
        term='pertubation agent'))
    f2 = StudyFactor(name='INTENSITY', factor_type=OntologyAnnotation(
        term='intensity'))
    f3 = StudyFactor(name='DURATION', factor_type=OntologyAnnotation(
        term='time'))
    treatment_factory = TreatmentFactory(factors=[f1, f2, f3])
    # Factor levels; the full factorial crosses all of them (3*3*2 = 18).
    treatment_factory.add_factor_value(f1, {'cocaine', 'crack', 'aether'})
    treatment_factory.add_factor_value(f2, {'low', 'medium', 'high'})
    treatment_factory.add_factor_value(f3, {'short', 'long'})
    ffactorial_design_treatments = treatment_factory\
        .compute_full_factorial_design()
    treatment_sequence = TreatmentSequence(
        ranked_treatments=ffactorial_design_treatments)
    # treatment_factory.add_factor_value('intensity', 1.05)
    # Combine the sampling plan and treatment sequence into a Study object.
    study = IsaModelObjectFactory(plan, treatment_sequence)\
        .create_study_from_plan()
    study.filename = 's_study.txt'
    investigation.studies = [study]
    print(isatab.dumps(investigation))
def create_from_plan_parameters(galaxy_parameters_file,
                                sample_assay_plans_file, study_info_file,
                                treatment_plans_file, target_dir):
    """Create an ISA-Tab investigation from planning parameter files.

    Exactly one input mode must be used: either a single combined Galaxy
    parameters file, or all three of (sample/assay plans, study info,
    treatment plans) as separate JSON files.

    :param galaxy_parameters_file: file-like object with combined Galaxy
        parameters JSON, or a falsy value to use the separate-files mode.
    :param sample_assay_plans_file: file-like JSON of the sample/assay plan.
    :param study_info_file: file-like JSON with study metadata (PI info,
        description, etc.).
    :param treatment_plans_file: file-like JSON with the treatment plan.
    :param target_dir: directory path the ISA-Tab files are written to.
    :raises IOError: if neither input mode is satisfied.
    :raises NotImplementedError: for non-intervention, multiple-treatment,
        or non-chemical study configurations.
    """
    decoder = SampleAssayPlanDecoder()
    if galaxy_parameters_file:
        # Galaxy mode: one combined JSON document holding all three parts;
        # re-serialize the plan part so the decoder can read it as a stream.
        galaxy_parameters = json.load(galaxy_parameters_file)
        sample_and_assay_plans, study_info, treatment_plan_params = \
            map_galaxy_to_isa_create_json(galaxy_parameters)
        plan = decoder.load(io.StringIO(json.dumps(sample_and_assay_plans)))
    elif sample_assay_plans_file and study_info_file and treatment_plans_file:
        # Separate-files mode: each part supplied as its own JSON file.
        plan = decoder.load(sample_assay_plans_file)
        study_info = json.load(study_info_file)
        treatment_plan_params = json.load(treatment_plans_file)
    else:
        raise IOError('Wrong parameters provided')

    # Only single-treatment chemical intervention studies are supported.
    study_type = treatment_plan_params['study_type_cond']['study_type']
    if study_type != 'intervention':
        raise NotImplementedError('Only supports Intervention studies')
    single_or_multiple = treatment_plan_params['study_type_cond'][
        'one_or_more']['single_or_multiple']
    if single_or_multiple == 'multiple':
        # Fixed: this message literal was previously broken across a
        # physical line (a syntax error); same wording as in
        # create_from_galaxy_parameters.
        raise NotImplementedError(
            'Multiple treatments not yet implemented. Please select Single')
    intervention_type = treatment_plan_params['study_type_cond'][
        'one_or_more']['intervention_type']['select_intervention_type']
    if intervention_type != 'chemical intervention':
        raise NotImplementedError(
            'Only Chemical Interventions supported at this time')

    # Build the treatment factory from comma-separated level lists:
    # BASE_FACTORS[0]=agent, [1]=intensity, [2]=duration (by position).
    treatment_factory = TreatmentFactory(
        intervention_type=INTERVENTIONS['CHEMICAL'], factors=BASE_FACTORS)
    agent_levels = treatment_plan_params['study_type_cond']['one_or_more'][
        'intervention_type']['agent'].split(',')
    for agent_level in agent_levels:
        treatment_factory.add_factor_value(BASE_FACTORS[0],
                                           agent_level.strip())
    dose_levels = treatment_plan_params['study_type_cond']['one_or_more'][
        'intervention_type']['intensity'].split(',')
    for dose_level in dose_levels:
        treatment_factory.add_factor_value(BASE_FACTORS[1],
                                           dose_level.strip())
    duration_of_exposure_levels = treatment_plan_params['study_type_cond'][
        'one_or_more']['intervention_type']['duration'].split(',')
    for duration_of_exposure_level in duration_of_exposure_levels:
        treatment_factory.add_factor_value(
            BASE_FACTORS[2], duration_of_exposure_level.strip())
    treatment_sequence = TreatmentSequence(
        ranked_treatments=treatment_factory.compute_full_factorial_design())

    isa_object_factory = IsaModelObjectFactory(plan, treatment_sequence)
    # NOTE(review): despite the name, the returned object is used as a
    # Study below — confirm create_assays_from_plan() returns a Study.
    s = isa_object_factory.create_assays_from_plan()

    # Attach contact and descriptive metadata from the study-info JSON.
    contact = Person()
    contact.affiliation = study_info['study_pi_affiliation']
    contact.last_name = study_info['study_pi_last_name']
    contact.email = study_info['study_pi_email']
    contact.first_name = study_info['study_pi_first_name']
    s.contacts = [contact]
    s.description = study_info['study_description']
    s.filename = 's_study.txt'
    s.title = 'ISA created {}'.format(datetime.datetime.now().isoformat())
    s.identifier = 'ISA-{}'.format(uuid.uuid4().hex[:8])

    # Wrap the study in an investigation mirroring its metadata, then dump.
    i = Investigation()
    i.contacts = [contact]
    i.description = s.description
    i.title = s.title
    i.identifier = s.identifier
    i.studies = [s]
    isatab.dump(isa_obj=i, output_path=target_dir,
                i_file_name='i_investigation.txt')

    # Touch empty placeholder files for every declared data file so the
    # dumped ISA-Tab references exist on disk.
    for assay in s.assays:
        for data_file in assay.data_files:
            data_file_path = os.path.join(target_dir, data_file.filename)
            with open(data_file_path, 'a'):
                os.utime(data_file_path, None)
def create_from_galaxy_parameters(galaxy_parameters_file, target_dir):
    """Create and dump an ISA-Tab investigation from a Galaxy tool's
    parameters JSON.

    Reads the combined Galaxy parameters document, builds a treatment
    sequence (full or fractional factorial), a sample/assay plan with
    optional QC batches, then writes the ISA-Tab files to ``target_dir``.

    :param galaxy_parameters_file: file-like object containing the Galaxy
        parameters JSON; required.
    :param target_dir: existing directory to write ISA-Tab output into.
    :raises IOError: on missing parameters file, missing target dir, or an
        empty sampling plan.
    :raises NotImplementedError: for multiple-intervention designs or
        unrecognized QC types.
    """
    def _create_treatment_sequence(galaxy_parameters):
        # Build a TreatmentSequence from the 'treatment_plan' section.
        treatment_plan = galaxy_parameters['treatment_plan']
        study_type = treatment_plan['study_type']['study_type_selector']
        log.debug(json.dumps(galaxy_parameters, indent=4))
        # The location of the single/multiple flag differs between the
        # full-factorial and fractional-factorial parameter layouts.
        try:
            single_or_multiple = treatment_plan['study_type']['balance'][
                'multiple_interventions']
        except KeyError:
            single_or_multiple = \
                treatment_plan['study_type']['multiple_interventions'][
                    'multiple_interventions_selector']
        if single_or_multiple == 'multiple':
            raise NotImplementedError(
                'Multiple treatments not yet implemented. Please select Single')
        if study_type == 'full_factorial':
            # Map the tool's intervention label onto the INTERVENTIONS enum.
            intervention_type = \
                treatment_plan['study_type']['multiple_interventions'][
                    'intervention_type']['intervention_type_selector']
            if intervention_type == 'chemical intervention':
                interventions = INTERVENTIONS['CHEMICAL']
            elif intervention_type == 'dietary intervention':
                interventions = INTERVENTIONS['DIET']
            elif intervention_type == 'behavioural intervention':
                interventions = INTERVENTIONS['BEHAVIOURAL']
            elif intervention_type == 'biological intervention':
                interventions = INTERVENTIONS['BIOLOGICAL']
            elif intervention_type == 'surgical intervention':
                interventions = INTERVENTIONS['SURGICAL']
            elif intervention_type == 'radiological intervention':
                # not in tool yet
                interventions = INTERVENTIONS['RADIOLOGICAL']
            else:
                # default to chemical
                interventions = INTERVENTIONS['CHEMICAL']
            treatment_factory = TreatmentFactory(
                intervention_type=interventions, factors=BASE_FACTORS)
            # Treatment Sequence: factor levels come in as comma-separated
            # strings; BASE_FACTORS[0..2] are agent/intensity/duration.
            agent_levels = \
                treatment_plan['study_type']['multiple_interventions'][
                    'intervention_type']['agent'].split(',')
            for agent_level in agent_levels:
                treatment_factory.add_factor_value(BASE_FACTORS[0],
                                                   agent_level.strip())
            dose_levels = \
                treatment_plan['study_type']['multiple_interventions'][
                    'intervention_type']['intensity'].split(',')
            for dose_level in dose_levels:
                treatment_factory.add_factor_value(BASE_FACTORS[1],
                                                   dose_level.strip())
            duration_of_exposure_levels = treatment_plan[
                'study_type']['multiple_interventions']['intervention_type'][
                'duration'].split(',')
            for duration_of_exposure_level in duration_of_exposure_levels:
                treatment_factory.add_factor_value(
                    BASE_FACTORS[2], duration_of_exposure_level.strip())
            treatment_sequence = TreatmentSequence(
                ranked_treatments=treatment_factory
                .compute_full_factorial_design())
            # Apply a uniform group size to every ranked treatment.
            # NOTE(review): ranked_treatment[0] suggests (treatment, rank)
            # tuples — confirm against TreatmentSequence.
            group_size = int(
                galaxy_parameters['treatment_plan']['study_type'][
                    'multiple_interventions']['group_size'])
            for ranked_treatment in \
                    treatment_sequence.ranked_treatments:
                ranked_treatment[0].group_size = group_size
            return treatment_sequence
        elif study_type == 'fractional_factorial':
            # Fractional design: explicit study groups with per-group
            # factor values (paired positionally with the factor list).
            intervention_type = \
                treatment_plan['study_type']['balance'][
                    'multiple_interventions']['intervention_type_selector']
            treatments = set()
            study_factors = [StudyFactor(name=x.strip()) for x in
                             treatment_plan['study_type'][
                                 'balance']['multiple_interventions'][
                                 'study_factors'].split(',')]
            for group in \
                    treatment_plan['study_type']['balance'][
                        'multiple_interventions']['study_groups']:
                factor_values = ()
                for x, y in zip(study_factors,
                                [x.strip() for x in
                                 group['factor_values'].split(',')]):
                    factor_value = FactorValue(factor_name=x, value=y)
                    factor_values = factor_values + (factor_value,)
                # Balanced designs share one group size; otherwise each
                # group carries its own.
                if galaxy_parameters['treatment_plan']['study_type'][
                        'balance']['balanced_groups']:
                    group_size = int(
                        galaxy_parameters['treatment_plan']['study_type'][
                            'balance']['multiple_interventions']['group_size'])
                else:
                    group_size = int(group['group_size'])
                treatment = Treatment(treatment_type=intervention_type,
                                      factor_values=factor_values,
                                      group_size=group_size)
                treatments.add(treatment)
            treatment_sequence = TreatmentSequence(ranked_treatments=treatments)
            return treatment_sequence

    def _create_sample_plan(sample_assay_plan, sample_plan_record):
        # Add one sample-plan record (and its assay plans) to the shared
        # SampleAssayPlan; returns the mutated plan.
        def _create_nmr_assay_type(assay_plan_record):
            # Build an NMR AssayType from the record's acquisition_mode dict.
            nmr_assay_type = AssayType(
                measurement_type='metabolite profiling',
                technology_type='nmr spectroscopy')
            nmr_top_mods = NMRTopologyModifiers()
            nmr_top_mods.technical_replicates = assay_plan_record[
                'assay_type']['acquisition_mode']['technical_replicates']
            nmr_top_mods.acquisition_modes.add(
                assay_plan_record['assay_type'][
                    'acquisition_mode']['acquisition_mode_selector'])
            # Instrument label is "<instrument> <magnet>".
            nmr_top_mods.instruments.add('{} {}'.format(
                assay_plan_record['assay_type'][
                    'acquisition_mode']['nmr_instrument'],
                assay_plan_record['assay_type']['acquisition_mode']['magnet']))
            nmr_top_mods.pulse_sequences.add(
                assay_plan_record['assay_type'][
                    'acquisition_mode']['pulse_sequence']
            )
            nmr_top_mods.magnet_power = \
                assay_plan_record['assay_type']['acquisition_mode']['magnet']
            nmr_assay_type.topology_modifiers = nmr_top_mods
            return nmr_assay_type

        def _create_ms_assay_type(assay_plan_record):
            # Build an MS AssayType: sample fractions, injection modes,
            # per-mode acquisitions, and GC derivatizations.
            ms_assay_type = AssayType(
                measurement_type='metabolite profiling',
                technology_type='mass spectrometry')
            ms_assay_type.topology_modifiers = MSTopologyModifiers(
                sample_fractions=set(map(
                    lambda x: x['sample_fraction'],
                    assay_plan_record['assay_type']['sample_fractions'])))
            injection_modes = ms_assay_type.topology_modifiers.injection_modes
            if len(assay_plan_record['assay_type']['injections']) > 0:
                for inj_mod in assay_plan_record['assay_type']['injections']:
                    injection_mode = MSInjectionMode(
                        injection_mode=inj_mod[
                            'injection_mode']['injection_mode_selector'],
                        ms_instrument=inj_mod['injection_mode']['instrument']
                    )
                    # Chromatography details only apply to LC/GC modes.
                    if inj_mod['injection_mode'][
                            'injection_mode_selector'] in ('LC', 'GC'):
                        injection_mode.chromatography_instrument = inj_mod[
                            'injection_mode']['chromatography_instrument']
                    if inj_mod[
                            'injection_mode'][
                            'injection_mode_selector'] == 'LC':
                        injection_mode.chromatography_column = inj_mod[
                            'injection_mode']['chromatography_column']
                    injection_modes.add(injection_mode)
                    for acq_mod in inj_mod['injection_mode']['acquisitions']:
                        injection_mode.acquisition_modes.add(
                            MSAcquisitionMode(
                                acquisition_method=acq_mod['acquisition_mode'],
                                technical_repeats=acq_mod[
                                    'technical_replicates']
                            )
                        )
                    # GC only: parse derivatizations; "term (SRC:ACC)"
                    # strings become OntologyAnnotations.
                    if inj_mod['injection_mode'][
                            'injection_mode_selector'] == 'GC':
                        for deriva in inj_mod['injection_mode'][
                                'derivatizations']:
                            derivatization = deriva['derivatization']
                            if re.match('(.*?) \((.*?)\)', derivatization):
                                matches = next(iter(
                                    re.findall('(.*?) \((.*?)\)',
                                               derivatization)))
                                term, ontoid = matches[0], matches[1]
                                source_name, accession_id = \
                                    ontoid.split(':')[0], \
                                    ontoid.split(':')[1]
                                source = OntologySource(name=source_name)
                                derivatization = OntologyAnnotation(
                                    term=term, term_source=source,
                                    term_accession=accession_id)
                            injection_mode.derivatizations.add(
                                derivatization)
            return ms_assay_type

        # Resolve the sample material type; "term (SRC:ACC)" strings are
        # promoted to OntologyAnnotations.
        # NOTE(review): indexing a string equal to 'user defined' with
        # ['sample_type_ud'] would raise TypeError — confirm this record
        # is a dict in the user-defined case.
        if sample_plan_record['material_type'] == 'user defined':
            sample_type = sample_plan_record['material_type']['sample_type_ud']
        else:
            sample_type = sample_plan_record['material_type']
        if re.match('(.*?) \((.*?)\)', sample_type):
            matches = next(iter(re.findall('(.*?) \((.*?)\)', sample_type)))
            term, ontoid = matches[0], matches[1]
            source_name, accession_id = ontoid.split(':')[0], \
                ontoid.split(':')[1]
            source = OntologySource(name=source_name)
            sample_type = OntologyAnnotation(term=term, term_source=source,
                                             term_accession=accession_id)
        sample_assay_plan.add_sample_type(sample_type)
        sample_size = sample_plan_record['sample_collections']
        sample_assay_plan.add_sample_plan_record(sample_type, sample_size)

        # One assay plan per assay record; only NMR and MS are supported.
        for assay_plan_record in sample_plan_record['assay_plans']:
            tt = assay_plan_record['assay_type']['assay_type_selector']
            if tt == 'nmr':
                assay_type = _create_nmr_assay_type(assay_plan_record)
            elif tt == 'ms':
                assay_type = _create_ms_assay_type(assay_plan_record)
            else:
                raise NotImplementedError('Only MS and NMR assays supported')
            sample_assay_plan.add_assay_type(assay_type)
            sample_assay_plan.add_assay_plan_record(sample_type, assay_type)
        return sample_assay_plan

    def _inject_qcqa_plan(sample_assay_plan, qcqa_record):
        # Attach one QC record to the plan: either an interval series or a
        # pre/post-run dilution series batch. Returns the mutated plan.
        qc_type = qcqa_record['qc_type']['qc_type_selector']
        if qc_type == 'interval_series':
            material_type = qcqa_record['material_type']
            # Promote "term (SRC:ACC)" strings to OntologyAnnotations.
            if re.match('(.*?) \((.*?)\)', material_type):
                matches = next(iter(
                    re.findall('(.*?) \((.*?)\)', material_type)))
                term, ontoid = matches[0], matches[1]
                source_name, accession_id = ontoid.split(':')[0], \
                    ontoid.split(':')[1]
                source = OntologySource(name=source_name)
                material_type = OntologyAnnotation(
                    term=term, term_source=source,
                    term_accession=accession_id)
            sample_assay_plan.add_sample_qc_plan_record(
                material_type=material_type,
                injection_interval=qcqa_record[
                    'qc_type']['injection_frequency'])
        elif 'dilution_series' in qc_type:
            # Dilution values arrive as a comma-separated integer list.
            values = [int(x) for x in qcqa_record[
                'qc_type']['values'].split(',')]
            material_type = qcqa_record['material_type']
            if re.match('(.*?) \((.*?)\)', material_type):
                matches = next(iter(
                    re.findall('(.*?) \((.*?)\)', material_type)))
                term, ontoid = matches[0], matches[1]
                source_name, accession_id = ontoid.split(':')[0], \
                    ontoid.split(':')[1]
                source = OntologySource(name=source_name)
                material_type = OntologyAnnotation(
                    term=term, term_source=source,
                    term_accession=accession_id)
            batch = SampleQCBatch(material=material_type)
            for value in values:
                batch.characteristic_values.append(
                    Characteristic(category=OntologyAnnotation(
                        term='quantity'), value=value)
                )
            # The qc_type string encodes whether this batch runs before or
            # after the main run.
            if 'pre' in qc_type:
                sample_assay_plan.pre_run_batch = batch
            elif 'post' in qc_type:
                sample_assay_plan.post_run_batch = batch
        else:
            raise NotImplementedError('QC type not recognized!')
        return sample_assay_plan

    # pre-generation checks
    if galaxy_parameters_file:
        galaxy_parameters = json.load(galaxy_parameters_file)
        log.debug(json.dumps(galaxy_parameters, indent=4))
    else:
        raise IOError('Could not load Galaxy parameters file!')
    if target_dir:
        if not os.path.exists(target_dir):
            raise IOError('Target path does not exist!')
    if len(galaxy_parameters['sample_and_assay_planning']['sample_plans']) == 0:
        raise IOError('No Sampling plan specified')

    # Build the treatment sequence, then fold every sample plan and QC
    # record into one shared SampleAssayPlan.
    treatment_sequence = _create_treatment_sequence(galaxy_parameters)
    sample_assay_plan = SampleAssayPlan()
    for sample_plan_record in \
            galaxy_parameters['sample_and_assay_planning'][
                'sample_plans']:
        _ = _create_sample_plan(sample_assay_plan, sample_plan_record)
    for qcqa_record in galaxy_parameters['qc_planning']['qc_plans']:
        _ = _inject_qcqa_plan(sample_assay_plan, qcqa_record)
    # Group size lives in a different place depending on the design type;
    # fall through the two known layouts before giving up.
    try:
        sample_assay_plan.group_size = \
            int(galaxy_parameters['treatment_plan']['study_type'][
                'multiple_interventions']['group_size'])
    except KeyError:
        try:
            sample_assay_plan.group_size = \
                int(galaxy_parameters['treatment_plan']['study_type'][
                    'balance']['multiple_interventions']['group_size'])
        except KeyError:
            log.debug(
                'Group size not set for root plan as multiple intervention')
            sample_assay_plan.group_size = 0  # raises AttributeError
    study_info = galaxy_parameters['study_metadata']
    if len(sample_assay_plan.sample_plan) == 0:
        log.info('No sample plan defined')
    if len(sample_assay_plan.assay_plan) == 0:
        log.info('No assay plan defined')
    study_design = StudyDesign()
    study_design.add_single_sequence_plan(treatment_sequence,
                                          sample_assay_plan)
    isa_object_factory = IsaModelObjectFactory(study_design)
    # With no sample plan there is nothing to generate — use a bare Study.
    if len(sample_assay_plan.sample_plan) == 0:
        s = Study()
    else:
        s = isa_object_factory.create_assays_from_plan()

    # Contact and descriptive metadata; .get() on the optional fields,
    # direct indexing on the required ones.
    c = Person()
    c.affiliation = study_info.get('affiliation')
    c.last_name = study_info.get('last_name')
    c.email = study_info['email']
    c.first_name = study_info['first_name']
    s.contacts = [c]
    s.description = study_info['description']
    s.filename = 's_study.txt'
    s.title = study_info['title']
    s.identifier = 'ISA-{}'.format(uuid.uuid4().hex[:8])
    s.comments = [
        Comment(name='Consent Information (ICO:0000011)',
                value=study_info['study_consent']),
        Comment(name='Data Use Requirement (DUO:0000017)',
                value=study_info['study_use_condition'])
    ]
    i = Investigation()
    i.contacts = [c]
    i.description = ""
    i.title = "Investigation"
    i.identifier = s.identifier
    i.studies = [s]
    # Share the study's ontology sources if it has any, then register the
    # ICO/DUO sources used by the comments above.
    try:
        i.ontology_source_references = s.ontology_source_references
    except AttributeError:
        pass
    i.ontology_source_references.append(OntologySource(name='ICO'))
    i.ontology_source_references.append(OntologySource(name='DUO'))

    def sanitize_filename(filename):
        # Replace anything that is not alphanumeric, '-', '_' or '.' so
        # the name is safe on common filesystems.
        filename = str(filename).strip().replace(' ', '_')
        filename = re.sub(r'(?u)[^-\w.]', '_', filename)
        return filename

    i.filename = sanitize_filename(i.filename)
    for s in i.studies:
        s.filename = sanitize_filename(s.filename)
        for a in s.assays:
            a.filename = sanitize_filename(a.filename)
    isatab.dump(isa_obj=i, output_path=target_dir)
def convert(json_path, output_path):
    """Convert an NIH-DCC metabolomics JSON document to ISA-Tab.

    Reads the DCC JSON at ``json_path``, builds an Investigation with one
    Study (contacts, factors, protocols, assays, sources/samples and the
    process sequence), writes a MAF data table plus the ISA-Tab files to
    ``output_path``.

    :param json_path: path to the input DCC JSON file.
    :param output_path: directory to write outputs into (created if absent).
    :raises IOError: if the input JSON contains no 'project' entry.
    """
    print(json_path)
    print(output_path)
    with open(json_path, 'r') as f:
        dcc_json = json.load(f)

    # Building the Investigation Object and its elements:
    project_set_json = dcc_json.get('project')
    if len(project_set_json) == 0:
        raise IOError('No project found in input JSON')
    # Only the first project entry is used.
    project_json = next(iter(project_set_json.values()))
    investigation = Investigation(identifier=project_json['id'])
    obi = OntologySource(name='OBI',
                         description='Ontology for Biomedical Investigations')
    investigation.ontology_source_references.append(obi)
    inv_person = Person(
        first_name=project_json['PI_first_name'],
        last_name=project_json['PI_last_name'],
        email=project_json['PI_email'],
        address=project_json['address'],
        affiliation=(', '.join(
            [project_json['department'], project_json['institution']])),
        roles=[
            OntologyAnnotation(term="", term_source=obi,
                               term_accession="http://purl.org/obo/OBI_1")
        ])
    investigation.contacts.append(inv_person)

    # Study metadata: again only the first entry is used.
    study_set_json = dcc_json.get('study')
    if len(study_set_json) > 0:
        study_json = next(iter(study_set_json.values()))
        study = Study(
            identifier=study_json['id'],
            title=study_json['title'],
            description=study_json['description'],
            design_descriptors=[
                OntologyAnnotation(term=study_json['type'],
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ],
            filename='s_{study_id}.txt'.format(study_id=study_json['id']))
        investigation.studies = [study]
        studyid = study_json['id']
        print(studyid)
        study_person = Person(
            first_name=study_json['PI_first_name'],
            last_name=study_json['PI_last_name'],
            email=study_json['PI_email'],
            address=study_json['address'],
            affiliation=(', '.join(
                [study_json['department'], study_json['institution']])),
            roles=[
                OntologyAnnotation(term='principal investigator',
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ])
        study.contacts.append(study_person)

    # Study factors: one per DCC factor record.
    for factor_json in dcc_json['factor'].values():
        factor = StudyFactor(name=factor_json['id'])
        study.factors.append(factor)

    # Protocols; MS/NMR protocols also spawn a matching Assay.
    for i, protocol_json in enumerate(dcc_json['protocol'].values()):
        oat_p = protocol_json['type']
        oa_protocol_type = OntologyAnnotation(
            term=oat_p, term_source=obi,
            term_accession="http://purl.org/obo/OBI_1")
        study.protocols.append(
            Protocol(name=protocol_json['id'],
                     protocol_type=oa_protocol_type,
                     description=protocol_json['description'],
                     uri=protocol_json['filename']))
        if 'MS' in protocol_json['type']:
            study.assays.append(
                Assay(measurement_type=OntologyAnnotation(
                    term='mass isotopologue distribution analysis',
                    term_source=obi,
                    term_accession="http://purl.org/obo/OBI_112"),
                    technology_type=OntologyAnnotation(
                        term='mass spectrometry',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"),
                    filename='a_assay_ms_{count}.txt'.format(count=i)))
        if 'NMR' in protocol_json['type']:
            study.assays.append(
                Assay(measurement_type=OntologyAnnotation(
                    term='isotopomer analysis',
                    term_source=obi,
                    term_accession="http://purl.org/obo/OBI_111"),
                    technology_type=OntologyAnnotation(
                        term='nmr spectroscopy',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"),
                    filename='a_assay_nmr.txt'))

    # Subjects become Sources; tissue slices also produce a Sample and a
    # sample-collection Process linking the two.
    for subject_json in dcc_json['subject'].values():
        if "organism" in subject_json['type']:
            source = Source(name=subject_json['id'])
            ncbitaxon = OntologySource(name='NCBITaxon',
                                       description="NCBI Taxonomy")
            characteristic_organism = Characteristic(
                category=OntologyAnnotation(term="Organism"),
                value=OntologyAnnotation(
                    term=subject_json['species'],
                    term_source=ncbitaxon,
                    term_accession='http://purl.bioontology.org/ontology/NCBITAXON/9606'))
            source.characteristics.append(characteristic_organism)
            study.sources.append(source)
        elif 'tissue_slice' in subject_json['type']:
            source = Source(name=subject_json['id'])
            study.sources.append(source)
            ncbitaxon = OntologySource(name='NCBITaxon',
                                       description="NCBI Taxonomy")
            characteristic_organism = Characteristic(
                category=OntologyAnnotation(term="Organism"),
                value=OntologyAnnotation(
                    term=subject_json['species'],
                    term_source=ncbitaxon,
                    term_accession='http://purl.bioontology.org/ontology/NCBITAXON/9606'))
            source.characteristics.append(characteristic_organism)
            sample = Sample(name=subject_json['id'],
                            derives_from=subject_json['parentID'])
            characteristic_organismpart = Characteristic(
                category=OntologyAnnotation(term='organism_part'),
                value=OntologyAnnotation(
                    term=subject_json['tissue_type'],
                    term_source=obi,
                    term_accession="http://purl.org/obo/OBI_1"))
            sample.characteristics.append(characteristic_organismpart)
            study.samples.append(sample)
            sample_collection_process = Process(
                executes_protocol=study.get_prot(
                    subject_json['protocol.id']))
            sample_collection_process.inputs.append(source)
            sample_collection_process.outputs.append(sample)
            study.process_sequence.append(sample_collection_process)
        else:
            source = Source(name=subject_json['id'])
            ncbitaxon = OntologySource(name='NCBITaxon',
                                       description="NCBI Taxonomy")
            characteristic_organism = Characteristic(
                category=OntologyAnnotation(term="Organism"),
                value=OntologyAnnotation(
                    term=subject_json['species'],
                    term_source=ncbitaxon,
                    term_accession='http://purl.bioontology.org/ontology/NCBITAXON/9606'))
            source.characteristics.append(characteristic_organism)
            study.sources.append(source)
        print(subject_json['id'])
        print(subject_json['species'])
        print(subject_json['type'])

    # Samples: each record type builds its processing step and links it to
    # the upstream process. NOTE(review): the process variables
    # (sample_collection_process, material_separation_process,
    # protein_extraction_process) leak across loop iterations via the
    # try/except NameError guards — record ordering in the JSON matters.
    for sample_json in dcc_json['sample'].values():
        if 'cells' in sample_json['type']:
            material_separation_process = Process(
                executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
            material_separation_process.name = sample_json['id']
            # dealing with input material, check that the parent material
            # is already among known samples or sources
            if len([
                    x for x in study.samples
                    if x.name == sample_json['parentID']
            ]) == 0:
                material_in = Sample(name=sample_json['parentID'])
                material_separation_process.inputs.append(material_in)
                study.assays[0].samples.append(material_in)
            else:
                print([
                    x for x in study.samples
                    if x.name == sample_json['parentID']
                ])
                material_separation_process.inputs.append([
                    x for x in study.samples
                    if x.name == sample_json['parentID']
                ][0])
            material_out = Sample(name=sample_json['id'])
            material_type = Characteristic(
                category=OntologyAnnotation(term='material_type'),
                value=OntologyAnnotation(
                    term=sample_json['type'],
                    term_source=obi,
                    term_accession="http://purl.org/obo/OBI_xxxxxxx"))
            material_out.characteristics.append(material_type)
            material_separation_process.outputs.append(material_out)
            study.assays[0].samples.append(material_out)
            try:
                sample_collection_process
            except NameError:
                sample_collection_process = None
            if sample_collection_process is None:
                sample_collection_process = Process(executes_protocol="")
            else:
                # NOTE(review): protein_extraction_process may be unbound
                # here if no 'protein_extract' record preceded this one.
                plink(sample_collection_process, protein_extraction_process)
        if 'protein_extract' in sample_json['type']:
            protein_extraction_process = Process(
                executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
            protein_extraction_process.name = sample_json['id']
            if len([
                    x for x in study.samples
                    if x.name == sample_json['parentID']
            ]) == 0:
                material_in = Sample(name=sample_json['parentID'])
                protein_extraction_process.inputs.append(material_in)
                study.assays[0].samples.append(material_in)
            else:
                # reuse the material_in bound by an earlier branch
                protein_extraction_process.inputs.append(material_in)
            material_out = Material(name=sample_json['id'])
            material_out.type = "Extract Name"
            material_type = Characteristic(
                category=OntologyAnnotation(term='material_type'),
                value=OntologyAnnotation(
                    term=sample_json['type'],
                    term_source=obi,
                    term_accession="http://purl.org/obo/OBI_1"))
            material_out.characteristics.append(material_type)
            # NOTE(review): these append material_in although material_out
            # was just built — looks suspicious but preserved as-is.
            study.assays[0].samples.append(material_in)
            study.assays[0].materials['other_material'].append(material_in)
            try:
                material_separation_process
            except NameError:
                material_separation_process = None
            if material_separation_process is None:
                material_separation_process = Process(executes_protocol="")
            else:
                plink(material_separation_process, protein_extraction_process)
        if 'polar' in sample_json['type']:
            material_in = Material(name=sample_json['parentID'])
            material_type = Characteristic(
                category=OntologyAnnotation(term='material_type',
                                            term_source=obi),
                value=OntologyAnnotation(term=sample_json['type'],
                                         term_source=obi))
            material_in.characteristics.append(material_type)
            study.assays[0].materials['other_material'].append(material_in)
            data_acq_process = Process(executes_protocol=study.get_prot(
                sample_json['protocol.id']))
            data_acq_process.name = sample_json['id']
            # FIX: the format string previously had no placeholder, so the
            # computed name was discarded and every data file got the same
            # constant filename.
            datafile = DataFile(
                filename='{filename}.txt'.format(filename='_'.join(
                    ['mass_isotopomer-data', studyid, sample_json['id']])),
                label='Raw Data File')
            data_acq_process.outputs.append(datafile)
            study.assays[0].data_files.append(datafile)
            try:
                protein_extraction_process
            except NameError:
                protein_extraction_process = None
            if protein_extraction_process is None:
                protein_extraction_process = Process(executes_protocol="")
            else:
                plink(protein_extraction_process, data_acq_process)
        if 'bulk_tissue' in sample_json['type']:
            bulk_process = Process(executes_protocol=study.get_prot(
                sample_json['protocol.id']))
            bulk_process.name = sample_json['id']
            if len([
                    x for x in study.samples
                    if x.name == sample_json['parentID']
            ]) == 0:
                material_in = Sample(name=sample_json['parentID'])
                bulk_process.inputs.append(material_in)
                study.assays[0].samples.append(material_in)
            else:
                # reuse the material_in bound by an earlier branch
                bulk_process.inputs.append(material_in)
            plink(sample_collection_process, bulk_process)

    # Build the MAF (metabolite assignment file) table from measurements.
    data_rec_header = '\t'.join(
        ('metabolite name', 'assignment', 'signal intensity',
         'retention time', 'm/z', 'formula', 'adduct', 'isotopologue',
         'sample identifier'))
    records = []
    for element in dcc_json['measurement']:
        record = '\t'.join((dcc_json['measurement'][element]['compound'],
                            dcc_json['measurement'][element]['assignment'],
                            dcc_json['measurement'][element]['raw_intensity'],
                            dcc_json['measurement'][element]['retention_time'],
                            dcc_json['measurement'][element]['corrected_mz'],
                            dcc_json['measurement'][element]['formula'],
                            dcc_json['measurement'][element]['adduct'],
                            dcc_json['measurement'][element]['isotopologue'],
                            dcc_json['measurement'][element]['sample.id']))
        records.append(record)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    try:
        with open(
                '{output_path}/{study_id}-maf-data-nih-dcc-json.txt'.
                format(output_path=output_path, study_id=studyid),
                'w') as fh:
            print(
                "'writing 'maf file document' to file from 'generate_maf_file' method:..."
            )
            fh.writelines(data_rec_header)
            fh.writelines('\n')
            for item in records:
                fh.writelines(item)
                fh.writelines('\n')
        print("writing 'investigation information' to file...")
        print(isatab.dumps(investigation))
        isatab.dump(investigation, output_path=output_path)
    except IOError:
        print("Error: in main() method can't open file or write data")