def create_from_plan_parameters(
        galaxy_parameters_file, sample_assay_plans_file, study_info_file,
        treatment_plans_file, target_dir):
    """Create an ISA-Tab investigation from plan parameters.

    Parameters are taken either from a single Galaxy parameters JSON file
    (converted with ``map_galaxy_to_isa_create_json``) or, when that is
    falsy, from three separate files: the sample/assay plan, the study
    info and the treatment plan parameters.

    Only a single chemical intervention study is handled; its agent,
    intensity and duration levels (comma-separated strings) are expanded
    into a full factorial design.  The investigation is dumped to
    ``target_dir`` and every data file referenced by the generated assays
    is touched there.

    :param galaxy_parameters_file: File-like object with the Galaxy
        parameters JSON, or a falsy value.
    :param sample_assay_plans_file: File-like object with the sample and
        assay plans JSON.
    :param study_info_file: File-like object with the study info JSON.
    :param treatment_plans_file: File-like object with the treatment plan
        parameters JSON.
    :param target_dir: Directory the ISA-Tab files are written to.
    :raises IOError: If neither the Galaxy file nor all three separate
        files are provided.
    :raises NotImplementedError: If the study is not an intervention
        study, has multiple treatments, or is not a chemical intervention.
    """
    decoder = SampleAssayPlanDecoder()
    separate_files = (
        sample_assay_plans_file, study_info_file, treatment_plans_file)
    if galaxy_parameters_file:
        galaxy_parameters = json.load(galaxy_parameters_file)
        converted = map_galaxy_to_isa_create_json(galaxy_parameters)
        sample_and_assay_plans, study_info, treatment_plan_params = converted
        plan_json = json.dumps(sample_and_assay_plans)
        plan = decoder.load(io.StringIO(plan_json))
    elif all(separate_files):
        plan = decoder.load(sample_assay_plans_file)
        study_info = json.load(study_info_file)
        treatment_plan_params = json.load(treatment_plans_file)
    else:
        raise IOError('Wrong parameters provided')

    # Walk down the nested conditional one level at a time, validating
    # each level before descending further.
    study_type_cond = treatment_plan_params['study_type_cond']
    if study_type_cond['study_type'] != 'intervention':
        raise NotImplementedError('Only supports Intervention studies')
    one_or_more = study_type_cond['one_or_more']
    if one_or_more['single_or_multiple'] == 'multiple':
        raise NotImplementedError(
            'Multiple treatments not yet implemented. Please select Single')
    intervention = one_or_more['intervention_type']
    if intervention['select_intervention_type'] != 'chemical intervention':
        raise NotImplementedError(
            'Only Chemical Interventions supported at this time')

    treatment_factory = TreatmentFactory(
        intervention_type=INTERVENTIONS['CHEMICAL'], factors=BASE_FACTORS)
    # BASE_FACTORS[0..2] are paired, in order, with these parameter keys.
    for position, level_key in enumerate(('agent', 'intensity', 'duration')):
        raw_levels = intervention[level_key].split(',')
        for raw_level in raw_levels:
            treatment_factory.add_factor_value(
                BASE_FACTORS[position], raw_level.strip())
    full_design = treatment_factory.compute_full_factorial_design()
    treatment_sequence = TreatmentSequence(ranked_treatments=full_design)

    object_factory = IsaModelObjectFactory(plan, treatment_sequence)
    study = object_factory.create_assays_from_plan()

    principal = Person()
    principal.affiliation = study_info['study_pi_affiliation']
    principal.last_name = study_info['study_pi_last_name']
    principal.email = study_info['study_pi_email']
    principal.first_name = study_info['study_pi_first_name']

    study.contacts = [principal]
    study.description = study_info['study_description']
    study.filename = 's_study.txt'
    study.title = 'ISA created {}'.format(
        datetime.datetime.now().isoformat())
    study.identifier = 'ISA-{}'.format(uuid.uuid4().hex[:8])

    investigation = Investigation()
    investigation.contacts = [principal]
    investigation.description = study.description
    investigation.title = study.title
    investigation.identifier = study.identifier
    investigation.studies = [study]
    isatab.dump(isa_obj=investigation, output_path=target_dir,
                i_file_name='i_investigation.txt')

    # Touch every data file named by the assays: opening in append mode
    # creates it if missing, utime refreshes its timestamps.
    referenced_files = (
        data_file for assay in study.assays
        for data_file in assay.data_files)
    for data_file in referenced_files:
        data_file_path = os.path.join(target_dir, data_file.filename)
        with open(data_file_path, 'a'):
            os.utime(data_file_path, None)
def create_from_galaxy_parameters(galaxy_parameters_file, target_dir):
    """Create an ISA-Tab investigation from a Galaxy parameters JSON file.

    The JSON is read from ``galaxy_parameters_file`` and its
    ``treatment_plan``, ``sample_and_assay_planning``, ``qc_planning`` and
    ``study_metadata`` sections are turned into a treatment sequence, a
    sample/assay plan (with QC records) and study metadata.  The resulting
    investigation is written with ``isatab.dump`` to ``target_dir``.

    :param galaxy_parameters_file: File-like object containing the Galaxy
        parameters JSON.
    :param target_dir: Output directory passed to ``isatab.dump``; when
        truthy it must already exist.
    :raises IOError: If ``galaxy_parameters_file`` is falsy, if
        ``target_dir`` is given but does not exist, or if no sample plans
        are specified.
    :raises NotImplementedError: For multiple treatments, assay types
        other than ``nmr``/``ms``, or unrecognized QC types.
    """

    def _to_ontology_annotation(text):
        """Convert 'Term (SOURCE:ACCESSION)' text to an OntologyAnnotation.

        Text that does not start with that pattern is returned unchanged.
        A parenthesised suffix without a colon (e.g. 'plasma (EDTA)') is
        not an ontology reference and is also returned unchanged, instead
        of failing with IndexError when the accession is extracted.
        """
        found = re.match(r'(.*?) \((.*?)\)', text)
        if found is None:
            return text
        term, ontoid = found.group(1), found.group(2)
        if ':' not in ontoid:
            return text
        id_parts = ontoid.split(':')
        source_name, accession_id = id_parts[0], id_parts[1]
        source = OntologySource(name=source_name)
        return OntologyAnnotation(
            term=term, term_source=source, term_accession=accession_id)

    def sanitize_filename(filename):
        """Replace spaces and any character outside ``[-\\w.]`` with '_'."""
        filename = str(filename).strip().replace(' ', '_')
        filename = re.sub(r'(?u)[^-\w.]', '_', filename)
        return filename

    def _create_treatment_sequence(galaxy_parameters):
        """Build the TreatmentSequence described by ``treatment_plan``."""
        treatment_plan = galaxy_parameters['treatment_plan']
        study_type = treatment_plan['study_type']['study_type_selector']
        log.debug(json.dumps(galaxy_parameters, indent=4))
        # The single/multiple flag is looked up under 'balance' first and,
        # if that key path is absent, under 'multiple_interventions'.
        try:
            single_or_multiple = treatment_plan['study_type']['balance'][
                'multiple_interventions']
        except KeyError:
            single_or_multiple = \
                treatment_plan['study_type']['multiple_interventions'][
                    'multiple_interventions_selector']
        if single_or_multiple == 'multiple':
            raise NotImplementedError(
                'Multiple treatments not yet implemented. Please select Single')

        if study_type == 'full_factorial':
            multiple_interventions = \
                treatment_plan['study_type']['multiple_interventions']
            intervention_params = multiple_interventions['intervention_type']
            intervention_type = \
                intervention_params['intervention_type_selector']
            # Selector text -> INTERVENTIONS key; anything unlisted
            # defaults to chemical.  'radiological' is not in the tool yet.
            intervention_keys = {
                'chemical intervention': 'CHEMICAL',
                'dietary intervention': 'DIET',
                'behavioural intervention': 'BEHAVIOURAL',
                'biological intervention': 'BIOLOGICAL',
                'surgical intervention': 'SURGICAL',
                'radiological intervention': 'RADIOLOGICAL',
            }
            interventions = INTERVENTIONS[
                intervention_keys.get(intervention_type, 'CHEMICAL')]
            treatment_factory = TreatmentFactory(
                intervention_type=interventions, factors=BASE_FACTORS)
            # BASE_FACTORS[0..2] are paired, in order, with these keys;
            # each holds a comma-separated list of levels.
            for position, level_key in enumerate(
                    ('agent', 'intensity', 'duration')):
                for level in intervention_params[level_key].split(','):
                    treatment_factory.add_factor_value(
                        BASE_FACTORS[position], level.strip())
            treatment_sequence = TreatmentSequence(
                ranked_treatments=treatment_factory
                .compute_full_factorial_design())
            group_size = int(multiple_interventions['group_size'])
            for ranked_treatment in treatment_sequence.ranked_treatments:
                ranked_treatment[0].group_size = group_size
            return treatment_sequence

        if study_type == 'fractional_factorial':
            balance = treatment_plan['study_type']['balance']
            multiple_interventions = balance['multiple_interventions']
            intervention_type = \
                multiple_interventions['intervention_type_selector']
            treatments = set()
            study_factors = [
                StudyFactor(name=name.strip())
                for name in multiple_interventions['study_factors'].split(',')]
            for group in multiple_interventions['study_groups']:
                group_values = [
                    value.strip()
                    for value in group['factor_values'].split(',')]
                factor_values = tuple(
                    FactorValue(factor_name=factor, value=value)
                    for factor, value in zip(study_factors, group_values))
                # Balanced designs share one group size; otherwise each
                # study group carries its own.
                if balance['balanced_groups']:
                    group_size = int(multiple_interventions['group_size'])
                else:
                    group_size = int(group['group_size'])
                treatments.add(Treatment(
                    treatment_type=intervention_type,
                    factor_values=factor_values,
                    group_size=group_size))
            return TreatmentSequence(ranked_treatments=treatments)

        # NOTE(review): any other study_type yields None, which is then
        # handed to StudyDesign.add_single_sequence_plan -- confirm whether
        # an explicit error is wanted here.
        return None

    def _create_sample_plan(sample_assay_plan, sample_plan_record):
        """Add one sample plan record and its assay plans to the plan."""

        def _create_nmr_assay_type(assay_plan_record):
            """Build the NMR AssayType from an assay plan record."""
            nmr_assay_type = AssayType(
                measurement_type='metabolite profiling',
                technology_type='nmr spectroscopy')
            nmr_top_mods = NMRTopologyModifiers()
            acquisition = assay_plan_record['assay_type']['acquisition_mode']
            nmr_top_mods.technical_replicates = \
                acquisition['technical_replicates']
            nmr_top_mods.acquisition_modes.add(
                acquisition['acquisition_mode_selector'])
            nmr_top_mods.instruments.add('{} {}'.format(
                acquisition['nmr_instrument'], acquisition['magnet']))
            nmr_top_mods.pulse_sequences.add(acquisition['pulse_sequence'])
            nmr_top_mods.magnet_power = acquisition['magnet']
            nmr_assay_type.topology_modifiers = nmr_top_mods
            return nmr_assay_type

        def _create_ms_assay_type(assay_plan_record):
            """Build the MS AssayType from an assay plan record."""
            ms_assay_type = AssayType(
                measurement_type='metabolite profiling',
                technology_type='mass spectrometry')
            ms_assay_type.topology_modifiers = MSTopologyModifiers(
                sample_fractions={
                    fraction['sample_fraction'] for fraction in
                    assay_plan_record['assay_type']['sample_fractions']})
            injection_modes = ms_assay_type.topology_modifiers.injection_modes
            for inj_mod in assay_plan_record['assay_type']['injections']:
                mode = inj_mod['injection_mode']
                selector = mode['injection_mode_selector']
                injection_mode = MSInjectionMode(
                    injection_mode=selector,
                    ms_instrument=mode['instrument'])
                if selector in ('LC', 'GC'):
                    injection_mode.chromatography_instrument = \
                        mode['chromatography_instrument']
                if selector == 'LC':
                    injection_mode.chromatography_column = \
                        mode['chromatography_column']
                injection_modes.add(injection_mode)
                for acq_mod in mode['acquisitions']:
                    injection_mode.acquisition_modes.add(
                        MSAcquisitionMode(
                            acquisition_method=acq_mod['acquisition_mode'],
                            technical_repeats=acq_mod['technical_replicates']))
                    # NOTE(review): derivatizations are added inside the
                    # acquisitions loop, so a GC injection with no
                    # acquisitions records none -- confirm this is
                    # intended.
                    if selector == 'GC':
                        for deriva in mode['derivatizations']:
                            injection_mode.derivatizations.add(
                                _to_ontology_annotation(
                                    deriva['derivatization']))
            return ms_assay_type

        if sample_plan_record['material_type'] == 'user defined':
            # NOTE(review): this indexes the string 'user defined' with
            # 'sample_type_ud', which raises TypeError.  Where the
            # user-defined value actually lives is not visible here --
            # confirm against the Galaxy tool schema before changing.
            sample_type = sample_plan_record['material_type']['sample_type_ud']
        else:
            sample_type = sample_plan_record['material_type']
        sample_type = _to_ontology_annotation(sample_type)
        sample_assay_plan.add_sample_type(sample_type)
        sample_size = sample_plan_record['sample_collections']
        sample_assay_plan.add_sample_plan_record(sample_type, sample_size)
        for assay_plan_record in sample_plan_record['assay_plans']:
            tt = assay_plan_record['assay_type']['assay_type_selector']
            if tt == 'nmr':
                assay_type = _create_nmr_assay_type(assay_plan_record)
            elif tt == 'ms':
                assay_type = _create_ms_assay_type(assay_plan_record)
            else:
                raise NotImplementedError('Only MS and NMR assays supported')
            sample_assay_plan.add_assay_type(assay_type)
            sample_assay_plan.add_assay_plan_record(sample_type, assay_type)
        return sample_assay_plan

    def _inject_qcqa_plan(sample_assay_plan, qcqa_record):
        """Add one QC/QA record to the sample assay plan."""
        qc_type = qcqa_record['qc_type']['qc_type_selector']
        if qc_type == 'interval_series':
            material_type = _to_ontology_annotation(
                qcqa_record['material_type'])
            sample_assay_plan.add_sample_qc_plan_record(
                material_type=material_type,
                injection_interval=qcqa_record[
                    'qc_type']['injection_frequency'])
        elif 'dilution_series' in qc_type:
            values = [
                int(x) for x in qcqa_record['qc_type']['values'].split(',')]
            material_type = _to_ontology_annotation(
                qcqa_record['material_type'])
            batch = SampleQCBatch(material=material_type)
            for value in values:
                batch.characteristic_values.append(
                    Characteristic(
                        category=OntologyAnnotation(term='quantity'),
                        value=value))
            # The selector text decides whether the batch runs before or
            # after the samples; anything else leaves the batch unused.
            if 'pre' in qc_type:
                sample_assay_plan.pre_run_batch = batch
            elif 'post' in qc_type:
                sample_assay_plan.post_run_batch = batch
        else:
            raise NotImplementedError('QC type not recognized!')
        return sample_assay_plan

    # pre-generation checks
    if not galaxy_parameters_file:
        raise IOError('Could not load Galaxy parameters file!')
    galaxy_parameters = json.load(galaxy_parameters_file)
    log.debug(json.dumps(galaxy_parameters, indent=4))
    if target_dir and not os.path.exists(target_dir):
        raise IOError('Target path does not exist!')
    sample_plan_records = \
        galaxy_parameters['sample_and_assay_planning']['sample_plans']
    if len(sample_plan_records) == 0:
        raise IOError('No Sampling plan specified')

    treatment_sequence = _create_treatment_sequence(galaxy_parameters)
    sample_assay_plan = SampleAssayPlan()
    for sample_plan_record in sample_plan_records:
        _create_sample_plan(sample_assay_plan, sample_plan_record)
    for qcqa_record in galaxy_parameters['qc_planning']['qc_plans']:
        _inject_qcqa_plan(sample_assay_plan, qcqa_record)

    # Group size lives under 'multiple_interventions' or, failing that,
    # under 'balance' -> 'multiple_interventions'.
    try:
        sample_assay_plan.group_size = \
            int(galaxy_parameters['treatment_plan']['study_type'][
                'multiple_interventions']['group_size'])
    except KeyError:
        try:
            sample_assay_plan.group_size = \
                int(galaxy_parameters['treatment_plan']['study_type'][
                    'balance']['multiple_interventions']['group_size'])
        except KeyError:
            log.debug(
                'Group size not set for root plan as multiple intervention')
            sample_assay_plan.group_size = 0  # raises AttributeError

    study_info = galaxy_parameters['study_metadata']

    if len(sample_assay_plan.sample_plan) == 0:
        log.info('No sample plan defined')
    if len(sample_assay_plan.assay_plan) == 0:
        log.info('No assay plan defined')

    study_design = StudyDesign()
    study_design.add_single_sequence_plan(treatment_sequence, sample_assay_plan)
    isa_object_factory = IsaModelObjectFactory(study_design)
    if len(sample_assay_plan.sample_plan) == 0:
        study = Study()
    else:
        study = isa_object_factory.create_assays_from_plan()

    contact = Person()
    contact.affiliation = study_info.get('affiliation')
    contact.last_name = study_info.get('last_name')
    contact.email = study_info['email']
    contact.first_name = study_info['first_name']

    study.contacts = [contact]
    study.description = study_info['description']
    study.filename = 's_study.txt'
    study.title = study_info['title']
    study.identifier = 'ISA-{}'.format(uuid.uuid4().hex[:8])
    study.comments = [
        Comment(name='Consent Information (ICO:0000011)',
                value=study_info['study_consent']),
        Comment(name='Data Use Requirement (DUO:0000017)',
                value=study_info['study_use_condition'])
    ]

    investigation = Investigation()
    investigation.contacts = [contact]
    investigation.description = ""
    investigation.title = "Investigation"
    investigation.identifier = study.identifier
    investigation.studies = [study]
    # Best effort: reuse the study's ontology sources when it has them.
    try:
        investigation.ontology_source_references = \
            study.ontology_source_references
    except AttributeError:
        pass
    investigation.ontology_source_references.append(
        OntologySource(name='ICO'))
    investigation.ontology_source_references.append(
        OntologySource(name='DUO'))

    investigation.filename = sanitize_filename(investigation.filename)
    for listed_study in investigation.studies:
        listed_study.filename = sanitize_filename(listed_study.filename)
        for assay in listed_study.assays:
            assay.filename = sanitize_filename(assay.filename)

    isatab.dump(isa_obj=investigation, output_path=target_dir)