Example #1
0
def create_descriptor():
    """Print an ISA-Tab descriptor built from a simple sample plan, for
    illustration.

    Builds a two-sample-type plan (liver x5, blood x3, group size 2), a
    full-factorial treatment design over AGENT x INTENSITY x DURATION,
    creates the study from the plan, and dumps the resulting investigation
    as ISA-Tab to stdout.
    """
    investigation = Investigation(identifier='I1')
    plan = SampleAssayPlan()
    plan.add_sample_type('liver')
    plan.add_sample_plan_record('liver', 5)
    plan.add_sample_type('blood')
    plan.add_sample_plan_record('blood', 3)
    plan.group_size = 2
    # Three study factors spanning the full-factorial design space.
    # (Fixed typo: 'pertubation agent' -> 'perturbation agent'.)
    f1 = StudyFactor(name='AGENT',
                     factor_type=OntologyAnnotation(term='perturbation agent'))
    f2 = StudyFactor(name='INTENSITY',
                     factor_type=OntologyAnnotation(term='intensity'))
    f3 = StudyFactor(name='DURATION',
                     factor_type=OntologyAnnotation(term='time'))
    treatment_factory = TreatmentFactory(factors=[f1, f2, f3])
    treatment_factory.add_factor_value(f1, {'cocaine', 'crack', 'aether'})
    treatment_factory.add_factor_value(f2, {'low', 'medium', 'high'})
    treatment_factory.add_factor_value(f3, {'short', 'long'})
    ffactorial_design_treatments = treatment_factory\
        .compute_full_factorial_design()
    treatment_sequence = TreatmentSequence(
        ranked_treatments=ffactorial_design_treatments)
    study = IsaModelObjectFactory(plan, treatment_sequence)\
        .create_study_from_plan()
    study.filename = 's_study.txt'
    investigation.studies = [study]
    print(isatab.dumps(investigation))
Example #2
0
def create_from_plan_parameters(galaxy_parameters_file,
                                sample_assay_plans_file, study_info_file,
                                treatment_plans_file, target_dir):
    """Create and dump an ISA-Tab descriptor from study planning parameters.

    Input is either a single Galaxy parameters JSON file, or three separate
    JSON files (sample/assay plans, study info, treatment plans).

    :param galaxy_parameters_file: open file with Galaxy parameters JSON;
        takes precedence when truthy
    :param sample_assay_plans_file: open file with a serialized
        SampleAssayPlan (used, with the next two, when no Galaxy file given)
    :param study_info_file: open file with study metadata JSON
    :param treatment_plans_file: open file with treatment plan JSON
    :param target_dir: directory the ISA-Tab files are written to
    :raises IOError: if neither input combination is provided
    :raises NotImplementedError: for unsupported study/treatment options
    """
    decoder = SampleAssayPlanDecoder()
    if galaxy_parameters_file:
        galaxy_parameters = json.load(galaxy_parameters_file)
        sample_and_assay_plans, study_info, treatment_plan_params = \
            map_galaxy_to_isa_create_json(galaxy_parameters)
        plan = decoder.load(io.StringIO(json.dumps(sample_and_assay_plans)))
    elif sample_assay_plans_file and study_info_file and treatment_plans_file:
        plan = decoder.load(sample_assay_plans_file)
        study_info = json.load(study_info_file)
        treatment_plan_params = json.load(treatment_plans_file)
    else:
        raise IOError('Wrong parameters provided')

    # Hoist the deeply nested parameter sections once instead of repeating
    # the full key chain at every lookup.
    study_type_cond = treatment_plan_params['study_type_cond']
    if study_type_cond['study_type'] != 'intervention':
        raise NotImplementedError('Only supports Intervention studies')

    one_or_more = study_type_cond['one_or_more']
    if one_or_more['single_or_multiple'] == 'multiple':
        raise NotImplementedError(
            'Multiple treatments not yet implemented. Please select Single')

    intervention = one_or_more['intervention_type']
    if intervention['select_intervention_type'] != 'chemical intervention':
        raise NotImplementedError(
            'Only Chemical Interventions supported at this time')

    treatment_factory = TreatmentFactory(
        intervention_type=INTERVENTIONS['CHEMICAL'], factors=BASE_FACTORS)
    # Factor levels arrive as comma-separated strings; add one trimmed
    # value per level for agent, intensity and duration respectively
    # (zip stops after the three listed keys).
    for factor, key in zip(BASE_FACTORS, ('agent', 'intensity', 'duration')):
        for level in intervention[key].split(','):
            treatment_factory.add_factor_value(factor, level.strip())
    treatment_sequence = TreatmentSequence(
        ranked_treatments=treatment_factory.compute_full_factorial_design())
    isa_object_factory = IsaModelObjectFactory(plan, treatment_sequence)
    s = isa_object_factory.create_assays_from_plan()

    contact = Person()
    contact.affiliation = study_info['study_pi_affiliation']
    contact.last_name = study_info['study_pi_last_name']
    contact.email = study_info['study_pi_email']
    contact.first_name = study_info['study_pi_first_name']
    s.contacts = [contact]
    s.description = study_info['study_description']
    s.filename = 's_study.txt'
    s.title = 'ISA created {}'.format(datetime.datetime.now().isoformat())
    s.identifier = 'ISA-{}'.format(uuid.uuid4().hex[:8])

    # The investigation mirrors the study's descriptive metadata.
    i = Investigation()
    i.contacts = [contact]
    i.description = s.description
    i.title = s.title
    i.identifier = s.identifier

    i.studies = [s]
    isatab.dump(isa_obj=i,
                output_path=target_dir,
                i_file_name='i_investigation.txt')

    # Touch empty placeholder data files so every filename referenced by
    # the assays exists alongside the ISA-Tab output.
    for assay in s.assays:
        for data_file in assay.data_files:
            data_file_path = os.path.join(target_dir, data_file.filename)
            with open(data_file_path, 'a'):
                os.utime(data_file_path, None)
Example #3
0
def create_from_galaxy_parameters(galaxy_parameters_file, target_dir):
    """Generate ISA-Tab files in ``target_dir`` from a Galaxy tool's
    parameters JSON.

    Builds a treatment sequence and sample/assay plan from the Galaxy
    parameters, injects QC/QA plan records, assembles the Study and
    Investigation objects (including consent/data-use comments), sanitizes
    all output filenames, and writes the result with ``isatab.dump``.

    :param galaxy_parameters_file: open file-like object containing the
        Galaxy parameters JSON
    :param target_dir: existing directory to write ISA-Tab output into
    :raises IOError: if the parameters file is missing, the target path
        does not exist, or no sampling plan is specified
    """

    def _create_treatment_sequence(galaxy_parameters):
        """Build a TreatmentSequence from the 'treatment_plan' section.

        Handles 'full_factorial' and 'fractional_factorial' study types;
        any other study type falls through and implicitly returns None.
        Only single-intervention plans are supported.
        """
        treatment_plan = galaxy_parameters['treatment_plan']
        study_type = treatment_plan['study_type']['study_type_selector']
        log.debug(json.dumps(galaxy_parameters, indent=4))
        # The 'multiple_interventions' selector lives under 'balance' for
        # the fractional-factorial layout, else directly under 'study_type'.
        try:
            single_or_multiple = treatment_plan['study_type']['balance'][
                'multiple_interventions']
        except KeyError:
            single_or_multiple = \
                treatment_plan['study_type']['multiple_interventions'][
                    'multiple_interventions_selector']
        if single_or_multiple == 'multiple':
            raise NotImplementedError(
                'Multiple treatments not yet implemented. Please select Single')

        if study_type == 'full_factorial':
            intervention_type = \
            treatment_plan['study_type']['multiple_interventions'][
                'intervention_type']['intervention_type_selector']
            # Map the tool's selector strings onto the INTERVENTIONS table.
            if intervention_type == 'chemical intervention':
                interventions = INTERVENTIONS['CHEMICAL']
            elif intervention_type == 'dietary intervention':
                interventions = INTERVENTIONS['DIET']
            elif intervention_type == 'behavioural intervention':
                interventions = INTERVENTIONS['BEHAVIOURAL']
            elif intervention_type == 'biological intervention':
                interventions = INTERVENTIONS['BIOLOGICAL']
            elif intervention_type == 'surgical intervention':
                interventions = INTERVENTIONS['SURGICAL']
            elif intervention_type == 'radiological intervention':  # not in tool yet
                interventions = INTERVENTIONS['RADIOLOGICAL']
            else:  # default to chemical
                interventions = INTERVENTIONS['CHEMICAL']
            treatment_factory = TreatmentFactory(
                intervention_type=interventions, factors=BASE_FACTORS)

            # Treatment Sequence
            # Factor levels arrive as comma-separated strings; one factor
            # value is added per trimmed level.
            agent_levels = \
            treatment_plan['study_type']['multiple_interventions'][
                'intervention_type']['agent'].split(',')
            for agent_level in agent_levels:
                treatment_factory.add_factor_value(BASE_FACTORS[0],
                                                   agent_level.strip())
            dose_levels = \
            treatment_plan['study_type']['multiple_interventions'][
                'intervention_type']['intensity'].split(',')
            for dose_level in dose_levels:
                treatment_factory.add_factor_value(BASE_FACTORS[1],
                                                   dose_level.strip())
            duration_of_exposure_levels = treatment_plan[
                'study_type']['multiple_interventions']['intervention_type'][
                'duration'].split(',')
            for duration_of_exposure_level in duration_of_exposure_levels:
                treatment_factory.add_factor_value(
                    BASE_FACTORS[2], duration_of_exposure_level.strip())
            treatment_sequence = TreatmentSequence(
                ranked_treatments=treatment_factory
                    .compute_full_factorial_design())
            group_size = int(
                galaxy_parameters['treatment_plan']['study_type'][
                    'multiple_interventions']['group_size'])
            # The same group size is applied uniformly to every treatment
            # in the computed design.
            for ranked_treatment in \
                    treatment_sequence.ranked_treatments:
                ranked_treatment[0].group_size = group_size
            return treatment_sequence

        elif study_type == 'fractional_factorial':
            intervention_type = \
                treatment_plan['study_type']['balance'][
                    'multiple_interventions']['intervention_type_selector']
            treatments = set()
            # One StudyFactor per comma-separated name in 'study_factors'.
            study_factors = [StudyFactor(name=x.strip()) for x in
                             treatment_plan['study_type'][
                                 'balance']['multiple_interventions'][
                                 'study_factors'].split(',')]
            for group in \
                    treatment_plan['study_type']['balance'][
                        'multiple_interventions']['study_groups']:
                # Pair each declared factor with this group's value, in
                # declaration order.
                factor_values = ()
                for x, y in zip(study_factors, [x.strip() for x in
                                                group['factor_values'].split(
                                                    ',')]):
                    factor_value = FactorValue(factor_name=x, value=y)
                    factor_values = factor_values + (factor_value,)
                # Balanced designs share a single group size; otherwise
                # each study group carries its own.
                if galaxy_parameters['treatment_plan']['study_type'][
                    'balance']['balanced_groups']:
                    group_size = int(
                        galaxy_parameters['treatment_plan']['study_type'][
                            'balance']['multiple_interventions']['group_size'])
                else:
                    group_size = int(group['group_size'])
                treatment = Treatment(treatment_type=intervention_type,
                    factor_values=factor_values, group_size=group_size)
                treatments.add(treatment)
            treatment_sequence = TreatmentSequence(ranked_treatments=treatments)
            return treatment_sequence

    def _create_sample_plan(sample_assay_plan, sample_plan_record):
        """Add one sample plan record (and its nested assay plans) to
        ``sample_assay_plan``; returns the mutated plan."""

        def _create_nmr_assay_type(assay_plan_record):
            """Build an NMR metabolite-profiling AssayType from one assay
            plan record."""
            nmr_assay_type = AssayType(
                measurement_type='metabolite profiling',
                technology_type='nmr spectroscopy')
            nmr_top_mods = NMRTopologyModifiers()
            nmr_top_mods.technical_replicates = assay_plan_record[
                'assay_type']['acquisition_mode']['technical_replicates']
            nmr_top_mods.acquisition_modes.add(
                assay_plan_record['assay_type'][
                    'acquisition_mode']['acquisition_mode_selector'])
            # Instrument label combines instrument name and magnet value,
            # e.g. "<instrument> <magnet>".
            nmr_top_mods.instruments.add('{} {}'.format(
                assay_plan_record['assay_type'][
                    'acquisition_mode']['nmr_instrument'],
                assay_plan_record['assay_type']['acquisition_mode']['magnet']))
            nmr_top_mods.pulse_sequences.add(
                assay_plan_record['assay_type'][
                    'acquisition_mode']['pulse_sequence']
            )
            nmr_top_mods.magnet_power = \
                assay_plan_record['assay_type']['acquisition_mode']['magnet']
            nmr_assay_type.topology_modifiers = nmr_top_mods
            return nmr_assay_type

        def _create_ms_assay_type(assay_plan_record):
            """Build a mass-spectrometry metabolite-profiling AssayType,
            including injection/acquisition modes and, for GC, any
            derivatizations."""
            ms_assay_type = AssayType(
                measurement_type='metabolite profiling',
                technology_type='mass spectrometry')
            ms_assay_type.topology_modifiers = MSTopologyModifiers(
                sample_fractions=set(map(
                    lambda x: x['sample_fraction'],
                    assay_plan_record['assay_type']['sample_fractions'])))
            injection_modes = ms_assay_type.topology_modifiers.injection_modes
            if len(assay_plan_record['assay_type']['injections']) > 0:
                for inj_mod in assay_plan_record['assay_type']['injections']:
                    injection_mode = MSInjectionMode(
                        injection_mode=inj_mod[
                            'injection_mode']['injection_mode_selector'],
                        ms_instrument=inj_mod['injection_mode']['instrument']
                    )
                    # Chromatography details only apply to LC/GC; column
                    # selection is LC-only.
                    if inj_mod['injection_mode'][
                        'injection_mode_selector'] in ('LC', 'GC'):
                        injection_mode.chromatography_instrument = inj_mod[
                            'injection_mode']['chromatography_instrument']
                    if inj_mod[
                        'injection_mode']['injection_mode_selector'] == 'LC':
                        injection_mode.chromatography_column = inj_mod[
                            'injection_mode']['chromatography_column']
                    injection_modes.add(injection_mode)
                    for acq_mod in inj_mod['injection_mode']['acquisitions']:
                        injection_mode.acquisition_modes.add(
                            MSAcquisitionMode(
                                acquisition_method=acq_mod['acquisition_mode'],
                                technical_repeats=acq_mod[
                                    'technical_replicates']
                            )
                        )
                        if inj_mod['injection_mode'][
                            'injection_mode_selector'] == 'GC':
                            for deriva in inj_mod['injection_mode'][
                                    'derivatizations']:
                                derivatization = deriva['derivatization']
                                # Strings of the form
                                # "term (SOURCE:ACCESSION)" are parsed into
                                # an OntologyAnnotation; anything else is
                                # kept as the raw string.
                                if re.match('(.*?) \((.*?)\)', derivatization):
                                    matches = next(iter(
                                        re.findall('(.*?) \((.*?)\)',
                                                   derivatization)))
                                    term, ontoid = matches[0], matches[1]
                                    source_name, accession_id = \
                                    ontoid.split(':')[0], \
                                    ontoid.split(':')[1]
                                    source = OntologySource(name=source_name)
                                    derivatization = OntologyAnnotation(
                                        term=term, term_source=source,
                                        term_accession=accession_id)
                                injection_mode.derivatizations.add(
                                    derivatization)
            return ms_assay_type

        # NOTE(review): this compares the whole material_type value to
        # 'user defined' and then subscripts that same value with
        # 'sample_type_ud'; if the comparison ever succeeded, indexing the
        # string would raise TypeError. A selector sub-key was probably
        # intended — confirm against the Galaxy tool schema.
        if sample_plan_record['material_type'] == 'user defined':
            sample_type = sample_plan_record['material_type']['sample_type_ud']
        else:
            sample_type = sample_plan_record['material_type']
            # "term (SOURCE:ACCESSION)" material types become ontology
            # annotations; plain strings are used as-is.
            if re.match('(.*?) \((.*?)\)', sample_type):
                matches = next(iter(re.findall('(.*?) \((.*?)\)', sample_type)))
                term, ontoid = matches[0], matches[1]
                source_name, accession_id = ontoid.split(':')[0], \
                                            ontoid.split(':')[1]
                source = OntologySource(name=source_name)
                sample_type = OntologyAnnotation(term=term, term_source=source,
                                                 term_accession=accession_id)
        sample_assay_plan.add_sample_type(sample_type)
        sample_size = sample_plan_record['sample_collections']
        sample_assay_plan.add_sample_plan_record(sample_type, sample_size)
        # Attach one assay type per assay plan record; only NMR and MS are
        # recognized.
        for assay_plan_record in sample_plan_record['assay_plans']:
            tt = assay_plan_record['assay_type']['assay_type_selector']
            if tt == 'nmr':
                assay_type = _create_nmr_assay_type(assay_plan_record)
            elif tt == 'ms':
                assay_type = _create_ms_assay_type(assay_plan_record)
            else:
                raise NotImplementedError('Only MS and NMR assays supported')
            sample_assay_plan.add_assay_type(assay_type)
            sample_assay_plan.add_assay_plan_record(sample_type, assay_type)
        return sample_assay_plan

    def _inject_qcqa_plan(sample_assay_plan, qcqa_record):
        """Attach one QC/QA record to ``sample_assay_plan``.

        'interval_series' becomes a sample QC plan record with an injection
        interval; dilution-series types become pre- or post-run
        SampleQCBatch objects with one 'quantity' characteristic per value.
        """
        qc_type = qcqa_record['qc_type']['qc_type_selector']
        if qc_type == 'interval_series':
            material_type = qcqa_record['material_type']
            # "term (SOURCE:ACCESSION)" strings become ontology annotations.
            if re.match('(.*?) \((.*?)\)', material_type):
                matches = next(iter(
                    re.findall('(.*?) \((.*?)\)', material_type)))
                term, ontoid = matches[0], matches[1]
                source_name, accession_id = ontoid.split(':')[0], \
                                            ontoid.split(':')[1]
                source = OntologySource(name=source_name)
                material_type = OntologyAnnotation(
                    term=term, term_source=source, term_accession=accession_id)
            sample_assay_plan.add_sample_qc_plan_record(
                material_type=material_type,
                injection_interval=qcqa_record[
                    'qc_type']['injection_frequency'])
        elif 'dilution_series' in qc_type:
            values = [int(x) for x in qcqa_record[
                'qc_type']['values'].split(',')]
            material_type = qcqa_record['material_type']
            if re.match('(.*?) \((.*?)\)', material_type):
                matches = next(iter(
                    re.findall('(.*?) \((.*?)\)', material_type)))
                term, ontoid = matches[0], matches[1]
                source_name, accession_id = ontoid.split(':')[0], \
                                            ontoid.split(':')[1]
                source = OntologySource(name=source_name)
                material_type = OntologyAnnotation(
                    term=term, term_source=source, term_accession=accession_id)
            batch = SampleQCBatch(material=material_type)
            for value in values:
                batch.characteristic_values.append(
                    Characteristic(category=OntologyAnnotation(
                        term='quantity'), value=value)
                )
            # The qc_type string itself says whether the batch runs before
            # or after the main run.
            if 'pre' in qc_type:
                sample_assay_plan.pre_run_batch = batch
            elif 'post' in qc_type:
                sample_assay_plan.post_run_batch = batch
        else:
            raise NotImplementedError('QC type not recognized!')

        return sample_assay_plan

    # pre-generation checks
    if galaxy_parameters_file:
        galaxy_parameters = json.load(galaxy_parameters_file)
        log.debug(json.dumps(galaxy_parameters, indent=4))
    else:
        raise IOError('Could not load Galaxy parameters file!')
    # NOTE(review): target_dir is only validated when truthy; a falsy
    # target_dir slips through to isatab.dump below — confirm intended.
    if target_dir:
        if not os.path.exists(target_dir):
            raise IOError('Target path does not exist!')
    if len(galaxy_parameters['sample_and_assay_planning']['sample_plans']) == 0:
        raise IOError('No Sampling plan specified')

    treatment_sequence = _create_treatment_sequence(galaxy_parameters)
    sample_assay_plan = SampleAssayPlan()
    for sample_plan_record in galaxy_parameters['sample_and_assay_planning'][
            'sample_plans']:
        _ = _create_sample_plan(sample_assay_plan, sample_plan_record)
    for qcqa_record in galaxy_parameters['qc_planning']['qc_plans']:
        _ = _inject_qcqa_plan(sample_assay_plan, qcqa_record)
    # Group size location depends on the study-type layout; fall back to 0
    # when neither layout provides one.
    try:
        sample_assay_plan.group_size = \
            int(galaxy_parameters['treatment_plan']['study_type'][
                'multiple_interventions']['group_size'])
    except KeyError:
        try:
            sample_assay_plan.group_size = \
                int(galaxy_parameters['treatment_plan']['study_type'][
                    'balance']['multiple_interventions']['group_size'])
        except KeyError:
            log.debug(
                'Group size not set for root plan as multiple intervention')
            sample_assay_plan.group_size = 0  # raises AttributeError

    study_info = galaxy_parameters['study_metadata']

    if len(sample_assay_plan.sample_plan) == 0:
        log.info('No sample plan defined')
    if len(sample_assay_plan.assay_plan) == 0:
        log.info('No assay plan defined')

    study_design = StudyDesign()
    study_design.add_single_sequence_plan(treatment_sequence, sample_assay_plan)
    isa_object_factory = IsaModelObjectFactory(study_design)
    # Without a sample plan there is nothing to generate, so start from an
    # empty Study.
    if len(sample_assay_plan.sample_plan) == 0:
        s = Study()
    else:
        s = isa_object_factory.create_assays_from_plan()

    c = Person()
    c.affiliation = study_info.get('affiliation')
    c.last_name = study_info.get('last_name')
    c.email = study_info['email']
    c.first_name = study_info['first_name']
    s.contacts = [c]
    s.description = study_info['description']
    s.filename = 's_study.txt'
    s.title = study_info['title']
    s.identifier = 'ISA-{}'.format(uuid.uuid4().hex[:8])
    # Consent and data-use terms are recorded as ISA comments, with their
    # ontology IDs embedded in the comment names.
    s.comments = [
        Comment(name='Consent Information (ICO:0000011)',
                value=study_info['study_consent']),
        Comment(name='Data Use Requirement (DUO:0000017)',
                value=study_info['study_use_condition'])
    ]
    i = Investigation()
    i.contacts = [c]
    i.description = ""
    i.title = "Investigation"
    i.identifier = s.identifier
    i.studies = [s]
    try:
        i.ontology_source_references = s.ontology_source_references
    except AttributeError:
        pass
    # ICO and DUO back the consent/data-use comments added above.
    i.ontology_source_references.append(OntologySource(name='ICO'))
    i.ontology_source_references.append(OntologySource(name='DUO'))

    def sanitize_filename(filename):
        """Make ``filename`` filesystem-safe: spaces and any character
        outside [-\\w.] become underscores."""
        filename = str(filename).strip().replace(' ', '_')
        filename = re.sub(r'(?u)[^-\w.]', '_', filename)
        return filename

    i.filename = sanitize_filename(i.filename)
    for s in i.studies:
        s.filename = sanitize_filename(s.filename)
        for a in s.assays:
            a.filename = sanitize_filename(a.filename)

    isatab.dump(isa_obj=i, output_path=target_dir)
Example #4
0
def convert(json_path, output_path):
    print(json_path)
    print(output_path)

    with open(json_path, 'r') as f:
        dcc_json = json.load(f)

    # print(array['protocol'])
    # for element in array['protocol']:
    #     array['protocol'][element]['id']
    #     array['protocol'][element]['description']
    #     array['protocol'][element]['type']
    #     array['protocol'][element]['filename']

    # for element in array['measurement']:
    #     print(array['measurement'][element]['corrected_mz'])

    # for element in array['subject']:
    #     print(array['subject'][element]['species'])

    # Building the Investigation Object and its elements:

    project_set_json = dcc_json.get('project')

    if len(project_set_json) == 0:
        raise IOError('No project found in input JSON')

    # print(next(iter(project_set_json)))
    project_json = next(iter(project_set_json.values()))
    investigation = Investigation(identifier=project_json['id'])

    obi = OntologySource(name='OBI',
                         description='Ontology for Biomedical Investigations')
    investigation.ontology_source_references.append(obi)

    inv_person = Person(
        first_name=project_json['PI_first_name'],
        last_name=project_json['PI_last_name'],
        email=project_json['PI_email'],
        address=project_json['address'],
        affiliation=(', '.join(
            [project_json['department'], project_json['institution']])),
        roles=[
            OntologyAnnotation(term="",
                               term_source=obi,
                               term_accession="http://purl.org/obo/OBI_1")
        ])
    investigation.contacts.append(inv_person)

    study_set_json = dcc_json.get('study')

    if len(study_set_json) > 0:
        study_json = next(iter(study_set_json.values()))

        study = Study(
            identifier=study_json['id'],
            title=study_json['title'],
            description=study_json['description'],
            design_descriptors=[
                OntologyAnnotation(term=study_json['type'],
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ],
            filename='s_{study_id}.txt'.format(study_id=study_json['id']))

        investigation.studies = [study]

        studyid = study_json['id']
        print(studyid)
        study_person = Person(
            first_name=study_json['PI_first_name'],
            last_name=study_json['PI_last_name'],
            email=study_json['PI_email'],
            address=study_json['address'],
            affiliation=(', '.join(
                [study_json['department'], study_json['institution']])),
            roles=[
                OntologyAnnotation(term='principal investigator',
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ])

        study.contacts.append(study_person)

        for factor_json in dcc_json['factor'].values():
            factor = StudyFactor(name=factor_json['id'])
            study.factors.append(factor)

        for i, protocol_json in enumerate(dcc_json['protocol'].values()):
            oat_p = protocol_json['type']
            oa_protocol_type = OntologyAnnotation(
                term=oat_p,
                term_source=obi,
                term_accession="http://purl.org/obo/OBI_1")
            study.protocols.append(
                Protocol(name=protocol_json['id'],
                         protocol_type=oa_protocol_type,
                         description=protocol_json['description'],
                         uri=protocol_json['filename']))

            if 'MS' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='mass isotopologue distribution analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_112"),
                          technology_type=OntologyAnnotation(
                              term='mass spectrometry',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_ms_{count}.txt'.format(count=i)))

            if 'NMR' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='isotopomer analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_111"),
                          technology_type=OntologyAnnotation(
                              term='nmr spectroscopy',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_nmr.txt'))

        for subject_json in dcc_json['subject'].values():

            # print(array['subject'][element])
            if "organism" in subject_json['type']:

                source = Source(name=subject_json['id'])

                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)

            elif 'tissue_slice' in subject_json['type']:
                # print(array['subject'][element]['type'])
                source = Source(name=subject_json['id'])
                study.sources.append(source)
                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)

                sample = Sample(name=subject_json['id'],
                                derives_from=subject_json['parentID'])
                characteristic_organismpart = Characteristic(
                    category=OntologyAnnotation(term='organism_part'),
                    value=OntologyAnnotation(
                        term=subject_json['tissue_type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))

                sample.characteristics.append(characteristic_organismpart)
                study.samples.append(sample)
                # print(study.samples[0].name)

                sample_collection_process = Process(
                    executes_protocol=study.get_prot(
                        subject_json['protocol.id']))
                sample_collection_process.inputs.append(source)
                sample_collection_process.outputs.append(sample)
                study.process_sequence.append(sample_collection_process)

            else:
                source = Source(name=subject_json['id'])

                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)
                print(subject_json['id'])
                print(subject_json['species'])
                print(subject_json['type'])
        # for src in investigation.studies[0].materials:
        #
        # for sam in investigation.studies[0].materials:

        # Walk every sample record in the DCC JSON and, depending on the
        # sample 'type', build the matching ISA Process/Material objects and
        # chain them (collection -> separation -> extraction -> acquisition)
        # via plink(). Note that material_in / *_process variables leak across
        # branches and across loop iterations — several NOTE(review) flags
        # below point at places where that leakage looks unintended.
        for sample_json in dcc_json['sample'].values():

            if 'cells' in sample_json['type']:
                material_separation_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                material_separation_process.name = sample_json['id']
                # dealing with input material, check that the parent material is already among known samples or sources

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    material_separation_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # NOTE(review): this comprehension is evaluated three
                    # times (len check, print, append) — could be hoisted.
                    print([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ])
                    material_separation_process.inputs.append([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ][0])

                material_out = Sample(name=sample_json['id'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_xxxxxxx"))
                material_out.characteristics.append(material_type)
                material_separation_process.outputs.append(material_out)
                study.assays[0].samples.append(material_out)
                # try/except NameError: probe whether sample_collection_process
                # was bound in an earlier iteration / the subject loop above.
                try:
                    sample_collection_process
                except NameError:
                    sample_collection_process = None
                if sample_collection_process is None:
                    sample_collection_process = Process(executes_protocol="")
                else:
                    # plink(protein_extraction_process, data_acq_process)
                    # plink(material_separation_process, protein_extraction_process)

                    # NOTE(review): protein_extraction_process is only bound
                    # in the 'protein_extract' branch below — on the first
                    # 'cells' sample this raises NameError, and otherwise it
                    # links a process from a previous iteration. Also the
                    # commented line suggests material_separation_process was
                    # the intended second argument — confirm.
                    plink(sample_collection_process,
                          protein_extraction_process)

            if 'protein_extract' in sample_json['type']:
                protein_extraction_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                protein_extraction_process.name = sample_json['id']

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    protein_extraction_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # print([x for x in study.samples if x.name == sample_json['parentID']])
                    # NOTE(review): likely bug — this appends material_in left
                    # over from an earlier branch/iteration instead of the
                    # matched existing sample (compare the 'cells' branch,
                    # which appends the [0] element of the comprehension).
                    protein_extraction_process.inputs.append(material_in)

                # for material_in in study.samples:
                #     # print("OHO:", material_in.name)
                #     if material_in.name == sample_json['parentID']:
                #         # print("C:",sample_json['parentID'])
                #         #no need to create, just link to process
                #         protein_extraction_process.inputs.append(x)
                #     else:
                #         # print("D:", sample_json['parentID'])
                #         #create new material and link
                #         material_in = Sample(name=sample_json['parentID'])
                #         protein_extraction_process.inputs.append(material_in)

                material_out = Material(name=sample_json['id'])
                material_out.type = "Extract Name"
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))
                material_out.characteristics.append(material_type)

                # NOTE(review): likely bug — material_out built just above is
                # never registered anywhere (nor added to the process outputs);
                # both lines below append material_in instead. Confirm these
                # should reference material_out.
                study.assays[0].samples.append(material_in)
                study.assays[0].materials['other_material'].append(material_in)
                try:
                    material_separation_process
                except NameError:
                    material_separation_process = None
                if material_separation_process is None:
                    material_separation_process = Process(executes_protocol="")
                else:
                    # plink(protein_extraction_process, data_acq_process)
                    plink(material_separation_process,
                          protein_extraction_process)

            if 'polar' in sample_json['type']:

                material_in = Material(name=sample_json['parentID'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type',
                                                term_source=obi),
                    value=OntologyAnnotation(term=sample_json['type'],
                                             term_source=obi))
                material_in.characteristics.append(material_type)
                study.assays[0].materials['other_material'].append(material_in)

                data_acq_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                data_acq_process.name = sample_json['id']
                # NOTE(review): '(unknown).txt' contains no replacement field,
                # so .format(filename=...) is a no-op and every DataFile is
                # named literally '(unknown).txt' — the joined
                # mass_isotopomer-data name is discarded. Confirm the template
                # should be '{filename}.txt'.
                datafile = DataFile(
                    filename='(unknown).txt'.format(filename='_'.join(
                        ['mass_isotopomer-data', studyid, sample_json['id']])),
                    label='Raw Data File')
                data_acq_process.outputs.append(datafile)
                # print(study.assays[0].technology_type.term)

                study.assays[0].data_files.append(datafile)
                try:
                    protein_extraction_process
                except NameError:
                    protein_extraction_process = None
                if protein_extraction_process is None:
                    protein_extraction_process = Process(executes_protocol="")
                else:
                    plink(protein_extraction_process, data_acq_process)

            # else:
            #     material_in = Material(name=sample_json['parentID'])
            #     material_out = Material(name=sample_json['id'])
            #     material_type = Characteristic(
            #         category=OntologyAnnotation(term="material_type"),
            #         value=OntologyAnnotation(term=sample_json['type'],
            #                                  term_source=obi,
            #                                  term_accession="http://purl.org/obo/OBI_1"))
            #     material_out.characteristics.append(material_type)
            #     process = Process(executes_protocol=sample_json['protocol.id'])
            #     process.name = sample_json['id']
            #     process.inputs.append(material_in)
            #     process.outputs.append(material_out)
            #
            #     study.assays[0].materials['other_material'].append(material_in)
            #     study.assays[0].materials['other_material'].append(material_out)

            if 'bulk_tissue' in sample_json['type']:
                bulk_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                bulk_process.name = sample_json['id']

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    bulk_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # print([x for x in study.samples if x.name == sample_json['parentID']])
                    # NOTE(review): same stale-material_in issue as the
                    # 'protein_extract' branch above; also plink() only runs
                    # on this else path — confirm both are intended.
                    bulk_process.inputs.append(material_in)

                    plink(sample_collection_process, bulk_process)

    # Assemble the tab-separated MAF table: a fixed header row plus one
    # record per entry in dcc_json['measurement'].
    header_columns = ('metabolite name', 'assignment', 'signal intensity',
                      'retention time', 'm/z', 'formula', 'adduct',
                      'isotopologue', 'sample identifier')
    data_rec_header = '\t'.join(header_columns)

    # Field order mirrors the header columns above; each measurement dict
    # contributes one tab-joined line.
    measurement_keys = ('compound', 'assignment', 'raw_intensity',
                        'retention_time', 'corrected_mz', 'formula', 'adduct',
                        'isotopologue', 'sample.id')
    records = [
        '\t'.join(measurement[key] for key in measurement_keys)
        for measurement in dcc_json['measurement'].values()
    ]

    # Make sure the output directory exists, then write the MAF table and the
    # ISA-Tab investigation. Bug fix: previously the whole write block was
    # nested inside `if not os.path.exists(output_path):`, so nothing at all
    # was written whenever the directory already existed.
    os.makedirs(output_path, exist_ok=True)
    try:
        maf_path = '{output_path}/{study_id}-maf-data-nih-dcc-json.txt'.format(
            output_path=output_path, study_id=studyid)
        with open(maf_path, 'w') as fh:
            print(
                "'writing 'maf file document' to file from 'generate_maf_file' method:..."
            )
            # Header line, then one record per line.
            fh.writelines(data_rec_header)
            fh.writelines('\n')
            for item in records:
                fh.writelines(item)
                fh.writelines('\n')

        print("writing 'investigation information' to file...")
        print(isatab.dumps(investigation))

        isatab.dump(investigation, output_path=output_path)
    except IOError:
        print("Error: in main() method can't open file or write data")