예제 #1
0
파일: utils.py 프로젝트: scottx611x/isa-api
 def remove_unused_protocols(self):
     investigation = isatab.load(os.path.dirname(self.path))
     for study in investigation.studies:
         unused_protocol_names = set(x.name for x in study.protocols)
         for process in study.process_sequence:
             try:
                 unused_protocol_names.remove(
                     process.executes_protocol.name)
             except KeyError:
                 pass
         for assay in study.assays:
             for process in assay.process_sequence:
                 try:
                     unused_protocol_names.remove(
                         process.executes_protocol.name)
                 except KeyError:
                     pass
         print('Unused protocols: {}'.format(unused_protocol_names))
         # remove these protocols from study.protocols
         clean_protocols_list = []
         for protocol in study.protocols:
             if protocol.name not in unused_protocol_names:
                 clean_protocols_list.append(protocol)
         study.protocols = clean_protocols_list
     isatab.dump(investigation,
                 output_path=os.path.dirname(self.path),
                 i_file_name='{filename}.fix'.format(
                     filename=os.path.basename(self.path)),
                 skip_dump_tables=True)
예제 #2
0
def convert(source_idf_fp, output_path):
    """ Converter for MAGE-TAB to ISA-Tab
    :param source_idf_fp: File descriptor of input IDF file
    :param output_dir: Path to directory to write output ISA-Tab files to
    """
    ISA = magetab.load(source_idf_fp)
    isatab.dump(ISA, output_path)
예제 #3
0
def dropAssayFromStudy(assayNum, studyNum, pathToISATABFile):
    """
    This function removes an Assay from a study in an ISA file
    Typically, you should use the exploreISA function to check the contents
    of the ISA file and retrieve the assay and study numbers you are interested in!
    :param assayNum: The Assay number (notice it's 1-based index).
    :type assayNum: int
    :param studyNum: The Study number (notice it's 1-based index).
    :type studyNum: int
    :param pathToISATABFile: The path to the ISATAB file
    :type pathToISATABFile: string
    :raise FileNotFoundError: If pathToISATABFile does not contain file 'i_Investigation.txt'.
    """
    from isatools import isatab
    import os
    try:
        isa = isatab.load(pathToISATABFile, skip_load_tables=True)
        std = isa.studies[studyNum - 1]
        assays = std.assays
        if os.path.isfile(os.path.join(pathToISATABFile,assays[assayNum - 1].filename)):
            os.remove(os.path.join(pathToISATABFile,assays[assayNum - 1].filename))
        del assays[assayNum - 1]
        isatab.dump(isa_obj=isa, output_path=pathToISATABFile)
    except FileNotFoundError as err:
        raise err
예제 #4
0
def appendAssayToStudy(assay, studyNum, pathToISATABFile):
    """
    This function appends an Assay object to a study in an ISA file
    Typically, you should use the exploreISA function to check the contents
    of the ISA file and retrieve the assay and study number you are interested in!
    :param assay: The Assay
    :type assay: ISA Assay object
    :param studyNum: The Study number (notice it's not zero-based index).
    :type studyNum: int
    :param pathToISATABFile: The path to the ISATAB file
    :type pathToISATABFile: string
    :raise FileNotFoundError: If pathToISATABFile does not contain file 'i_Investigation.txt'.
    """
    from isatools import isatab
    try:
        isa = isatab.load(pathToISATABFile, skip_load_tables=True)
        std = isa.studies[studyNum - 1]
        lngth = len(std.assays)
        base  = os.path.basename(assay.filename)
        fname = os.path.splitext(base)[0]
        fname = fname + str(lngth)
        ext   = os.path.splitext(base)[1]
        fname = fname + ext
        assay.filename = fname
        isa.studies[studyNum - 1].assays.append(assay)
        isatab.dump(isa_obj=isa, output_path=pathToISATABFile)
    except FileNotFoundError as err:
        raise err
예제 #5
0
def dropStudyFromISA(studyNum, pathToISATABFile):
    """
    This function removes a study from an ISA file
    Typically, you should use the exploreISA function to check the contents
    of the ISA file and retrieve the study number you are interested in!
    Warning: this function deletes the given study and all its associated assays
    :param studyNum: The Study number (notice it's 1-based index).
    :type studyNum: int
    :param pathToISATABFile: The path to the ISATAB file
    :type pathToISATABFile: string
    :raise FileNotFoundError: If pathToISATABFile does not contain file 'i_Investigation.txt'.
    """
    from isatools import isatab
    import os
    try:
        isa = isatab.load(pathToISATABFile, skip_load_tables=True)
        studies = isa.studies
        for assay in studies[studyNum - 1].assays:
            if os.path.isfile(os.path.join(pathToISATABFile,assay.filename)):
                os.remove(os.path.join(pathToISATABFile,assay.filename))
        if os.path.isfile(os.path.join(pathToISATABFile,studies[studyNum - 1].filename)):
            os.remove(os.path.join(pathToISATABFile,studies[studyNum - 1].filename))
        del studies[studyNum - 1]
        isatab.dump(isa_obj=isa, output_path=pathToISATABFile)
    except FileNotFoundError as err:
        raise err
예제 #6
0
def convert(source_idf_fp, output_path, technology_type=None, measurement_type=None):
    """ Converter for MAGE-TAB to ISA-Tab
    :param source_idf_fp: File descriptor of input IDF file
    :param output_path: Path to directory to write output ISA-Tab files to
    """
    df = pd.read_csv(source_idf_fp, names=range(0, 128), sep='\t', engine='python', encoding='utf-8', comment='#').dropna(axis=1, how='all')
    df = df.T  # transpose
    df.reset_index(inplace=True)  # Reset index so it is accessible as column
    df.columns = df.iloc[0]  # If all was OK, promote this row to the column headers
    df = df.reindex(df.index.drop(0))
    # second set output s_ and a_ files
    for _, row in df.iterrows():
        sdrf_file = row["SDRF File"]
        if isinstance(sdrf_file, str):
            study_df, assay_df = magetab.split_tables(sdrf_path=os.path.join(os.path.dirname(source_idf_fp.name),
                                                                             sdrf_file))
            study_df.columns = study_df.isatab_header
            assay_df.columns = assay_df.isatab_header
            # write out ISA table files
            print("Writing s_{0} to {1}".format(os.path.basename(sdrf_file), output_path))
            with open(os.path.join(output_path, "s_" + os.path.basename(sdrf_file)), "w") as s_fp:
                study_df.to_csv(path_or_buf=s_fp, mode='a', sep='\t', encoding='utf-8', index=False)
            print("Writing a_{0} to {1}".format(os.path.basename(sdrf_file), output_path))
            with open(os.path.join(output_path, "a_" + os.path.basename(sdrf_file)), "w") as a_fp:
                assay_df.to_csv(path_or_buf=a_fp, mode='a', sep='\t', encoding='utf-8', index=False)
    print("Writing {0} to {1}".format("i_investigation.txt", output_path))
    source_idf_fp.seek(0)
    ISA = magetab.parse_idf(source_idf_fp.name, technology_type=technology_type, measurement_type=measurement_type)
    isatab.dump(ISA, output_path=output_path, skip_dump_tables=True)
예제 #7
0
def appendStudytoISA(study, pathToISATABFile):
    """
    This function appends a Study object to an ISA file
    Typically, you should use the exploreISA function to check the contents
    of the ISA file!
    :param study: The Study object.
    :type study: ISA Study object
    :param pathToISATABFile: The path to the ISATAB file
    :type pathToISATABFile: string
    :raise FileNotFoundError: If pathToISATABFile does not contain file 'i_Investigation.txt'.
    """
    from isatools import isatab
    import os
    try:
        isa = isatab.load(pathToISATABFile, skip_load_tables=True)
        lngth = len(isa.studies)
        base = os.path.basename(study.filename)
        fname = os.path.splitext(base)[0]
        fname = fname + str(lngth)
        ext = os.path.splitext(base)[1]
        fname = fname + ext
        study.filename = fname
        isa.studies.append(study)
        isatab.dump(isa_obj=isa, output_path=pathToISATABFile)
    except FileNotFoundError as err:
        raise err
예제 #8
0
def convert(source_sampletab_fp, target_dir):
    """ Converter for ISA-JSON to SampleTab.
    :param source_sampletab_fp: File descriptor of input SampleTab file
    :param target_dir: Path to write out ISA-Tab files to
    """
    ISA = sampletab.load(source_sampletab_fp)
    isatab.dump(ISA, target_dir)
예제 #9
0
    def write_isa_study(self, inv_obj, api_key, std_path,
                        save_investigation_copy=True, save_samples_copy=False, save_assays_copy=False):
        """
        Write back an ISA-API Investigation object directly into ISA-Tab files
        :param inv_obj: ISA-API Investigation object
        :param api_key: User API key for accession check
        :param std_path: file system path to destination folder
        :param save_investigation_copy: Keep track of changes saving a copy of the unmodified i_*.txt file
        :param save_samples_copy: Keep track of changes saving a copy of the unmodified s_*.txt file
        :param save_assays_copy: Keep track of changes saving a copy of the unmodified a_*.txt and m_*.tsv files
        :return:
        """
        # dest folder name is a timestamp
        update_path_suffix = app.config.get('UPDATE_PATH_SUFFIX')
        update_path = os.path.join(std_path, update_path_suffix)
        if save_investigation_copy or save_samples_copy or save_assays_copy:  # Only create audit folder when requested
            dest_path = new_timestamped_folder(update_path)

            # make a copy before applying changes
            if save_investigation_copy:
                src_file = os.path.join(std_path, self.inv_filename)
                dest_file = os.path.join(dest_path, self.inv_filename)
                logger.info("Copying %s to %s", src_file, dest_file)
                copy_file(src_file, dest_file)

            if save_samples_copy:
                for sample_file in glob.glob(os.path.join(std_path, "s_*.txt")):
                    sample_file_name = os.path.basename(sample_file)
                    src_file = sample_file
                    dest_file = os.path.join(dest_path, sample_file_name)
                    logger.info("Copying %s to %s", src_file, dest_file)
                    copy_file(src_file, dest_file)

            if save_assays_copy:
                for assay_file in glob.glob(os.path.join(std_path, "a_*.txt")):
                    assay_file_name = os.path.basename(assay_file)
                    src_file = assay_file
                    dest_file = os.path.join(dest_path, assay_file_name)
                    logger.info("Copying %s to %s", src_file, dest_file)
                    copy_file(src_file, dest_file)
                # Save the MAF
                for maf in glob.glob(os.path.join(std_path, "m_*.tsv")):
                    maf_file_name = os.path.basename(maf)
                    src_file = maf
                    dest_file = os.path.join(dest_path, maf_file_name)
                    logger.info("Copying %s to %s", src_file, dest_file)
                    copy_file(src_file, dest_file)

        logger.info("Writing %s to %s", self.inv_filename, std_path)
        i_file_name = self.inv_filename
        dump(inv_obj, std_path, i_file_name=i_file_name, skip_dump_tables=False)

        return
예제 #10
0
def convert(json_fp,
            path,
            i_file_name='i_investigation.txt',
            config_dir=isajson.default_config_dir,
            validate_first=True):
    """
    Converter for ISA JSON to ISA Tab. Currently only converts
    investigation file contents
        :param json_fp: File pointer to ISA JSON input
        :param path: Directory to ISA tab output
        :param i_file_name: Investigation file name, default is
                            i_investigation.txt
        :param config_dir: Directory to config directory
        :param validate_first: Validate JSON before conversion, default is True

    Example usage:
        Read from a JSON and write to an investigation file, make sure to
        create/open relevant Python file objects.

        from isatools.convert import json2isatab
        json_file = open('BII-I-1.json', 'r')
        tab_file = open('i_investigation.txt', 'w')
        json2isatab.convert(json_file, path)

    """
    if validate_first:
        log.info("Validating input JSON before conversion")
        report = isajson.validate(fp=json_fp,
                                  config_dir=config_dir,
                                  log_level=logging.ERROR)
        if len(report['errors']) > 0:
            log.fatal("Could not proceed with conversion as there are some "
                      "fatal validation errors. Check log.")
            return
        json_fp.seek(0)  # reset file pointer after validation
    log.info("Loading ISA-JSON from %s", json_fp.name)
    isa_obj = isajson.load(fp=json_fp)
    log.info("Dumping ISA-Tab to %s", path)
    log.debug("Using configuration from %s", config_dir)
    isatab.dump(isa_obj=isa_obj, output_path=path, i_file_name=i_file_name)
    #  copy data files across from source directory where JSON is located
    log.info("Copying data files from source to target")
    for file in [
            f for f in os.listdir(os.path.dirname(json_fp.name))
            if not (f.endswith('.txt') and (f.startswith('i_') or f.startswith(
                's_') or f.startswith('a_'))) and not (f.endswith('.json'))
    ]:
        filepath = os.path.join(os.path.dirname(json_fp.name), file)
        if os.path.isfile(filepath):
            log.debug("Copying %s to %s", filepath, path)
            shutil.copy(filepath, path)
예제 #11
0
def measure_minimal(n_rows):
    starting_time = time.process_time_ns()
    investigation = Investigation()
    investigation.identifier = "i1"

    study = Study(filename="s_study.txt")
    study.identifier = "s1"
    investigation.studies.append(study)

    sample_collection_protocol = Protocol(name="sample collection")
    study.protocols.append(sample_collection_protocol)

    for i in range(0, n_rows):
        source = Source(name='source_material-{}'.format(i))
        sample = Sample(name="sample_material-{}".format(i))
        study.samples.append(sample)
        study.sources.append(source)

        sample_collection_process = Process(
            executes_protocol=sample_collection_protocol)
        sample_collection_process.inputs.append(source)
        sample_collection_process.outputs.append(sample)

        study.process_sequence.append(sample_collection_process)

    assay = Assay(filename="a_assay.txt")
    sequencing_protocol = Protocol(
        name='sequencing',
        protocol_type=OntologyAnnotation(term="material sequencing"))
    study.protocols.append(sequencing_protocol)

    for i, sample in enumerate(study.samples):

        sequencing_process = Process(executes_protocol=sequencing_protocol)
        sequencing_process.name = "assay-name-{}".format(i)
        sequencing_process.inputs.append(sample)

        datafile = DataFile(filename="sequenced-data-{}".format(i),
                            label="Raw Data File",
                            generated_from=[sample])
        sequencing_process.outputs.append(datafile)

        assay.samples.append(sample)
        assay.data_files.append(datafile)
        assay.process_sequence.append(sequencing_process)

    study.assays.append(assay)

    isatab.dump(investigation, "./")
    return time.process_time_ns() - starting_time
예제 #12
0
    def remove_unused_protocols(self):
        """Removes usused protocols

        :return: None
        """
        investigation = isatab.load(os.path.dirname(self.path))
        for study in investigation.studies:
            unused_protocol_names = set(x.name for x in study.protocols)
            for process in study.process_sequence:
                try:
                    unused_protocol_names.remove(
                        process.executes_protocol.name)
                except KeyError:
                    pass
            for assay in study.assays:
                for process in assay.process_sequence:
                    try:
                        unused_protocol_names.remove(
                            process.executes_protocol.name)
                    except KeyError:
                        pass
            print('Unused protocols: {}'.format(unused_protocol_names))
            print('Location of unused protocols: {}'.format(
                list(
                    map(
                        lambda pr: True
                        if pr.name in unused_protocol_names else False,
                        study.protocols))))
            # remove these protocols from study.protocols
            """
            clean_protocols_list = []
            for protocol in study.protocols:
                if protocol.name not in unused_protocol_names:
                    clean_protocols_list.append(protocol)
            study.protocols = clean_protocols_list
            """
            clean_protocols = [
                pr for pr in study.protocols
                if pr.name not in unused_protocol_names
            ]
            print('Clean protocol list: {}'.format(
                [pr.name for pr in clean_protocols]))
            study.protocols = clean_protocols
            print('Clean study.protocols: {}'.format(
                [pr.name for pr in study.protocols]))
        isatab.dump(investigation,
                    output_path=os.path.dirname(self.path),
                    i_file_name='{filename}.fix'.format(
                        filename=os.path.basename(self.path)),
                    skip_dump_tables=True)
예제 #13
0
def convert(idf_file_path, output_path):
    """ Converter for MAGE-TAB to ISA-Tab
    :param idf_file_path: File descriptor of input IDF file
    :param output_path: Path to directory to write output ISA-Tab files to
    """
    parser = MageTabParser()
    parser.parse_idf(idf_file_path)
    sdrf_files = [x.value for x in parser.ISA.studies[-1].comments if 'SDRF File' in x.name]
    if len(sdrf_files) == 1:
        sdrf_files = sdrf_files[0].split(';')
        for sdrf_file in sdrf_files:
            table_files = parser.parse_sdrf_to_isa_table_files(os.path.join(os.path.dirname(idf_file_path), sdrf_file))
            for in_fp in table_files:
                log.info("Writing {0} to {1}".format(in_fp.name, output_path))
                with open(os.path.join(output_path, in_fp.name), 'w') as out_fp:
                    out_fp.write(in_fp.read())
    log.info("Writing {0} to {1}".format("i_investigation.txt", output_path))
    isatab.dump(parser.ISA, output_path=output_path, skip_dump_tables=True)
    def _write_study_json(self, inv_obj, std_path, skip_dump_tables=True):
        logger.info("Writing %s to %s", self.inv_filename, std_path)
        try:
            os.makedirs(std_path)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise
        inv = dump(inv_obj,
                   std_path,
                   i_file_name=self.inv_filename,
                   skip_dump_tables=skip_dump_tables)

        return inv
예제 #15
0
    def replace_factor_with_protocol_parameter_value(self, factor_name,
                                                     protocol_ref):
        """Fixes a factor if it's supposed to be a Parameter Value

        :param factor_name: The factor that's incorrect
        :param protocol_ref: Protocol REF for the new Parameter Value
        :return: None
        """
        table_file_df = isatab.read_tfile(self.path)

        field_names = list(table_file_df.columns)
        clean_field_names = self.clean_isatab_field_names(field_names)

        factor_index = clean_field_names.index(
            'Factor Value[{factor_name}]'.format(factor_name=factor_name))

        with open(self.path) as tfile_fp:
            next(tfile_fp)
            line1 = next(tfile_fp)
            protocol_ref_index = list(
                map(lambda x: x[1:-1] if x[0] == '"' and x[-1] == '"' else x,
                    line1.split('\t'))).index(protocol_ref)

        if protocol_ref_index < 0:
            raise IOError(
                'Could not find protocol ref matching {protocol_ref}'.format(
                    protocol_ref=protocol_ref))

        if factor_index < len(field_names) and \
            'Term Source REF' in field_names[factor_index + 1] and \
                'Term Accession' in field_names[factor_index + 2]:
            log.debug('Moving Factor Value[{}] with term columns'.format(
                factor_name))
            # move Factor Value and Term Source REF and Term Accession columns
            field_names.insert(protocol_ref_index + 1,
                               field_names[factor_index])
            field_names.insert(protocol_ref_index + 2,
                               field_names[factor_index + 1 + 1])
            field_names.insert(protocol_ref_index + 3,
                               field_names[factor_index + 2 + 2])
            del field_names[factor_index + 3]  # del Factor Value[{}]
            del field_names[factor_index + 1 + 2]  # del Term Source REF
            del field_names[factor_index + 2 + 1]  # del Term Accession
        elif factor_index < len(field_names) and \
            'Unit' in field_names[factor_index + 1] and \
                'Term Source REF' in field_names[factor_index + 2] and \
                'Term Accession' in field_names[factor_index + 3]:
            log.debug(
                'Moving Factor Value[{factor_name}] with unit term columns'.
                format(factor_name=factor_name))
            # move Factor Value and Unit as ontology annotation
            field_names.insert(protocol_ref_index + 1,
                               field_names[factor_index])
            field_names.insert(protocol_ref_index + 2,
                               field_names[factor_index + 1 + 1])
            field_names.insert(protocol_ref_index + 3,
                               field_names[factor_index + 2 + 2])
            field_names.insert(protocol_ref_index + 4,
                               field_names[factor_index + 3 + 3])
            del field_names[factor_index + 4]  # del Factor Value[{}]
            del field_names[factor_index + 1 + 3]  # del Unit
            del field_names[factor_index + 2 + 2]  # del Term Source REF
            del field_names[factor_index + 3 + 1]  # del Term Accession
        elif factor_index < len(field_names) and \
                'Unit' in field_names[factor_index + 1]:
            log.debug(
                'Moving Factor Value[{factor_name}] with unit column'.format(
                    factor_name=factor_name))
            # move Factor Value and Unit columns
            field_names.insert(protocol_ref_index + 1,
                               field_names[factor_index])
            field_names.insert(protocol_ref_index + 2,
                               field_names[factor_index + 1 + 1])
            del field_names[factor_index + 2]  # del Factor Value[{}]
            del field_names[factor_index + 1 + 1]  # del Unit
        else:  # move only the Factor Value column
            log.debug('Moving Factor Value[{factor_name}]'.format(
                factor_name=factor_name))
            field_names.insert(protocol_ref_index + 1,
                               field_names[factor_index])
            del field_names[factor_index]  # del Factor Value[{}]

        table_file_df.columns = self.clean_isatab_field_names(field_names)

        # Rename Factor Value column to Parameter Value column
        field_names_modified = list(table_file_df.columns)
        field_names_modified[protocol_ref_index + 1] = \
            field_names_modified[protocol_ref_index + 1].replace(
                'Factor Value', 'Parameter Value')
        table_file_df.columns = self.clean_isatab_field_names(
            field_names_modified)

        investigation = isatab.load(os.path.dirname(self.path),
                                    skip_load_tables=True)
        study = investigation.studies[-1]
        protocol = study.get_prot(protocol_ref)
        if protocol is None:
            raise ISAModelAttributeError(
                'No protocol with name {protocol_ref} was found'.format(
                    protocol_ref=protocol_ref))
        protocol.add_param(factor_name)
        factor = study.get_factor(factor_name)
        if factor is None:
            raise ISAModelAttributeError(
                'No factor with name {factor_name} was found'.format(
                    factor_name=factor_name))
        else:
            study.del_factor(name=factor_name, are_you_sure=True)

        study.filename = '{study_filename}.fix'.format(
            study_filename=study.filename)

        isatab.dump(investigation,
                    output_path=os.path.dirname(self.path),
                    i_file_name='i_Investigation.txt.fix',
                    skip_dump_tables=True)

        with open(
                os.path.join(
                    os.path.dirname(self.path), '{s_filename}.fix'.format(
                        s_filename=os.path.basename(self.path))),
                'w') as out_fp:
            table_file_df.to_csv(path_or_buf=out_fp,
                                 index=False,
                                 sep='\t',
                                 encoding='utf-8')
예제 #16
0
def create_from_galaxy_parameters(galaxy_parameters_file, target_dir):

    def _create_treatment_sequence(galaxy_parameters):
        treatment_plan = galaxy_parameters['treatment_plan']
        study_type = treatment_plan['study_type']['study_type_selector']
        log.debug(json.dumps(galaxy_parameters, indent=4))
        try:
            single_or_multiple = treatment_plan['study_type']['balance'][
                'multiple_interventions']
        except KeyError:
            single_or_multiple = \
                treatment_plan['study_type']['multiple_interventions'][
                    'multiple_interventions_selector']
        if single_or_multiple == 'multiple':
            raise NotImplementedError(
                'Multiple treatments not yet implemented. Please select Single')

        if study_type == 'full_factorial':
            intervention_type = \
            treatment_plan['study_type']['multiple_interventions'][
                'intervention_type']['intervention_type_selector']
            if intervention_type == 'chemical intervention':
                interventions = INTERVENTIONS['CHEMICAL']
            elif intervention_type == 'dietary intervention':
                interventions = INTERVENTIONS['DIET']
            elif intervention_type == 'behavioural intervention':
                interventions = INTERVENTIONS['BEHAVIOURAL']
            elif intervention_type == 'biological intervention':
                interventions = INTERVENTIONS['BIOLOGICAL']
            elif intervention_type == 'surgical intervention':
                interventions = INTERVENTIONS['SURGICAL']
            elif intervention_type == 'radiological intervention':  # not in tool yet
                interventions = INTERVENTIONS['RADIOLOGICAL']
            else:  # default to chemical
                interventions = INTERVENTIONS['CHEMICAL']
            treatment_factory = TreatmentFactory(
                intervention_type=interventions, factors=BASE_FACTORS)

            # Treatment Sequence
            agent_levels = \
            treatment_plan['study_type']['multiple_interventions'][
                'intervention_type']['agent'].split(',')
            for agent_level in agent_levels:
                treatment_factory.add_factor_value(BASE_FACTORS[0],
                                                   agent_level.strip())
            dose_levels = \
            treatment_plan['study_type']['multiple_interventions'][
                'intervention_type']['intensity'].split(',')
            for dose_level in dose_levels:
                treatment_factory.add_factor_value(BASE_FACTORS[1],
                                                   dose_level.strip())
            duration_of_exposure_levels = treatment_plan[
                'study_type']['multiple_interventions']['intervention_type'][
                'duration'].split(',')
            for duration_of_exposure_level in duration_of_exposure_levels:
                treatment_factory.add_factor_value(
                    BASE_FACTORS[2], duration_of_exposure_level.strip())
            treatment_sequence = TreatmentSequence(
                ranked_treatments=treatment_factory
                    .compute_full_factorial_design())
            group_size = int(
                galaxy_parameters['treatment_plan']['study_type'][
                    'multiple_interventions']['group_size'])
            for ranked_treatment in \
                    treatment_sequence.ranked_treatments:
                ranked_treatment[0].group_size = group_size
            return treatment_sequence

        elif study_type == 'fractional_factorial':
            intervention_type = \
                treatment_plan['study_type']['balance'][
                    'multiple_interventions']['intervention_type_selector']
            treatments = set()
            study_factors = [StudyFactor(name=x.strip()) for x in
                             treatment_plan['study_type'][
                                 'balance']['multiple_interventions'][
                                 'study_factors'].split(',')]
            for group in \
                    treatment_plan['study_type']['balance'][
                        'multiple_interventions']['study_groups']:
                factor_values = ()
                for x, y in zip(study_factors, [x.strip() for x in
                                                group['factor_values'].split(
                                                    ',')]):
                    factor_value = FactorValue(factor_name=x, value=y)
                    factor_values = factor_values + (factor_value,)
                if galaxy_parameters['treatment_plan']['study_type'][
                    'balance']['balanced_groups']:
                    group_size = int(
                        galaxy_parameters['treatment_plan']['study_type'][
                            'balance']['multiple_interventions']['group_size'])
                else:
                    group_size = int(group['group_size'])
                treatment = Treatment(treatment_type=intervention_type,
                    factor_values=factor_values, group_size=group_size)
                treatments.add(treatment)
            treatment_sequence = TreatmentSequence(ranked_treatments=treatments)
            return treatment_sequence

    def _create_sample_plan(sample_assay_plan, sample_plan_record):

        def _create_nmr_assay_type(assay_plan_record):
            nmr_assay_type = AssayType(
                measurement_type='metabolite profiling',
                technology_type='nmr spectroscopy')
            nmr_top_mods = NMRTopologyModifiers()
            nmr_top_mods.technical_replicates = assay_plan_record[
                'assay_type']['acquisition_mode']['technical_replicates']
            nmr_top_mods.acquisition_modes.add(
                assay_plan_record['assay_type'][
                    'acquisition_mode']['acquisition_mode_selector'])
            nmr_top_mods.instruments.add('{} {}'.format(
                assay_plan_record['assay_type'][
                    'acquisition_mode']['nmr_instrument'],
                assay_plan_record['assay_type']['acquisition_mode']['magnet']))
            nmr_top_mods.pulse_sequences.add(
                assay_plan_record['assay_type'][
                    'acquisition_mode']['pulse_sequence']
            )
            nmr_top_mods.magnet_power = \
                assay_plan_record['assay_type']['acquisition_mode']['magnet']
            nmr_assay_type.topology_modifiers = nmr_top_mods
            return nmr_assay_type

        def _create_ms_assay_type(assay_plan_record):
            ms_assay_type = AssayType(
                measurement_type='metabolite profiling',
                technology_type='mass spectrometry')
            ms_assay_type.topology_modifiers = MSTopologyModifiers(
                sample_fractions=set(map(
                    lambda x: x['sample_fraction'],
                    assay_plan_record['assay_type']['sample_fractions'])))
            injection_modes = ms_assay_type.topology_modifiers.injection_modes
            if len(assay_plan_record['assay_type']['injections']) > 0:
                for inj_mod in assay_plan_record['assay_type']['injections']:
                    injection_mode = MSInjectionMode(
                        injection_mode=inj_mod[
                            'injection_mode']['injection_mode_selector'],
                        ms_instrument=inj_mod['injection_mode']['instrument']
                    )
                    if inj_mod['injection_mode'][
                        'injection_mode_selector'] in ('LC', 'GC'):
                        injection_mode.chromatography_instrument = inj_mod[
                            'injection_mode']['chromatography_instrument']
                    if inj_mod[
                        'injection_mode']['injection_mode_selector'] == 'LC':
                        injection_mode.chromatography_column = inj_mod[
                            'injection_mode']['chromatography_column']
                    injection_modes.add(injection_mode)
                    for acq_mod in inj_mod['injection_mode']['acquisitions']:
                        injection_mode.acquisition_modes.add(
                            MSAcquisitionMode(
                                acquisition_method=acq_mod['acquisition_mode'],
                                technical_repeats=acq_mod[
                                    'technical_replicates']
                            )
                        )
                        if inj_mod['injection_mode'][
                            'injection_mode_selector'] == 'GC':
                            for deriva in inj_mod['injection_mode'][
                                    'derivatizations']:
                                derivatization = deriva['derivatization']
                                if re.match('(.*?) \((.*?)\)', derivatization):
                                    matches = next(iter(
                                        re.findall('(.*?) \((.*?)\)',
                                                   derivatization)))
                                    term, ontoid = matches[0], matches[1]
                                    source_name, accession_id = \
                                    ontoid.split(':')[0], \
                                    ontoid.split(':')[1]
                                    source = OntologySource(name=source_name)
                                    derivatization = OntologyAnnotation(
                                        term=term, term_source=source,
                                        term_accession=accession_id)
                                injection_mode.derivatizations.add(
                                    derivatization)
            return ms_assay_type

        if sample_plan_record['material_type'] == 'user defined':
            sample_type = sample_plan_record['material_type']['sample_type_ud']
        else:
            sample_type = sample_plan_record['material_type']
            if re.match('(.*?) \((.*?)\)', sample_type):
                matches = next(iter(re.findall('(.*?) \((.*?)\)', sample_type)))
                term, ontoid = matches[0], matches[1]
                source_name, accession_id = ontoid.split(':')[0], \
                                            ontoid.split(':')[1]
                source = OntologySource(name=source_name)
                sample_type = OntologyAnnotation(term=term, term_source=source,
                                                 term_accession=accession_id)
        sample_assay_plan.add_sample_type(sample_type)
        sample_size = sample_plan_record['sample_collections']
        sample_assay_plan.add_sample_plan_record(sample_type, sample_size)
        for assay_plan_record in sample_plan_record['assay_plans']:
            tt = assay_plan_record['assay_type']['assay_type_selector']
            if tt == 'nmr':
                assay_type = _create_nmr_assay_type(assay_plan_record)
            elif tt == 'ms':
                assay_type = _create_ms_assay_type(assay_plan_record)
            else:
                raise NotImplementedError('Only MS and NMR assays supported')
            sample_assay_plan.add_assay_type(assay_type)
            sample_assay_plan.add_assay_plan_record(sample_type, assay_type)
        return sample_assay_plan

    def _inject_qcqa_plan(sample_assay_plan, qcqa_record):
        qc_type = qcqa_record['qc_type']['qc_type_selector']
        if qc_type == 'interval_series':
            material_type = qcqa_record['material_type']
            if re.match('(.*?) \((.*?)\)', material_type):
                matches = next(iter(
                    re.findall('(.*?) \((.*?)\)', material_type)))
                term, ontoid = matches[0], matches[1]
                source_name, accession_id = ontoid.split(':')[0], \
                                            ontoid.split(':')[1]
                source = OntologySource(name=source_name)
                material_type = OntologyAnnotation(
                    term=term, term_source=source, term_accession=accession_id)
            sample_assay_plan.add_sample_qc_plan_record(
                material_type=material_type,
                injection_interval=qcqa_record[
                    'qc_type']['injection_frequency'])
        elif 'dilution_series' in qc_type:
            values = [int(x) for x in qcqa_record[
                'qc_type']['values'].split(',')]
            material_type = qcqa_record['material_type']
            if re.match('(.*?) \((.*?)\)', material_type):
                matches = next(iter(
                    re.findall('(.*?) \((.*?)\)', material_type)))
                term, ontoid = matches[0], matches[1]
                source_name, accession_id = ontoid.split(':')[0], \
                                            ontoid.split(':')[1]
                source = OntologySource(name=source_name)
                material_type = OntologyAnnotation(
                    term=term, term_source=source, term_accession=accession_id)
            batch = SampleQCBatch(material=material_type)
            for value in values:
                batch.characteristic_values.append(
                    Characteristic(category=OntologyAnnotation(
                        term='quantity'), value=value)
                )
            if 'pre' in qc_type:
                sample_assay_plan.pre_run_batch = batch
            elif 'post' in qc_type:
                sample_assay_plan.post_run_batch = batch
        else:
            raise NotImplementedError('QC type not recognized!')

        return sample_assay_plan

    # pre-generation checks
    if galaxy_parameters_file:
        galaxy_parameters = json.load(galaxy_parameters_file)
        log.debug(json.dumps(galaxy_parameters, indent=4))
    else:
        raise IOError('Could not load Galaxy parameters file!')
    if target_dir:
        if not os.path.exists(target_dir):
            raise IOError('Target path does not exist!')
    if len(galaxy_parameters['sample_and_assay_planning']['sample_plans']) == 0:
        raise IOError('No Sampling plan specified')

    treatment_sequence = _create_treatment_sequence(galaxy_parameters)
    sample_assay_plan = SampleAssayPlan()
    for sample_plan_record in galaxy_parameters['sample_and_assay_planning'][
            'sample_plans']:
        _ = _create_sample_plan(sample_assay_plan, sample_plan_record)
    for qcqa_record in galaxy_parameters['qc_planning']['qc_plans']:
        _ = _inject_qcqa_plan(sample_assay_plan, qcqa_record)
    try:
        sample_assay_plan.group_size = \
            int(galaxy_parameters['treatment_plan']['study_type'][
                'multiple_interventions']['group_size'])
    except KeyError:
        try:
            sample_assay_plan.group_size = \
                int(galaxy_parameters['treatment_plan']['study_type'][
                    'balance']['multiple_interventions']['group_size'])
        except KeyError:
            log.debug(
                'Group size not set for root plan as multiple intervention')
            sample_assay_plan.group_size = 0  # raises AttributeError

    study_info = galaxy_parameters['study_metadata']

    if len(sample_assay_plan.sample_plan) == 0:
        log.info('No sample plan defined')
    if len(sample_assay_plan.assay_plan) == 0:
        log.info('No assay plan defined')

    study_design = StudyDesign()
    study_design.add_single_sequence_plan(treatment_sequence, sample_assay_plan)
    isa_object_factory = IsaModelObjectFactory(study_design)
    if len(sample_assay_plan.sample_plan) == 0:
        s = Study()
    else:
        s = isa_object_factory.create_assays_from_plan()

    c = Person()
    c.affiliation = study_info.get('affiliation')
    c.last_name = study_info.get('last_name')
    c.email = study_info['email']
    c.first_name = study_info['first_name']
    s.contacts = [c]
    s.description = study_info['description']
    s.filename = 's_study.txt'
    s.title = study_info['title']
    s.identifier = 'ISA-{}'.format(uuid.uuid4().hex[:8])
    s.comments = [
        Comment(name='Consent Information (ICO:0000011)',
                value=study_info['study_consent']),
        Comment(name='Data Use Requirement (DUO:0000017)',
                value=study_info['study_use_condition'])
    ]
    i = Investigation()
    i.contacts = [c]
    i.description = ""
    i.title = "Investigation"
    i.identifier = s.identifier
    i.studies = [s]
    try:
        i.ontology_source_references = s.ontology_source_references
    except AttributeError:
        pass
    i.ontology_source_references.append(OntologySource(name='ICO'))
    i.ontology_source_references.append(OntologySource(name='DUO'))

    def sanitize_filename(filename):
        filename = str(filename).strip().replace(' ', '_')
        filename = re.sub(r'(?u)[^-\w.]', '_', filename)
        return filename

    i.filename = sanitize_filename(i.filename)
    for s in i.studies:
        s.filename = sanitize_filename(s.filename)
        for a in s.assays:
            a.filename = sanitize_filename(a.filename)

    isatab.dump(isa_obj=i, output_path=target_dir)
예제 #17
0
    def write_isa_study(self,
                        inv_obj,
                        api_key,
                        std_path,
                        save_investigation_copy=True,
                        save_samples_copy=False,
                        save_assays_copy=False):
        """
        Write back an ISA-API Investigation object directly into ISA-Tab files
        :param inv_obj: ISA-API Investigation object
        :param api_key: User API key for accession check
        :param std_path: file system path to destination folder
        :param save_investigation_copy: Keep track of changes saving a copy of the unmodified i_*.txt file
        :param save_samples_copy: Keep track of changes saving a copy of the unmodified s_*.txt file
        :param save_assays_copy: Keep track of changes saving a copy of the unmodified a_*.txt and m_*.tsv files
        :return:
        """
        # dest folder name is a timestamp
        update_path_suffix = app.config.get('UPDATE_PATH_SUFFIX')
        update_path = os.path.join(std_path, update_path_suffix)
        if save_investigation_copy or save_samples_copy or save_assays_copy:  # Only create audit folder when requested
            dest_path = new_timestamped_folder(update_path)

            # make a copy before applying changes
            if save_investigation_copy:
                src_file = os.path.join(std_path, self.inv_filename)
                dest_file = os.path.join(dest_path, self.inv_filename)
                logger.info("Copying %s to %s", src_file, dest_file)
                copy_file(src_file, dest_file)

            if save_samples_copy:
                for sample_file in glob.glob(os.path.join(std_path,
                                                          "s_*.txt")):
                    sample_file_name = os.path.basename(sample_file)
                    src_file = sample_file
                    dest_file = os.path.join(dest_path, sample_file_name)
                    logger.info("Copying %s to %s", src_file, dest_file)
                    copy_file(src_file, dest_file)

            if save_assays_copy:
                for assay_file in glob.glob(os.path.join(std_path, "a_*.txt")):
                    assay_file_name = os.path.basename(assay_file)
                    src_file = assay_file
                    dest_file = os.path.join(dest_path, assay_file_name)
                    logger.info("Copying %s to %s", src_file, dest_file)
                    copy_file(src_file, dest_file)
                # Save the MAF
                for maf in glob.glob(os.path.join(std_path, "m_*.tsv")):
                    maf_file_name = os.path.basename(maf)
                    src_file = maf
                    dest_file = os.path.join(dest_path, maf_file_name)
                    logger.info("Copying %s to %s", src_file, dest_file)
                    copy_file(src_file, dest_file)

        logger.info("Writing %s to %s", self.inv_filename, std_path)
        i_file_name = self.inv_filename
        dump(inv_obj,
             std_path,
             i_file_name=i_file_name,
             skip_dump_tables=False)

        return
예제 #18
0
 def test_isatab_bad_i_file_name(self):
     with self.assertRaises(NameError):
         isatab.dump(Investigation(), self._tmp_dir, i_file_name="investigation.txt")
예제 #19
0
    def test_isatab_dump_source_sample_split(self):
        i = Investigation()
        uberon = OntologySource(
            name="UBERON",
            description="Uber Anatomy Ontology",
            version="216",
            file="http://data.bioontology.org/ontologies/UBERON",
        )
        ncbitaxon = OntologySource(
            name="NCBITAXON",
            description="National Center for Biotechnology Information (NCBI) Organismal Classification",
            version="2",
            file="http://data.bioontology.org/ontologies/NCBITAXON",
        )
        i.ontology_source_references.append(uberon)
        i.ontology_source_references.append(ncbitaxon)

        s = Study(filename="s_pool.txt")

        sample_collection_protocol = Protocol(
            name="sample collection", protocol_type=OntologyAnnotation(term="sample collection")
        )
        s.protocols.append(sample_collection_protocol)

        reference_descriptor_category = OntologyAnnotation(term="reference descriptor")
        material_type_category = OntologyAnnotation(term="material type")
        organism_category = OntologyAnnotation(term="organism")

        source1 = Source(name="source1")
        source1.characteristics = [
            Characteristic(category=reference_descriptor_category, value="not applicable"),
            Characteristic(category=material_type_category, value="specimen"),
            Characteristic(
                category=organism_category,
                value=OntologyAnnotation(
                    term="Human", term_source=ncbitaxon, term_accession="http://purl.bioontology.org/ontology/STY/T016"
                ),
            ),
        ]

        sample1 = Sample(name="sample1")
        organism_part = OntologyAnnotation(term="organism part")
        sample1.characteristics.append(
            Characteristic(
                category=organism_part,
                value=OntologyAnnotation(
                    term="liver", term_source=uberon, term_accession="http://purl.obolibrary.org/obo/UBERON_0002107"
                ),
            )
        )

        sample2 = Sample(name="sample2")
        sample2.characteristics.append(
            Characteristic(
                category=organism_part,
                value=OntologyAnnotation(
                    term="heart", term_source=uberon, term_accession="http://purl.obolibrary.org/obo/UBERON_0000948"
                ),
            )
        )

        sample3 = Sample(name="sample3")
        sample3.characteristics.append(
            Characteristic(
                category=organism_part,
                value=OntologyAnnotation(
                    term="blood", term_source=uberon, term_accession="http://purl.obolibrary.org/obo/UBERON_0000178"
                ),
            )
        )

        sample4 = Sample(name="sample4")
        sample4.characteristics.append(
            Characteristic(
                category=organism_part,
                value=OntologyAnnotation(
                    term="blood", term_source=uberon, term_accession="http://purl.obolibrary.org/obo/UBERON_0000178"
                ),
            )
        )

        sample_collection_process = Process(executes_protocol=sample_collection_protocol)

        sample_collection_process.inputs = [source1]
        sample_collection_process.outputs = [sample1, sample2, sample3, sample4]
        s.process_sequence = [sample_collection_process]
        from isatools.model.v1 import _build_assay_graph

        s.graph = _build_assay_graph(s.process_sequence)
        i.studies = [s]
        isatab.dump(i, self._tmp_dir)
        self.assertTrue(
            assert_tab_content_equal(
                open(os.path.join(self._tmp_dir, "s_pool.txt")),
                open(os.path.join(self._tab_data_dir, "TEST-ISA-source-split", "s_TEST-Template1-Splitting.txt")),
            )
        )
예제 #20
0
                        output.label]['entry_list'][-1]['value']
                    output.generated_from[-1].name = labels['Sample Name'][
                        'value']
            #  set MS Assay Name to mzML metadata
            ms_process.name = labels['MS Assay Name']['value']

            #  add data transformation to describe conversion to mzML
            if data_trans_meta['Data Transformation Name']:
                if not study.get_prot('Conversion to mzML'):
                    dt_prot = Protocol(name='Conversion to mzML',
                                       protocol_type=OntologyAnnotation(
                                           term='data transformation'))
                    dt_prot.add_param('peak picking')
                    dt_prot.add_param('software')
                    dt_prot.add_param('software version')
                    study.protocols.append(dt_prot)
                dt_prot = study.get_prot('Conversion to mzML')
                dt_process = Process(executes_protocol=dt_prot)
                dt_process.outputs = [
                    DerivedSpectralDataFile(
                        filename=labels['Derived Spectral Data File']
                        ['entry_list'][-1]['value'])
                ]
                dt_process.inputs = ms_process.outputs
                plink(ms_process, dt_process)
                assay.process_sequence.append(dt_process)
        except IndexError:
            pass

isatab.dump(ISA, output_filepath)
예제 #21
0
 def test_isatab_bad_i_file_name(self):
     with self.assertRaises(NameError):
         isatab.dump(Investigation(),
                     self._tmp_dir,
                     i_file_name='investigation.txt')
예제 #22
0
    def test_isatab_dump_source_sample_split(self):
        i = Investigation()
        uberon = OntologySource(
            name='UBERON',
            description="Uber Anatomy Ontology",
            version='216',
            file='http://data.bioontology.org/ontologies/UBERON')
        ncbitaxon = OntologySource(
            name='NCBITAXON',
            description=
            "National Center for Biotechnology Information (NCBI) Organismal Classification",
            version='2',
            file='http://data.bioontology.org/ontologies/NCBITAXON')
        i.ontology_source_references.append(uberon)
        i.ontology_source_references.append(ncbitaxon)

        s = Study(filename='s_pool.txt')

        sample_collection_protocol = Protocol(
            name='sample collection',
            protocol_type=OntologyAnnotation(term='sample collection'))
        s.protocols.append(sample_collection_protocol)

        reference_descriptor_category = OntologyAnnotation(
            term='reference descriptor')
        material_type_category = OntologyAnnotation(term='material type')
        organism_category = OntologyAnnotation(term='organism')

        source1 = Source(name='source1')
        source1.characteristics = [
            Characteristic(category=reference_descriptor_category,
                           value='not applicable'),
            Characteristic(category=material_type_category, value='specimen'),
            Characteristic(
                category=organism_category,
                value=OntologyAnnotation(
                    term='Human',
                    term_source=ncbitaxon,
                    term_accession=
                    'http://purl.bioontology.org/ontology/STY/T016')),
        ]

        sample1 = Sample(name='sample1')
        organism_part = OntologyAnnotation(term='organism part')
        sample1.characteristics.append(
            Characteristic(category=organism_part,
                           value=OntologyAnnotation(
                               term='liver',
                               term_source=uberon,
                               term_accession=
                               'http://purl.obolibrary.org/obo/UBERON_0002107',
                           )))

        sample2 = Sample(name='sample2')
        sample2.characteristics.append(
            Characteristic(category=organism_part,
                           value=OntologyAnnotation(
                               term='heart',
                               term_source=uberon,
                               term_accession=
                               'http://purl.obolibrary.org/obo/UBERON_0000948',
                           )))

        sample3 = Sample(name='sample3')
        sample3.characteristics.append(
            Characteristic(category=organism_part,
                           value=OntologyAnnotation(
                               term='blood',
                               term_source=uberon,
                               term_accession=
                               'http://purl.obolibrary.org/obo/UBERON_0000178',
                           )))

        sample4 = Sample(name='sample4')
        sample4.characteristics.append(
            Characteristic(category=organism_part,
                           value=OntologyAnnotation(
                               term='blood',
                               term_source=uberon,
                               term_accession=
                               'http://purl.obolibrary.org/obo/UBERON_0000178',
                           )))

        sample_collection_process = Process(
            executes_protocol=sample_collection_protocol)

        sample_collection_process.inputs = [source1]
        sample_collection_process.outputs = [
            sample1, sample2, sample3, sample4
        ]
        s.process_sequence = [sample_collection_process]
        from isatools.model.v1 import _build_assay_graph
        s.graph = _build_assay_graph(s.process_sequence)
        i.studies = [s]
        isatab.dump(i, self._tmp_dir)
        self.assertTrue(
            assert_tab_content_equal(
                open(os.path.join(self._tmp_dir, 's_pool.txt')),
                open(
                    os.path.join(self._tab_data_dir, 'TEST-ISA-source-split',
                                 's_TEST-Template1-Splitting.txt'))))
예제 #23
0
def create_from_plan_parameters(galaxy_parameters_file,
                                sample_assay_plans_file, study_info_file,
                                treatment_plans_file, target_dir):
    decoder = SampleAssayPlanDecoder()
    if galaxy_parameters_file:
        galaxy_parameters = json.load(galaxy_parameters_file)
        sample_and_assay_plans, study_info, treatment_plan_params = \
            map_galaxy_to_isa_create_json(galaxy_parameters)
        plan = decoder.load(io.StringIO(json.dumps(sample_and_assay_plans)))
    elif sample_assay_plans_file and study_info_file and treatment_plans_file:
        plan = decoder.load(sample_assay_plans_file)
        study_info = json.load(study_info_file)
        treatment_plan_params = json.load(treatment_plans_file)
    else:
        raise IOError('Wrong parameters provided')

    study_type = treatment_plan_params['study_type_cond']['study_type']
    if study_type != 'intervention':
        raise NotImplementedError('Only supports Intervention studies')

    single_or_multiple = treatment_plan_params['study_type_cond'][
        'one_or_more']['single_or_multiple']
    if single_or_multiple == 'multiple':
        raise NotImplementedError(
            'Multiple treatments not yet implemented. Please select Single')

    intervention_type = treatment_plan_params['study_type_cond'][
        'one_or_more']['intervention_type']['select_intervention_type']
    if intervention_type != 'chemical intervention':
        raise NotImplementedError(
            'Only Chemical Interventions supported at this time')

    treatment_factory = TreatmentFactory(
        intervention_type=INTERVENTIONS['CHEMICAL'], factors=BASE_FACTORS)
    agent_levels = treatment_plan_params['study_type_cond']['one_or_more'][
        'intervention_type']['agent'].split(',')
    for agent_level in agent_levels:
        treatment_factory.add_factor_value(BASE_FACTORS[0],
                                           agent_level.strip())
    dose_levels = treatment_plan_params['study_type_cond']['one_or_more'][
        'intervention_type']['intensity'].split(',')
    for dose_level in dose_levels:
        treatment_factory.add_factor_value(BASE_FACTORS[1], dose_level.strip())
    duration_of_exposure_levels = treatment_plan_params['study_type_cond'][
        'one_or_more']['intervention_type']['duration'].split(',')
    for duration_of_exposure_level in duration_of_exposure_levels:
        treatment_factory.add_factor_value(BASE_FACTORS[2],
                                           duration_of_exposure_level.strip())
    treatment_sequence = TreatmentSequence(
        ranked_treatments=treatment_factory.compute_full_factorial_design())
    isa_object_factory = IsaModelObjectFactory(plan, treatment_sequence)
    s = isa_object_factory.create_assays_from_plan()
    contact = Person()
    contact.affiliation = study_info['study_pi_affiliation']
    contact.last_name = study_info['study_pi_last_name']
    contact.email = study_info['study_pi_email']
    contact.first_name = study_info['study_pi_first_name']
    s.contacts = [contact]
    s.description = study_info['study_description']
    s.filename = 's_study.txt'
    s.title = 'ISA created {}'.format(datetime.datetime.now().isoformat())
    s.identifier = 'ISA-{}'.format(uuid.uuid4().hex[:8])

    i = Investigation()
    i.contacts = [contact]
    i.description = s.description
    i.title = s.title
    i.identifier = s.identifier

    i.studies = [s]
    isatab.dump(isa_obj=i,
                output_path=target_dir,
                i_file_name='i_investigation.txt')

    for assay in s.assays:
        for data_file in assay.data_files:
            data_file_path = os.path.join(target_dir, data_file.filename)
            with open(data_file_path, 'a'):
                os.utime(data_file_path, None)
예제 #24
0
def main(arg):
    """ Given a SERVER value (and BRAPI isa_study identifier), generates an ISA-Tab document"""

    client = BrapiClient(SERVER, logger)
    converter = BrapiToIsaConverter(logger, SERVER)

    # iterating through the trials held in a BRAPI server:
    # for trial in client.get_trials(TRIAL_IDS):
    for trial in get_trials(client):
        logger.info('we start from a set of Trials')
        investigation = Investigation()

        output_directory = get_output_path( trial['trialName'])
        logger.info("Generating output in : "+ output_directory)

        if 'contacts' in trial.keys():
            for brapicontact in trial['contacts']:
                #NOTE: brapi has just name atribute -> no seperate first/last name
                ContactName = brapicontact['name'].split(' ')
                contact = Person(first_name=ContactName[0], last_name=ContactName[1],
                affiliation=brapicontact['institutionName'], email=brapicontact['email'])
                investigation.contacts.append(contact)
        
        # iterating through the BRAPI studies associated to a given BRAPI trial:
        for brapi_study in trial['studies']:
            germplasminfo = {}
            #NOTE keeping track of germplasm info for data file generation
            brapi_study_id = brapi_study['studyDbId']
            obs_levels_in_study_and_var, obs_levels = converter.obtain_brapi_obs_levels_and_var(brapi_study_id)
            # NB: this method always create an ISA Assay Type
            isa_study, investigation = converter.create_isa_study(brapi_study_id, investigation, obs_levels_in_study_and_var.keys())
            investigation.studies.append(isa_study)

            # creating the main ISA protocols:
            sample_collection_protocol = Protocol(name="sample collection",
                                                  protocol_type=OntologyAnnotation(term="sample collection"))
            isa_study.protocols.append(sample_collection_protocol)

            # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization
            # TODO: see https://github.com/ISA-tools/isa-api/blob/master/isatools/isatab.py#L886
            phenotyping_protocol = Protocol(name="phenotyping",
                                            protocol_type=OntologyAnnotation(term="nucleic acid sequencing"))
            isa_study.protocols.append(phenotyping_protocol)

            # Getting the list of all germplasms used in the BRAPI isa_study:
            germplasms = client.get_study_germplasms(brapi_study_id)

            germ_counter = 0
            
            # Iterating through the germplasm considered as biosource,
            # For each of them, we retrieve their attributes and create isa characteristics
            for germ in germplasms:
                # print("GERM:", germ['germplasmName']) # germplasmDbId
                # WARNING: BRAPIv1 endpoints are not consistently using these
                # depending on endpoints, attributes may have to swapped
                # get_germplasm_chars(germ)
                # Creating corresponding ISA biosources with is Creating isa characteristics from germplasm attributes.
                # ------------------------------------------------------
                source = Source(name=germ['germplasmName'], characteristics=converter.create_germplasm_chars(germ))
                
                if germ['germplasmDbId'] not in germplasminfo:
                    germplasminfo[germ['germplasmDbId']] = [germ['accessionNumber']]

                # Associating ISA sources to ISA isa_study object
                isa_study.sources.append(source)

                germ_counter = germ_counter + 1

            # Now dealing with BRAPI observation units and attempting to create ISA samples
            create_study_sample_and_assay(client, brapi_study_id, isa_study,  sample_collection_protocol, phenotyping_protocol)

            # Writing isa_study to ISA-Tab format:
            # --------------------------------
            try:
                # isatools.isatab.dumps(investigation)  # dumps() writes out the ISA
                # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization
                # !!!: if Assay Table is missing the 'Assay Name' field, remember to check protocol_type used !!!
                isatab.dump(isa_obj=investigation, output_path=output_directory)
                logger.info('DONE!...')
            except IOError as ioe:
                logger.info('CONVERSION FAILED!...')
                logger.info(str(ioe))

            try:
                variable_records = converter.create_isa_tdf_from_obsvars(client.get_study_observed_variables(brapi_study_id))
                # Writing Trait Definition File:
                # ------------------------------
                write_records_to_file(this_study_id=str(brapi_study_id),
                                      this_directory=output_directory,
                                      records=variable_records,
                                      filetype="t_")
            except Exception as ioe:
                print(ioe)

            # Getting Variable Data and writing Measurement Data File
            # -------------------------------------------------------
            for level, variables in obs_levels_in_study_and_var.items():
                try:
                    obsvarlist = []
                    for i in client.get_study_observation_units(brapi_study_id):
                        obsvarlist.append(i)
                    data_readings = converter.create_isa_obs_data_from_obsvars(obsvarlist, list(variables) ,level, germplasminfo, obs_levels)
                    logger.debug("Generating data files")
                    write_records_to_file(this_study_id=str(brapi_study_id), this_directory=output_directory, records=data_readings,
                                        filetype="d_", ObservationLevel=level)
                except Exception as ioe:
                    print(ioe)
예제 #25
0
def measure_real_world(n_rows):
    starting_time = time.process_time_ns()
    investigation = Investigation()
    investigation.identifier = "i1"

    study = Study(filename="s_study.txt")
    study.identifier = "s1"
    investigation.studies.append(study)

    # Ontologies
    ontologies = {}
    ontologies["NCBITaxon"] = OntologySource(
        name="NCBITaxon",
        file="http://purl.obolibrary.org/obo/ncbitaxon",
        description=
        "National Center for Biotechnology Information (NCBI) Organismal Classification"
    )
    ontologies["AGRO"] = OntologySource(
        name="AGRO",
        file="http://purl.obolibrary.org/obo/agro/releases/2018-05-14/agro.owl",
        description="Agronomy Ontology",
        version="2018-05-14")
    ontologies["UO"] = OntologySource(
        name="UO",
        file="http://data.bioontology.org/ontologies/UO",
        description="Units of Measurement Ontology",
        version="38802")
    investigation.ontology_source_references.extend(ontologies.values())

    # Factors
    fa_soil_cover = StudyFactor(name="Soil Cover")
    fa_plant_movement = StudyFactor(name="Plant Movement")
    study.factors.extend([fa_soil_cover, fa_plant_movement])

    fav_covered = FactorValue(factor_name=fa_soil_cover, value="covered")
    fav_uncovered = FactorValue(factor_name=fa_soil_cover, value="uncovered")
    fav_rotating = FactorValue(factor_name=fa_plant_movement, value="rotating")
    fav_stationary = FactorValue(factor_name=fa_plant_movement,
                                 value="stationary")

    # Protocols
    prot_phenotyping = Protocol(name="Phenotyping")
    prot_growth = Protocol(name="Growth")
    prot_watering = Protocol(name="Watering")
    study.protocols.append(prot_phenotyping)
    study.protocols.append(prot_growth)
    study.protocols.append(prot_watering)

    assay = Assay(filename="a_assay.txt")
    study.assays.append(assay)

    # Characteristics
    common_characteristics = [
        Characteristic(
            category=OntologyAnnotation(term="Organism"),
            value=OntologyAnnotation(
                term="Arabidopsis thaliana",
                term_source=ontologies["NCBITaxon"],
                term_accession="http://purl.obolibrary.org/obo/NCBITaxon_3702")
        ),
        Characteristic(
            category=OntologyAnnotation(term="Genus"),
            value=OntologyAnnotation(
                term="Arabidopsis",
                term_source=ontologies["NCBITaxon"],
                term_accession="http://purl.obolibrary.org/obo/NCBITaxon_3701")
        ),
        Characteristic(category=OntologyAnnotation(term="Species"),
                       value=OntologyAnnotation(term="thaliana")),
        Characteristic(category=OntologyAnnotation(term="Infraspecific Name"),
                       value=OntologyAnnotation(term=" ")),
        Characteristic(
            category=OntologyAnnotation(term="Biological Material Latitude"),
            value=OntologyAnnotation(term="51.827721")),
        Characteristic(
            category=OntologyAnnotation(term="Biological Material Longitude"),
            value=OntologyAnnotation(term="11.27778")),
        Characteristic(
            category=OntologyAnnotation(term="Material Source ID"),
            value=OntologyAnnotation(
                term=
                "http://eurisco.ipk-gatersleben.de/apex/f?p=103:16:::NO::P16_EURISCO_ACC_ID:1668187"
            )),
        Characteristic(
            category=OntologyAnnotation(term="Seed Origin"),
            value=OntologyAnnotation(
                term="http://arabidopsis.info/StockInfo?NASC_id=22680")),
        Characteristic(
            category=OntologyAnnotation(term="Growth Facility"),
            value=OntologyAnnotation(term="small LemnaTec phytochamber")),
        Characteristic(
            category=OntologyAnnotation(term="Material Source Latitude"),
            value=OntologyAnnotation(term="51.827721")),
        Characteristic(
            category=OntologyAnnotation(term="Material Source Longitude"),
            value=OntologyAnnotation(term="11.27778"))
    ]
    sample_characteristic = Characteristic(
        category=OntologyAnnotation(term="Observation Unit Type"),
        value=OntologyAnnotation(term="plant"))

    # Growth Parameters
    growth_parameters = {
    }  # Name => [Value, Value REF, Value Accession, Unit, Unit REF, Unit Accession]
    with open("growth_parameters.csv") as gp:
        r = csv.DictReader(gp, delimiter=';')
        for row in r:
            growth_parameters[row["Parameter name"]] = list(
                row.values())[1:len(row)]
            prot_growth.parameters.append(
                ProtocolParameter(parameter_name=OntologyAnnotation(
                    term=row["Parameter name"])))

    growth_parameter_values = []
    for param in prot_growth.parameters:
        field_values = growth_parameters[param.parameter_name.term]
        if field_values[3]:
            if field_values[4]:
                unit = OntologyAnnotation(
                    term=field_values[3],
                    term_accession=field_values[5],
                    term_source=ontologies[field_values[4]])
            else:
                unit = OntologyAnnotation(term=field_values[3])
            # If there is a unit, the value should be a number
            value = float(field_values[0])
        else:
            unit = None
            if field_values[1]:
                value = OntologyAnnotation(
                    term=field_values[0],
                    term_accession=field_values[2],
                    term_source=ontologies[field_values[1]])
            else:
                value = OntologyAnnotation(term=field_values[0])

        growth_parameter_values.append(
            ParameterValue(category=param, value=value, unit=unit))

    # Write Study File
    for i in range(0, n_rows):
        source = Source(name='Plant_{}'.format(i))
        sample = Sample(name="1135FA-{}".format(i))
        study.samples.append(sample)
        study.sources.append(source)

        proc_growth = Process(executes_protocol=prot_growth)
        proc_growth.inputs.append(source)
        proc_growth.outputs.append(sample)
        study.process_sequence.append(proc_growth)

        source.characteristics.extend(common_characteristics)
        proc_growth.parameter_values.extend(growth_parameter_values)
        if i % 2 == 0:
            sample.factor_values.extend([fav_covered, fav_rotating])
        else:
            sample.factor_values.extend([fav_uncovered, fav_stationary])
        sample.characteristics.append(sample_characteristic)

    ## Read Phenotyping Parameters
    prot_phenotyping_parameters = {}
    with open("phenotyping_parameters.csv") as gp:
        r = csv.DictReader(gp, delimiter=';')
        for row in r:
            param = ProtocolParameter(parameter_name=OntologyAnnotation(
                term=row["Parameter name"]))
            prot_phenotyping_parameters[row["Parameter name"]] = param
            prot_phenotyping.parameters.append(param)

    prot_watering_parameters = {
        "Irrigation Type":
        ProtocolParameter(parameter_name=OntologyAnnotation(
            term="Irrigation Type")),
        "Volume":
        ProtocolParameter(parameter_name=OntologyAnnotation(term="Volume")),
    }
    prot_watering.parameters = prot_watering_parameters.values()

    datafile_comment = Comment(name="Image analysis tool", value="IAP")
    for i, sample in enumerate(study.samples):
        phenotyping_process = Process(executes_protocol=prot_phenotyping)
        phenotyping_process.inputs.append(sample)

        datafile = DataFile(
            filename=
            "{}FA_images/fluo/side/54/1135FA1001 side.fluo das_54 DEG_000 2011-10-12 11_09_36.png"
            .format(i),
            label="Raw Data File",
            generated_from=[sample])
        phenotyping_process.outputs.append(datafile)

        phenotyping_process.parameter_values.extend([
            ParameterValue(
                category=prot_phenotyping_parameters["Imaging Time"],
                value="28.09.2011 12:34:37"),
            ParameterValue(
                category=prot_phenotyping_parameters["Camera Configuration"],
                value="A_Fluo_Side_Big_Plant"),
            ParameterValue(
                category=prot_phenotyping_parameters["Camera Sensor"],
                value="FLUO"),
            ParameterValue(category=prot_phenotyping_parameters["Camera View"],
                           value="side"),
            ParameterValue(
                category=prot_phenotyping_parameters["Imaging Angle"],
                value=90.0,
                unit=OntologyAnnotation(
                    term="degree",
                    term_source=ontologies["UO"],
                    term_accession="http://purl.obolibrary.org/obo/UO_0000185")
            ),
        ])

        watering_process = Process(executes_protocol=prot_watering)
        watering_process.inputs.append(datafile)
        datafile2 = DataFile(
            filename="derived_data_files/das_{}.txt".format(i),
            label="Derived Data File",
            generated_from=[datafile])
        datafile2.comments.append(datafile_comment)
        watering_process.outputs.append(datafile2)

        watering_process.parameter_values.extend([
            ParameterValue(
                category=prot_watering_parameters["Irrigation Type"],
                value="automated (LemnaTec target weight)"),
            ParameterValue(
                category=prot_watering_parameters["Volume"],
                value=80.4,
                unit=OntologyAnnotation(
                    term="g",
                    term_source=ontologies["UO"],
                    term_accession="http://purl.obolibrary.org/obo/UO_0000021")
            ),
        ])

        plink(phenotyping_process, watering_process)
        assay.samples.append(sample)
        assay.data_files.append(datafile)
        assay.data_files.append(datafile2)
        assay.process_sequence.append(phenotyping_process)
        assay.process_sequence.append(watering_process)

    isatab.dump(investigation, "./")
    return time.process_time_ns() - starting_time
예제 #26
0
def measure_reduced(n_rows):
    starting_time = time.process_time_ns()
    investigation = Investigation()
    investigation.identifier = "i1"

    study = Study(filename="s_study.txt")
    study.identifier = "s1"
    investigation.studies.append(study)

    sample_collection_protocol = Protocol(name="sample collection")
    study.protocols.append(sample_collection_protocol)

    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    characteristic_organism = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            term="H**o Sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/9606"
        ))
    for i in range(0, n_rows):
        source = Source(name='source_material-{}'.format(i))
        sample = Sample(name="sample_material-{}".format(i))
        sample.characteristics.append(characteristic_organism)
        study.samples.append(sample)
        study.sources.append(source)

        sample_collection_process = Process(
            executes_protocol=sample_collection_protocol)
        sample_collection_process.inputs.append(source)
        sample_collection_process.outputs.append(sample)

        study.process_sequence.append(sample_collection_process)

    # Next, we build n Assay object and attach two protocols, extraction and sequencing.
    assay = Assay(filename="a_assay.txt")
    extraction_protocol = Protocol(name='extraction')
    study.protocols.append(extraction_protocol)
    sequencing_protocol = Protocol(name='sequencing')
    study.protocols.append(sequencing_protocol)

    for i, sample in enumerate(study.samples):

        # create an extraction process that executes the extraction protocol

        extraction_process = Process(executes_protocol=extraction_protocol)

        # extraction process takes as input a sample, and produces an extract material as output

        extraction_process.inputs.append(sample)
        material = Material(name="extract-{}".format(i))
        material.type = "Extract Name"
        extraction_process.outputs.append(material)

        # create a sequencing process that executes the sequencing protocol

        sequencing_process = Process(executes_protocol=sequencing_protocol)
        sequencing_process.name = "assay-name-{}".format(i)
        sequencing_process.inputs.append(extraction_process.outputs[0])

        # Sequencing process usually has an output data file

        datafile = DataFile(filename="sequenced-data-{}".format(i),
                            label="Raw Data File",
                            generated_from=[sample])
        sequencing_process.outputs.append(datafile)

        # Ensure Processes are linked forward and backward. plink(from_process, to_process) is a function to set
        # these links for you. It is found in the isatools.model package

        plink(extraction_process, sequencing_process)

        # make sure the extract, data file, and the processes are attached to the assay

        assay.samples.append(sample)
        assay.data_files.append(datafile)
        assay.other_material.append(material)
        assay.process_sequence.append(extraction_process)
        assay.process_sequence.append(sequencing_process)
        assay.measurement_type = OntologyAnnotation(term="gene sequencing")
        assay.technology_type = OntologyAnnotation(
            term="nucleotide sequencing")

    # attach the assay to the study

    study.assays.append(assay)

    isatab.dump(investigation, "./")
    return time.process_time_ns() - starting_time
예제 #27
0
def create_descriptor():
    dom = xml.dom.minidom.parse('./dataset-100194.xml')
    root = dom.documentElement

    data = root.getElementsByTagName('dataset')
    dataset = data[0]

    print(dataset.nodeName)

    investigation = Investigation()
    investigation.studies.append(Study())

    # ------------ dataset ---------------
    investigation.studies[0].filename = "s_study.txt"
    investigation.studies[0].identifier = "10.5524/100001"
    investigation.studies[0].title = "test dataset"
    investigation.studies[0].description = "this is test dataset"
    investigation.studies[0].public_release_date = "2016/11/11"

    # submitter
    contact = Person(first_name="Alice",
                     last_name="Robertson",
                     affiliation="University of Life",
                     email="*****@*****.**",
                     roles=[OntologyAnnotation(term='submitter')])
    investigation.studies[0].contacts.append(contact)

    publication = Publication(doi="10.5524/manuscript10002")
    publication.status = OntologyAnnotation(term="published")
    investigation.studies[0].publications.append(publication)

    #Data Repository
    investigation.studies[0].comments = []
    comment1 = Comment(name="Data Repository", value="ftp://climb.genomics.cn")
    investigation.studies[0].comments.append(comment1)

    #Data Record Accession
    comment2 = Comment(name="Data Record Accession",
                       value="ftp://climb.genomics.cn")
    investigation.studies[0].comments.append(comment2)

    ##funder
    comment3 = Comment(name="Funder Term Source REF",
                       value="ftp://climb.genomics.cn")  # funder url
    investigation.studies[0].comments.append(comment3)
    comment4 = Comment(name="Grant Identifier",
                       value="National ....")  # funder award
    investigation.studies[0].comments.append(comment4)
    comment5 = Comment(name="Awardee", value="National ....")  # funder comment
    investigation.studies[0].comments.append(comment5)

    ##publication
    comment6 = Comment(name="Data Repository",
                       value="GigaScience database")  # publication
    investigation.studies[0].comments.append(comment6)

    ##author
    author1 = Person(first_name="Alice", last_name="Robertson", roles="author")
    #if contain orcid
    comment7 = Comment(name="Study Person ORCID", value="111111-22221-00000")
    investigation.studies[0].comments.append(comment7)

    ##dataset type eg. Genomics
    comment8 = Comment(name="Subject Keywords", value="Genomics")
    investigation.studies[0].comments.append(comment8)

    ##dataset keyword
    comment9 = Comment(name="key", value="rna sequences")
    investigation.studies[0].comments.append(comment9)

    # ------------ sample ---------------
    source = Source(name='source_material')
    investigation.studies[0].materials['sources'].append(source)
    #sample name
    sample = Sample(name="SAMEA3518466", derives_from=source)
    #sample attribute
    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    characteristic1 = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            term="H**o Sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/9606"
        ))
    sample.characteristics.append(characteristic1)
    # characteristic2 = Characteristic(category="Term Source Ref", value="NCBITaxon")
    # sample.characteristics.append(characteristic2)
    # characteristic3 = Characteristic(category="Term Accession Number", value="http://eol_link")  #eol_link not need now
    # sample.characteristics.append(characteristic3)
    characteristic4 = Characteristic(
        category=OntologyAnnotation(term="geolocation"),
        value="10.222/2.00002222")  #eol_link not need now
    sample.characteristics.append(characteristic4)

    investigation.studies[0].materials['samples'].append(sample)

    #protocols
    sample_collection_protocol = Protocol(
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    investigation.studies[0].protocols.append(sample_collection_protocol)

    data_collection_protocol = Protocol(
        name="data collection",
        protocol_type=OntologyAnnotation(term="data collection"))
    investigation.studies[0].protocols.append(sample_collection_protocol)

    # study-level process sequence is needed to declare samples
    sample_collection_process = Process(
        executes_protocol=sample_collection_protocol)

    # Be careful here, this bit of code says to attach all sources to one process instance producing all samples
    # This works right now as there is only one source and sample, but if there are multiple source->collection-sample
    # instances, make sure you use a new Process object to hold them every time, otherwise the 1-1 relationship between
    # source and sample may be lost!
    for src in investigation.studies[0].materials['sources']:
        sample_collection_process.inputs.append(src)
    for sam in investigation.studies[0].materials['samples']:
        sample_collection_process.outputs.append(sam)

    investigation.studies[0].process_sequence.append(sample_collection_process)
    # ------------ file ---------------

    assay = Assay(filename="a_assay.txt")

    datafile = DataFile(
        filename="ftp://xxxxxxxxx",
        label="Raw Data File")  # needs 'label' set as it is the column name
    datafile.comments = []
    comment10 = Comment(name="File Description", value="test file")
    datafile.comments.append(comment10)

    assay.data_files.append(datafile)

    # assay-level process sequence is needed to declare data files
    data_collection_process = Process(
        executes_protocol=data_collection_protocol)
    data_collection_process.inputs.append(sample)
    data_collection_process.outputs.append(datafile)
    assay.process_sequence.append(data_collection_process)

    investigation.studies[0].assays.append(assay)

    from isatools.isatab import dump
    return dump(isa_obj=investigation, output_path='.')
예제 #28
0
def convert(json_path, output_path):
    print(json_path)
    print(output_path)

    with open(json_path, 'r') as f:
        dcc_json = json.load(f)

    # print(array['protocol'])
    # for element in array['protocol']:
    #     array['protocol'][element]['id']
    #     array['protocol'][element]['description']
    #     array['protocol'][element]['type']
    #     array['protocol'][element]['filename']

    # for element in array['measurement']:
    #     print(array['measurement'][element]['corrected_mz'])

    # for element in array['subject']:
    #     print(array['subject'][element]['species'])

    # Building the Investigation Object and its elements:

    project_set_json = dcc_json.get('project')

    if len(project_set_json) == 0:
        raise IOError('No project found in input JSON')

    # print(next(iter(project_set_json)))
    project_json = next(iter(project_set_json.values()))
    investigation = Investigation(identifier=project_json['id'])

    obi = OntologySource(name='OBI',
                         description='Ontology for Biomedical Investigations')
    investigation.ontology_source_references.append(obi)

    inv_person = Person(
        first_name=project_json['PI_first_name'],
        last_name=project_json['PI_last_name'],
        email=project_json['PI_email'],
        address=project_json['address'],
        affiliation=(', '.join(
            [project_json['department'], project_json['institution']])),
        roles=[
            OntologyAnnotation(term="",
                               term_source=obi,
                               term_accession="http://purl.org/obo/OBI_1")
        ])
    investigation.contacts.append(inv_person)

    study_set_json = dcc_json.get('study')

    if len(study_set_json) > 0:
        study_json = next(iter(study_set_json.values()))

        study = Study(
            identifier=study_json['id'],
            title=study_json['title'],
            description=study_json['description'],
            design_descriptors=[
                OntologyAnnotation(term=study_json['type'],
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ],
            filename='s_{study_id}.txt'.format(study_id=study_json['id']))

        investigation.studies = [study]

        studyid = study_json['id']
        print(studyid)
        study_person = Person(
            first_name=study_json['PI_first_name'],
            last_name=study_json['PI_last_name'],
            email=study_json['PI_email'],
            address=study_json['address'],
            affiliation=(', '.join(
                [study_json['department'], study_json['institution']])),
            roles=[
                OntologyAnnotation(term='principal investigator',
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ])

        study.contacts.append(study_person)

        for factor_json in dcc_json['factor'].values():
            factor = StudyFactor(name=factor_json['id'])
            study.factors.append(factor)

        for i, protocol_json in enumerate(dcc_json['protocol'].values()):
            oat_p = protocol_json['type']
            oa_protocol_type = OntologyAnnotation(
                term=oat_p,
                term_source=obi,
                term_accession="http://purl.org/obo/OBI_1")
            study.protocols.append(
                Protocol(name=protocol_json['id'],
                         protocol_type=oa_protocol_type,
                         description=protocol_json['description'],
                         uri=protocol_json['filename']))

            if 'MS' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='mass isotopologue distribution analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_112"),
                          technology_type=OntologyAnnotation(
                              term='mass spectrometry',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_ms_{count}.txt'.format(count=i)))

            if 'NMR' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='isotopomer analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_111"),
                          technology_type=OntologyAnnotation(
                              term='nmr spectroscopy',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_nmr.txt'))

        for subject_json in dcc_json['subject'].values():

            # print(array['subject'][element])
            if "organism" in subject_json['type']:

                source = Source(name=subject_json['id'])

                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)

            elif 'tissue_slice' in subject_json['type']:
                # print(array['subject'][element]['type'])
                source = Source(name=subject_json['id'])
                study.sources.append(source)
                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)

                sample = Sample(name=subject_json['id'],
                                derives_from=subject_json['parentID'])
                characteristic_organismpart = Characteristic(
                    category=OntologyAnnotation(term='organism_part'),
                    value=OntologyAnnotation(
                        term=subject_json['tissue_type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))

                sample.characteristics.append(characteristic_organismpart)
                study.samples.append(sample)
                # print(study.samples[0].name)

                sample_collection_process = Process(
                    executes_protocol=study.get_prot(
                        subject_json['protocol.id']))
                sample_collection_process.inputs.append(source)
                sample_collection_process.outputs.append(sample)
                study.process_sequence.append(sample_collection_process)

            else:
                source = Source(name=subject_json['id'])

                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)
                print(subject_json['id'])
                print(subject_json['species'])
                print(subject_json['type'])
        # for src in investigation.studies[0].materials:
        #
        # for sam in investigation.studies[0].materials:

        for sample_json in dcc_json['sample'].values():

            if 'cells' in sample_json['type']:
                material_separation_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                material_separation_process.name = sample_json['id']
                # dealing with input material, check that the parent material is already among known samples or sources

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    material_separation_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    print([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ])
                    material_separation_process.inputs.append([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ][0])

                material_out = Sample(name=sample_json['id'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_xxxxxxx"))
                material_out.characteristics.append(material_type)
                material_separation_process.outputs.append(material_out)
                study.assays[0].samples.append(material_out)
                try:
                    sample_collection_process
                except NameError:
                    sample_collection_process = None
                if sample_collection_process is None:
                    sample_collection_process = Process(executes_protocol="")
                else:
                    # plink(protein_extraction_process, data_acq_process)
                    # plink(material_separation_process, protein_extraction_process)

                    plink(sample_collection_process,
                          protein_extraction_process)

            if 'protein_extract' in sample_json['type']:
                protein_extraction_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                protein_extraction_process.name = sample_json['id']

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    protein_extraction_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # print([x for x in study.samples if x.name == sample_json['parentID']])
                    protein_extraction_process.inputs.append(material_in)

                # for material_in in study.samples:
                #     # print("OHO:", material_in.name)
                #     if material_in.name == sample_json['parentID']:
                #         # print("C:",sample_json['parentID'])
                #         #no need to create, just link to process
                #         protein_extraction_process.inputs.append(x)
                #     else:
                #         # print("D:", sample_json['parentID'])
                #         #create new material and link
                #         material_in = Sample(name=sample_json['parentID'])
                #         protein_extraction_process.inputs.append(material_in)

                material_out = Material(name=sample_json['id'])
                material_out.type = "Extract Name"
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))
                material_out.characteristics.append(material_type)

                study.assays[0].samples.append(material_in)
                study.assays[0].materials['other_material'].append(material_in)
                try:
                    material_separation_process
                except NameError:
                    material_separation_process = None
                if material_separation_process is None:
                    material_separation_process = Process(executes_protocol="")
                else:
                    # plink(protein_extraction_process, data_acq_process)
                    plink(material_separation_process,
                          protein_extraction_process)

            if 'polar' in sample_json['type']:

                material_in = Material(name=sample_json['parentID'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type',
                                                term_source=obi),
                    value=OntologyAnnotation(term=sample_json['type'],
                                             term_source=obi))
                material_in.characteristics.append(material_type)
                study.assays[0].materials['other_material'].append(material_in)

                data_acq_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                data_acq_process.name = sample_json['id']
                datafile = DataFile(
                    filename='{filename}.txt'.format(filename='_'.join(
                        ['mass_isotopomer-data', studyid, sample_json['id']])),
                    label='Raw Data File')
                data_acq_process.outputs.append(datafile)
                # print(study.assays[0].technology_type.term)

                study.assays[0].data_files.append(datafile)
                try:
                    protein_extraction_process
                except NameError:
                    protein_extraction_process = None
                if protein_extraction_process is None:
                    protein_extraction_process = Process(executes_protocol="")
                else:
                    plink(protein_extraction_process, data_acq_process)

            # else:
            #     material_in = Material(name=sample_json['parentID'])
            #     material_out = Material(name=sample_json['id'])
            #     material_type = Characteristic(
            #         category=OntologyAnnotation(term="material_type"),
            #         value=OntologyAnnotation(term=sample_json['type'],
            #                                  term_source=obi,
            #                                  term_accession="http://purl.org/obo/OBI_1"))
            #     material_out.characteristics.append(material_type)
            #     process = Process(executes_protocol=sample_json['protocol.id'])
            #     process.name = sample_json['id']
            #     process.inputs.append(material_in)
            #     process.outputs.append(material_out)
            #
            #     study.assays[0].materials['other_material'].append(material_in)
            #     study.assays[0].materials['other_material'].append(material_out)

            if 'bulk_tissue' in sample_json['type']:
                bulk_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                bulk_process.name = sample_json['id']

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    bulk_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # print([x for x in study.samples if x.name == sample_json['parentID']])
                    bulk_process.inputs.append(material_in)

                    plink(sample_collection_process, bulk_process)

    data_rec_header = '\t'.join(
        ('metabolite name', 'assignment', 'signal intensity', 'retention time',
         'm/z', 'formula', 'adduct', 'isotopologue', 'sample identifier'))
    records = []
    for element in dcc_json['measurement']:
        # metabolite_name: -> compound
        # array['measurement'][element]['signal_intensity']
        record = '\t'.join((dcc_json['measurement'][element]['compound'],
                            dcc_json['measurement'][element]['assignment'],
                            dcc_json['measurement'][element]['raw_intensity'],
                            dcc_json['measurement'][element]['retention_time'],
                            dcc_json['measurement'][element]['corrected_mz'],
                            dcc_json['measurement'][element]['formula'],
                            dcc_json['measurement'][element]['adduct'],
                            dcc_json['measurement'][element]['isotopologue'],
                            dcc_json['measurement'][element]['sample.id']))
        # print(record)
        records.append(record)

    if not os.path.exists(output_path):
        os.makedirs(output_path)
        try:
            with open(
                    '{output_path}/{study_id}-maf-data-nih-dcc-json.txt'.
                    format(output_path=output_path,
                           study_id=studyid), 'w') as fh:
                print(
                    "'writing 'maf file document' to file from 'generate_maf_file' method:..."
                )
                fh.writelines(data_rec_header)
                fh.writelines('\n')
                for item in records:
                    fh.writelines(item)
                    fh.writelines('\n')

            print("writing 'investigation information' to file...")
            print(isatab.dumps(investigation))

            isatab.dump(investigation, output_path=output_path)
        except IOError:
            print("Error: in main() method can't open file or write data")
예제 #29
0
    def _exportISATAB(self, destinationPath, detailsDict):
        """
		Export the dataset's metadata to the directory *destinationPath* as ISATAB
		detailsDict should have the format:
		detailsDict = {
		    'investigation_identifier' : "i1",
		    'investigation_title' : "Give it a title",
		    'investigation_description' : "Add a description",
		    'investigation_submission_date' : "2016-11-03",
		    'investigation_public_release_date' : "2016-11-03",
		    'first_name' : "Noureddin",
		    'last_name' : "Sadawi",
		    'affiliation' : "University",
		    'study_filename' : "my_ms_study",
		    'study_material_type' : "Serum",
		    'study_identifier' : "s1",
		    'study_title' : "Give the study a title",
		    'study_description' : "Add study description",
		    'study_submission_date' : "2016-11-03",
		    'study_public_release_date' : "2016-11-03",
		    'assay_filename' : "my_ms_assay"
		}

		:param str destinationPath: Path to a directory in which the output will be saved
		:param dict detailsDict: Contains several key, value pairs required to for ISATAB
		:raises IOError: If writing one of the files fails
		"""

        from isatools.model import Investigation, Study, Assay, OntologyAnnotation, OntologySource, Person, Publication, Protocol, Source
        from isatools.model import Comment, Sample, Characteristic, Process, Material, DataFile, ParameterValue, plink
        from isatools import isatab
        import isaExplorer as ie

        investigation = Investigation()

        investigation.identifier = detailsDict['investigation_identifier']
        investigation.title = detailsDict['investigation_title']
        investigation.description = detailsDict['investigation_description']
        investigation.submission_date = detailsDict[
            'investigation_submission_date']  #use today if not specified
        investigation.public_release_date = detailsDict[
            'investigation_public_release_date']
        study = Study(filename='s_' + detailsDict['study_filename'] + '.txt')
        study.identifier = detailsDict['study_identifier']
        study.title = detailsDict['study_title']
        study.description = detailsDict['study_description']
        study.submission_date = detailsDict['study_submission_date']
        study.public_release_date = detailsDict['study_public_release_date']
        investigation.studies.append(study)
        obi = OntologySource(
            name='OBI', description="Ontology for Biomedical Investigations")
        investigation.ontology_source_references.append(obi)
        intervention_design = OntologyAnnotation(term_source=obi)
        intervention_design.term = "intervention design"
        intervention_design.term_accession = "http://purl.obolibrary.org/obo/OBI_0000115"
        study.design_descriptors.append(intervention_design)

        # Other instance variables common to both Investigation and Study objects include 'contacts' and 'publications',
        # each with lists of corresponding Person and Publication objects.

        contact = Person(first_name=detailsDict['first_name'],
                         last_name=detailsDict['last_name'],
                         affiliation=detailsDict['affiliation'],
                         roles=[OntologyAnnotation(term='submitter')])
        study.contacts.append(contact)
        publication = Publication(title="Experiments with Data",
                                  author_list="Auther 1, Author 2")
        publication.pubmed_id = "12345678"
        publication.status = OntologyAnnotation(term="published")
        study.publications.append(publication)

        # To create the study graph that corresponds to the contents of the study table file (the s_*.txt file), we need
        # to create a process sequence. To do this we use the Process class and attach it to the Study object's
        # 'process_sequence' list instance variable. Each process must be linked with a Protocol object that is attached to
        # a Study object's 'protocols' list instance variable. The sample collection Process object usually has as input
        # a Source material and as output a Sample material.

        sample_collection_protocol = Protocol(
            id_="sample collection",
            name="sample collection",
            protocol_type=OntologyAnnotation(term="sample collection"))
        aliquoting_protocol = Protocol(
            id_="aliquoting",
            name="aliquoting",
            protocol_type=OntologyAnnotation(term="aliquoting"))

        for index, row in self.sampleMetadata.iterrows():
            src_name = row['Sample File Name']
            source = Source(name=src_name)

            source.comments.append(
                Comment(name='Study Name', value=row['Study']))
            study.sources.append(source)

            sample_name = src_name
            sample = Sample(name=sample_name, derives_from=[source])
            # check if field exists first
            status = row[
                'Status'] if 'Status' in self.sampleMetadata.columns else 'N/A'
            characteristic_material_type = Characteristic(
                category=OntologyAnnotation(term="material type"),
                value=status)
            sample.characteristics.append(characteristic_material_type)

            #characteristic_material_role = Characteristic(category=OntologyAnnotation(term="material role"), value=row['AssayRole'])
            #sample.characteristics.append(characteristic_material_role)

            # check if field exists first
            age = row['Age'] if 'Age' in self.sampleMetadata.columns else 'N/A'
            characteristic_age = Characteristic(
                category=OntologyAnnotation(term="Age"),
                value=age,
                unit='Year')
            sample.characteristics.append(characteristic_age)
            # check if field exists first
            gender = row[
                'Gender'] if 'Gender' in self.sampleMetadata.columns else 'N/A'
            characteristic_gender = Characteristic(
                category=OntologyAnnotation(term="Gender"), value=gender)
            sample.characteristics.append(characteristic_gender)

            ncbitaxon = OntologySource(name='NCBITaxon',
                                       description="NCBI Taxonomy")
            characteristic_organism = Characteristic(
                category=OntologyAnnotation(term="Organism"),
                value=OntologyAnnotation(
                    term="H**o Sapiens",
                    term_source=ncbitaxon,
                    term_accession=
                    "http://purl.bioontology.org/ontology/NCBITAXON/9606"))
            sample.characteristics.append(characteristic_organism)

            study.samples.append(sample)

            # check if field exists first
            sampling_date = row['Sampling Date'] if not pandas.isnull(
                row['Sampling Date']) else None
            sample_collection_process = Process(
                id_='sam_coll_proc',
                executes_protocol=sample_collection_protocol,
                date_=sampling_date)
            aliquoting_process = Process(id_='sam_coll_proc',
                                         executes_protocol=aliquoting_protocol,
                                         date_=sampling_date)

            sample_collection_process.inputs = [source]
            aliquoting_process.outputs = [sample]

            # links processes
            plink(sample_collection_process, aliquoting_process)

            study.process_sequence.append(sample_collection_process)
            study.process_sequence.append(aliquoting_process)

        study.protocols.append(sample_collection_protocol)
        study.protocols.append(aliquoting_protocol)

        ### Add NMR Assay ###
        nmr_assay = Assay(
            filename='a_' + detailsDict['assay_filename'] + '.txt',
            measurement_type=OntologyAnnotation(term="metabolite profiling"),
            technology_type=OntologyAnnotation(term="NMR spectroscopy"))
        extraction_protocol = Protocol(
            name='extraction',
            protocol_type=OntologyAnnotation(term="material extraction"))

        study.protocols.append(extraction_protocol)
        nmr_protocol = Protocol(
            name='NMR spectroscopy',
            protocol_type=OntologyAnnotation(term="NMR Assay"))
        nmr_protocol.add_param('Run Order')
        #if 'Instrument' in self.sampleMetadata.columns:
        nmr_protocol.add_param('Instrument')
        #if 'Sample Batch' in self.sampleMetadata.columns:
        nmr_protocol.add_param('Sample Batch')
        nmr_protocol.add_param('Acquisition Batch')

        study.protocols.append(nmr_protocol)

        #for index, row in sampleMetadata.iterrows():
        for index, sample in enumerate(study.samples):
            row = self.sampleMetadata.loc[
                self.sampleMetadata['Sample File Name'].astype(
                    str) == sample.name]
            # create an extraction process that executes the extraction protocol
            extraction_process = Process(executes_protocol=extraction_protocol)

            # extraction process takes as input a sample, and produces an extract material as output
            sample_name = sample.name
            sample = Sample(name=sample_name, derives_from=[source])
            #print(row['Acquired Time'].values[0])

            extraction_process.inputs.append(sample)
            material = Material(name="extract-{}".format(index))
            material.type = "Extract Name"
            extraction_process.outputs.append(material)

            # create a ms process that executes the nmr protocol
            nmr_process = Process(executes_protocol=nmr_protocol,
                                  date_=datetime.isoformat(
                                      datetime.strptime(
                                          str(row['Acquired Time'].values[0]),
                                          '%Y-%m-%d %H:%M:%S')))

            nmr_process.name = "assay-name-{}".format(index)
            nmr_process.inputs.append(extraction_process.outputs[0])
            # nmr process usually has an output data file
            # check if field exists first
            assay_data_name = row['Assay data name'].values[
                0] if 'Assay data name' in self.sampleMetadata.columns else 'N/A'
            datafile = DataFile(filename=assay_data_name,
                                label="NMR Assay Name",
                                generated_from=[sample])
            nmr_process.outputs.append(datafile)

            #nmr_process.parameter_values.append(ParameterValue(category='Run Order',value=str(i)))
            nmr_process.parameter_values = [
                ParameterValue(category=nmr_protocol.get_param('Run Order'),
                               value=row['Run Order'].values[0])
            ]
            # check if field exists first
            instrument = row['Instrument'].values[
                0] if 'Instrument' in self.sampleMetadata.columns else 'N/A'
            nmr_process.parameter_values.append(
                ParameterValue(category=nmr_protocol.get_param('Instrument'),
                               value=instrument))
            # check if field exists first
            sbatch = row['Sample batch'].values[
                0] if 'Sample batch' in self.sampleMetadata.columns else 'N/A'
            nmr_process.parameter_values.append(
                ParameterValue(category=nmr_protocol.get_param('Sample Batch'),
                               value=sbatch))
            nmr_process.parameter_values.append(
                ParameterValue(
                    category=nmr_protocol.get_param('Acquisition Batch'),
                    value=row['Batch'].values[0]))

            # ensure Processes are linked forward and backward
            plink(extraction_process, nmr_process)
            # make sure the extract, data file, and the processes are attached to the assay
            nmr_assay.samples.append(sample)
            nmr_assay.data_files.append(datafile)
            nmr_assay.other_material.append(material)
            nmr_assay.process_sequence.append(extraction_process)
            nmr_assay.process_sequence.append(nmr_process)
            nmr_assay.measurement_type = OntologyAnnotation(
                term="metabolite profiling")
            nmr_assay.technology_type = OntologyAnnotation(
                term="NMR spectroscopy")

        # attach the assay to the study
        study.assays.append(nmr_assay)

        if os.path.exists(os.path.join(destinationPath,
                                       'i_Investigation.txt')):
            ie.appendStudytoISA(study, destinationPath)
        else:
            isatab.dump(isa_obj=investigation, output_path=destinationPath)
예제 #30
0
    sequencing_process.outputs.append(datafile)

    # Ensure Processes are linked forward and backward. plink(from_process, to_process) is a function to set
    # these links for you. It is found in the isatools.model package

    plink(extraction_process, sequencing_process)

    # make sure the extract, data file, and the processes are attached to the assay

    assay.samples.append(sample)
    assay.data_files.append(datafile)
    assay.other_material.append(material)
    assay.process_sequence.append(extraction_process)
    assay.process_sequence.append(sequencing_process)
#    assay.measurement_type = OntologyAnnotation(term="gene sequencing")
#    assay.technology_type = OntologyAnnotation(term="nucleotide sequencing")

isatab.dump(investigation, ".")
shutil.copyfile(
    "i_investigation.txt",
    "../../isa4J/src/test/resources/de/ipk_gatersleben/bit/bi/isa4j/components/python_originals/i_investigation.txt"
)
shutil.copyfile(
    "s_study.txt",
    "../../isa4J/src/test/resources/de/ipk_gatersleben/bit/bi/isa4j/components/python_originals/s_study.txt"
)
shutil.copyfile(
    "a_assay.txt",
    "../../isa4J/src/test/resources/de/ipk_gatersleben/bit/bi/isa4j/components/python_originals/a_assay.txt"
)