Ejemplo n.º 1
0
def split_tables(sdrf_path):
    """Split a table file into a study table and an assay table.

    The table is read with ``isatab.read_tfile`` and cut at its
    "Sample Name" column. That column is included in both halves: it is the
    last column of the study table and the first column of the assay table.
    Duplicate rows are dropped from the study table only. The
    ``isatab_header`` attribute of the source table is sliced the same way
    and attached to each half.

    :param sdrf_path: Path of the table file to split
    :return: Tuple ``(study_df, assay_df)``
    :raises ValueError: if the table has no "Sample Name" column
    """
    full_table = isatab.read_tfile(sdrf_path)
    full_header = full_table.isatab_header
    all_columns = full_table.columns

    split_at = list(all_columns).index("Sample Name")
    study_end = split_at + 1  # slice end that keeps "Sample Name" itself

    study_df = full_table[all_columns[:study_end]].drop_duplicates()
    study_df.isatab_header = full_header[:study_end]

    assay_df = full_table[all_columns[split_at:]]
    assay_df.isatab_header = full_header[split_at:]

    return study_df, assay_df
Ejemplo n.º 2
0
    def replace_factor_with_protocol_parameter_value(self, factor_name,
                                                     protocol_ref):
        """Fixes a factor if it's supposed to be a Parameter Value

        The ``Factor Value[<factor_name>]`` column of the table file at
        ``self.path`` is moved, together with any ``Unit`` /
        ``Term Source REF`` / ``Term Accession`` columns that directly follow
        it, to sit immediately after the Protocol REF column whose first data
        row equals ``protocol_ref``. The moved factor column is relabelled
        ``Parameter Value[...]``. The investigation found next to
        ``self.path`` is updated (parameter added to the protocol, factor
        removed from the study) and both results are written next to the
        originals with a ``.fix`` suffix.

        :param factor_name: The factor that's incorrect
        :param protocol_ref: Protocol REF for the new Parameter Value
        :return: None
        :raises ValueError: if no ``Factor Value[<factor_name>]`` column
            exists in the table
        :raises IOError: if no cell of the first data row matches
            ``protocol_ref``
        :raises ISAModelAttributeError: if the study has no protocol named
            ``protocol_ref`` or no factor named ``factor_name``
        """
        table_file_df = isatab.read_tfile(self.path)

        field_names = list(table_file_df.columns)
        clean_field_names = self.clean_isatab_field_names(field_names)

        factor_index = clean_field_names.index(
            'Factor Value[{factor_name}]'.format(factor_name=factor_name))

        # The Protocol REF column is located through its value in the first
        # data row of the raw file (the line after the header).
        with open(self.path) as tfile_fp:
            next(tfile_fp)
            line1 = next(tfile_fp)
        # Strip surrounding double quotes; the length check keeps empty
        # cells from raising IndexError.
        first_row = [
            cell[1:-1]
            if len(cell) > 1 and cell[0] == '"' and cell[-1] == '"'
            else cell
            for cell in line1.rstrip('\r\n').split('\t')]
        try:
            protocol_ref_index = first_row.index(protocol_ref)
        except ValueError:
            # list.index never returns a negative value, so the failure has
            # to be caught here to report it as intended.
            raise IOError(
                'Could not find protocol ref matching {protocol_ref}'.format(
                    protocol_ref=protocol_ref))

        def follows_factor(offset, label):
            # Bounds-safe check of the column `offset` places after the
            # factor column (the factor may be the last column).
            position = factor_index + offset
            return position < len(field_names) and \
                label in field_names[position]

        # Work out how many columns travel with the factor column
        if follows_factor(1, 'Term Source REF') and \
                follows_factor(2, 'Term Accession'):
            log.debug('Moving Factor Value[{}] with term columns'.format(
                factor_name))
            span = 3  # Factor Value, Term Source REF, Term Accession
        elif follows_factor(1, 'Unit') and \
                follows_factor(2, 'Term Source REF') and \
                follows_factor(3, 'Term Accession'):
            log.debug(
                'Moving Factor Value[{factor_name}] with unit term columns'.
                format(factor_name=factor_name))
            span = 4  # Factor Value, Unit, Term Source REF, Term Accession
        elif follows_factor(1, 'Unit'):
            log.debug(
                'Moving Factor Value[{factor_name}] with unit column'.format(
                    factor_name=factor_name))
            span = 2  # Factor Value, Unit
        else:  # move only the Factor Value column
            log.debug('Moving Factor Value[{factor_name}]'.format(
                factor_name=factor_name))
            span = 1

        # Build the new column order by position so that the cell data
        # travels with its header; assigning a reordered label list alone
        # would relabel data that never moved.
        moved = list(range(factor_index, factor_index + span))
        kept = [i for i in range(len(field_names)) if i not in moved]
        insert_at = kept.index(protocol_ref_index) + 1
        new_order = kept[:insert_at] + moved + kept[insert_at:]

        table_file_df = table_file_df.iloc[:, new_order]
        field_names_modified = self.clean_isatab_field_names(
            [field_names[i] for i in new_order])

        # Rename Factor Value column to Parameter Value column
        field_names_modified[insert_at] = \
            field_names_modified[insert_at].replace(
                'Factor Value', 'Parameter Value')
        table_file_df.columns = self.clean_isatab_field_names(
            field_names_modified)

        investigation = isatab.load(os.path.dirname(self.path),
                                    skip_load_tables=True)
        study = investigation.studies[-1]
        protocol = study.get_prot(protocol_ref)
        if protocol is None:
            raise ISAModelAttributeError(
                'No protocol with name {protocol_ref} was found'.format(
                    protocol_ref=protocol_ref))
        protocol.add_param(factor_name)
        factor = study.get_factor(factor_name)
        if factor is None:
            raise ISAModelAttributeError(
                'No factor with name {factor_name} was found'.format(
                    factor_name=factor_name))
        else:
            study.del_factor(name=factor_name, are_you_sure=True)

        study.filename = '{study_filename}.fix'.format(
            study_filename=study.filename)

        isatab.dump(investigation,
                    output_path=os.path.dirname(self.path),
                    i_file_name='i_Investigation.txt.fix',
                    skip_dump_tables=True)

        fixed_table_path = os.path.join(
            os.path.dirname(self.path), '{s_filename}.fix'.format(
                s_filename=os.path.basename(self.path)))
        # Pass the path rather than an open text handle so that the
        # requested encoding is actually applied by pandas.
        table_file_df.to_csv(fixed_table_path,
                             index=False,
                             sep='\t',
                             encoding='utf-8')
Ejemplo n.º 3
0
    def replace_factor_with_source_characteristic(self, factor_name):
        """Fixes a factor if it's supposed to be a source characteristic
        attached

        The ``Factor Value[<factor_name>]`` column of the table file at
        ``self.path`` is moved, together with any ``Unit`` /
        ``Term Source REF`` / ``Term Accession`` columns that directly follow
        it, to sit immediately after the ``Source Name`` column. The moved
        factor column is relabelled ``Characteristics[...]`` and the table is
        written back to ``self.path``, overwriting it.

        :param factor_name: The factor that's incorrect
        :return: None
        :raises ValueError: if the table has no
            ``Factor Value[<factor_name>]`` column or no ``Source Name``
            column
        """
        table_file_df = isatab.read_tfile(self.path)

        field_names = list(table_file_df.columns)
        clean_field_names = self.clean_isatab_field_names(field_names)

        factor_index = clean_field_names.index(
            'Factor Value[{}]'.format(factor_name))
        source_name_index = clean_field_names.index('Source Name')

        def follows_factor(offset, label):
            # Bounds-safe check of the column `offset` places after the
            # factor column (the factor may be the last column).
            position = factor_index + offset
            return position < len(field_names) and \
                label in field_names[position]

        # Work out how many columns travel with the factor column
        if follows_factor(1, 'Term Source REF') and \
                follows_factor(2, 'Term Accession'):
            log.debug('Moving Factor Value[{}] with term columns'.format(
                factor_name))
            span = 3  # Factor Value, Term Source REF, Term Accession
        elif follows_factor(1, 'Unit') and \
                follows_factor(2, 'Term Source REF') and \
                follows_factor(3, 'Term Accession'):
            log.debug('Moving Factor Value[{}] with unit term columns'.format(
                factor_name))
            span = 4  # Factor Value, Unit, Term Source REF, Term Accession
        elif follows_factor(1, 'Unit'):
            log.debug(
                'Moving Factor Value[{}] with unit column'.format(factor_name))
            span = 2  # Factor Value, Unit
        else:  # move only the Factor Value column
            log.debug('Moving Factor Value[{}]'.format(factor_name))
            span = 1

        # Build the new column order by position so that the cell data
        # travels with its header; assigning a reordered label list alone
        # would relabel data that never moved.
        moved = list(range(factor_index, factor_index + span))
        kept = [i for i in range(len(field_names)) if i not in moved]
        insert_at = kept.index(source_name_index) + 1
        new_order = kept[:insert_at] + moved + kept[insert_at:]

        table_file_df = table_file_df.iloc[:, new_order]
        field_names_modified = self.clean_isatab_field_names(
            [field_names[i] for i in new_order])

        # Rename Factor Value column to Characteristics column
        field_names_modified[insert_at] = \
            field_names_modified[insert_at].replace(
                'Factor Value', 'Characteristics')
        table_file_df.columns = self.clean_isatab_field_names(
            field_names_modified)

        # Pass the path rather than an open text handle so that the
        # requested encoding is actually applied by pandas.
        table_file_df.to_csv(self.path,
                             index=False,
                             sep='\t',
                             encoding='utf-8')
Ejemplo n.º 4
0
def load_df(path):
    """Read a table file and mark its empty cells as missing.

    The table is read with ``ISATAB.read_tfile``; every cell equal to the
    empty string is then replaced, in place, with ``numpy.nan``.

    :param path: Path of the table file to read
    :return: The table returned by ``ISATAB.read_tfile``, modified in place
    """
    table = ISATAB.read_tfile(path)
    # In-place replacement: the object from read_tfile is the one returned.
    table.replace('', numpy.nan, inplace=True)
    return table
Ejemplo n.º 5
0
def get_study_df(input_dir, study):
    """Read the table file of a study.

    :param input_dir: Directory containing the study file
    :param study: Object whose ``filename`` attribute names the study file
    :return: Result of ``ISATAB.read_tfile`` for that file
    """
    study_path = os.path.join(input_dir, study.filename)
    return ISATAB.read_tfile(study_path)
Ejemplo n.º 6
0
def get_measures_df(input_dir, assay):
    """Read the data file associated with an assay.

    The file name is obtained from ``get_data_file(assay)`` and resolved
    against ``input_dir``.

    :param input_dir: Directory containing the data file
    :param assay: Assay passed to ``get_data_file`` to obtain the file name
    :return: Result of ``ISATAB.read_tfile`` for that file
    """
    measures_path = os.path.join(input_dir, get_data_file(assay))
    return ISATAB.read_tfile(measures_path)
Ejemplo n.º 7
0
def get_assay_df(input_dir, assay):
    """Read the table file of an assay.

    :param input_dir: Directory containing the assay file
    :param assay: Object whose ``filename`` attribute names the assay file
    :return: Result of ``ISATAB.read_tfile`` for that file
    """
    assay_path = os.path.join(input_dir, assay.filename)
    return ISATAB.read_tfile(assay_path)
Ejemplo n.º 8
0
    def replace_factor_with_source_characteristic(self, factor_name):
        """Move a Factor Value column next to the Source Name column.

        The ``Factor Value[<factor_name>]`` column of the table file at
        ``self.path`` is moved, together with any ``Unit`` /
        ``Term Source REF`` / ``Term Accession`` columns that directly follow
        it, to sit immediately after the ``Source Name`` column. The table is
        then written back to ``self.path``, overwriting it. The moved column
        is only repositioned; no label is changed here beyond what
        ``clean_isatab_field_names`` does.

        :param factor_name: The factor that's incorrect
        :return: None
        :raises ValueError: if the table has no
            ``Factor Value[<factor_name>]`` column or no ``Source Name``
            column
        """
        table_file_df = isatab.read_tfile(self.path)

        field_names = list(table_file_df.columns)
        clean_field_names = self.clean_isatab_field_names(field_names)

        factor_index = clean_field_names.index(
            'Factor Value[{}]'.format(factor_name))
        source_name_index = clean_field_names.index('Source Name')

        def follows_factor(offset, label):
            # Bounds-safe check of the column `offset` places after the
            # factor column (the factor may be the last column).
            position = factor_index + offset
            return position < len(field_names) and \
                label in field_names[position]

        # Work out how many columns travel with the factor column
        if follows_factor(1, 'Term Source REF') and \
                follows_factor(2, 'Term Accession'):
            log.debug('Moving Factor Value[{}] with term columns'.format(
                factor_name))
            span = 3  # Factor Value, Term Source REF, Term Accession
        elif follows_factor(1, 'Unit') and \
                follows_factor(2, 'Term Source REF') and \
                follows_factor(3, 'Term Accession'):
            log.debug('Moving Factor Value[{}] with unit term columns'.format(
                factor_name))
            span = 4  # Factor Value, Unit, Term Source REF, Term Accession
        elif follows_factor(1, 'Unit'):
            log.debug(
                'Moving Factor Value[{}] with unit column'.format(factor_name))
            span = 2  # Factor Value, Unit
        else:  # move only the Factor Value column
            log.debug('Moving Factor Value[{}]'.format(factor_name))
            span = 1

        # Build the new column order by position so that the cell data
        # travels with its header; assigning a reordered label list alone
        # would relabel data that never moved.
        moved = list(range(factor_index, factor_index + span))
        kept = [i for i in range(len(field_names)) if i not in moved]
        insert_at = kept.index(source_name_index) + 1
        new_order = kept[:insert_at] + moved + kept[insert_at:]

        table_file_df = table_file_df.iloc[:, new_order]
        table_file_df.columns = self.clean_isatab_field_names(
            [field_names[i] for i in new_order])

        # Pass the path rather than an open text handle so that the
        # requested encoding is actually applied by pandas.
        table_file_df.to_csv(self.path,
                             index=False,
                             sep='\t',
                             encoding='utf-8')