def split_tables(sdrf_path):
    """Split one table file into a study part and an assay part.

    The table is read with ``isatab.read_tfile`` and cut at its
    ``Sample Name`` column, which is kept in both halves.

    :param sdrf_path: Path of the table file to read
    :return: Tuple ``(study_df, assay_df)``. ``study_df`` holds the columns
        up to and including ``Sample Name`` with duplicate rows dropped;
        ``assay_df`` holds the columns from ``Sample Name`` onwards. Each
        carries the matching slice of the source ``isatab_header``.
    """
    full_table = isatab.read_tfile(sdrf_path)
    header = full_table.isatab_header
    columns = full_table.columns
    split_at = list(columns).index("Sample Name")

    # Left half: everything up to and including Sample Name, de-duplicated.
    study_df = full_table[columns[:split_at + 1]].drop_duplicates()
    study_df.isatab_header = header[:split_at + 1]

    # Right half: Sample Name and everything after it.
    assay_df = full_table[columns[split_at:]]
    assay_df.isatab_header = header[split_at:]

    return study_df, assay_df
def replace_factor_with_protocol_parameter_value(self, factor_name,
                                                 protocol_ref):
    """Fixes a factor if it's supposed to be a Parameter Value

    The ``Factor Value[factor_name]`` column of the table at ``self.path``,
    together with any ``Unit`` / ``Term Source REF`` / ``Term Accession``
    columns directly following it, is moved (headers and cell values
    alike) to sit right after the column whose first data row holds
    ``protocol_ref``. The moved header is relabelled from ``Factor Value``
    to ``Parameter Value``.

    The investigation found in the table's directory is then loaded, the
    parameter is added to the matching protocol of its last study and the
    factor is deleted from that study. The investigation is dumped as
    ``i_Investigation.txt.fix`` and the table is written as
    ``<table file name>.fix`` in the same directory.

    :param factor_name: The factor that's incorrect
    :param protocol_ref: Protocol REF for the new Parameter Value
    :return: None
    :raises ValueError: if the factor column is not among the cleaned
        field names (raised by the ``index`` lookup)
    :raises IOError: if no usable cell in the first data row equals
        ``protocol_ref``
    :raises ISAModelAttributeError: if the study has no protocol named
        ``protocol_ref`` or no factor named ``factor_name``
    """
    table_file_df = isatab.read_tfile(self.path)

    field_names = list(table_file_df.columns)
    clean_field_names = self.clean_isatab_field_names(field_names)

    factor_index = clean_field_names.index(
        'Factor Value[{factor_name}]'.format(factor_name=factor_name))

    missing_ref_msg = \
        'Could not find protocol ref matching {protocol_ref}'.format(
            protocol_ref=protocol_ref)

    def unquote(cell):
        # Slices rather than indexes so an empty cell cannot raise.
        if cell[:1] == '"' and cell[-1:] == '"':
            return cell[1:-1]
        return cell

    with open(self.path) as tfile_fp:
        next(tfile_fp, None)  # skip the header row
        first_row = next(tfile_fp, '')
    # Drop the line terminator so the last cell can match as well.
    first_row_cells = [
        unquote(cell) for cell in first_row.rstrip('\r\n').split('\t')]
    try:
        protocol_ref_index = first_row_cells.index(protocol_ref)
    except ValueError:
        # list.index never returns a negative value; a miss is an exception.
        raise IOError(missing_ref_msg)

    total = len(field_names)

    def follows(offset, label):
        # True when a column exists `offset` places after the factor column
        # and its header contains `label`.
        position = factor_index + offset
        return position < total and label in field_names[position]

    # `span` is the number of adjacent columns that travel with the factor.
    if follows(1, 'Term Source REF') and follows(2, 'Term Accession'):
        log.debug('Moving Factor Value[{}] with term columns'.format(
            factor_name))
        span = 3
    elif follows(1, 'Unit') and follows(2, 'Term Source REF') and \
            follows(3, 'Term Accession'):
        log.debug(
            'Moving Factor Value[{factor_name}] with unit term columns'.
            format(factor_name=factor_name))
        span = 4
    elif follows(1, 'Unit'):
        log.debug(
            'Moving Factor Value[{factor_name}] with unit column'.format(
                factor_name=factor_name))
        span = 2
    else:
        log.debug('Moving Factor Value[{factor_name}]'.format(
            factor_name=factor_name))
        span = 1

    if factor_index <= protocol_ref_index < factor_index + span:
        # The value matched inside the factor's own columns, so there is
        # no protocol column to move them next to.
        raise IOError(missing_ref_msg)

    # Permute column positions so the cell values move with their headers.
    order = list(range(total))
    moved = order[factor_index:factor_index + span]
    del order[factor_index:factor_index + span]
    if protocol_ref_index < factor_index:
        target = protocol_ref_index + 1
    else:
        # The protocol column shifted left when the span was removed.
        target = protocol_ref_index - span + 1
    order[target:target] = moved

    table_file_df = table_file_df.iloc[:, order]

    # Rename Factor Value column to Parameter Value column
    field_names_modified = list(self.clean_isatab_field_names(
        [field_names[position] for position in order]))
    field_names_modified[target] = field_names_modified[target].replace(
        'Factor Value', 'Parameter Value')
    table_file_df.columns = self.clean_isatab_field_names(
        field_names_modified)

    investigation = isatab.load(os.path.dirname(self.path),
                                skip_load_tables=True)
    study = investigation.studies[-1]
    protocol = study.get_prot(protocol_ref)
    if protocol is None:
        raise ISAModelAttributeError(
            'No protocol with name {protocol_ref} was found'.format(
                protocol_ref=protocol_ref))
    protocol.add_param(factor_name)
    factor = study.get_factor(factor_name)
    if factor is None:
        raise ISAModelAttributeError(
            'No factor with name {factor_name} was found'.format(
                factor_name=factor_name))
    study.del_factor(name=factor_name, are_you_sure=True)

    study.filename = '{study_filename}.fix'.format(
        study_filename=study.filename)

    isatab.dump(investigation, output_path=os.path.dirname(self.path),
                i_file_name='i_Investigation.txt.fix',
                skip_dump_tables=True)

    fixed_table_path = os.path.join(
        os.path.dirname(self.path),
        '{s_filename}.fix'.format(s_filename=os.path.basename(self.path)))
    with open(fixed_table_path, 'w') as out_fp:
        table_file_df.to_csv(path_or_buf=out_fp, index=False, sep='\t',
                             encoding='utf-8')
def replace_factor_with_source_characteristic(self, factor_name):
    """Fixes a factor if it's supposed to be a source characteristic

    The ``Factor Value[factor_name]`` column of the table at ``self.path``,
    together with any ``Unit`` / ``Term Source REF`` / ``Term Accession``
    columns directly following it, is moved (headers and cell values
    alike) to sit right after the ``Source Name`` column. The moved header
    is relabelled from ``Factor Value`` to ``Characteristics`` and the
    table is written back to ``self.path``.

    :param factor_name: The factor that's incorrect
    :return: None
    :raises ValueError: if the factor column or ``Source Name`` is not
        among the cleaned field names (raised by the ``index`` lookup)
    """
    table_file_df = isatab.read_tfile(self.path)

    field_names = list(table_file_df.columns)
    clean_field_names = self.clean_isatab_field_names(field_names)

    factor_index = clean_field_names.index(
        'Factor Value[{}]'.format(factor_name))
    source_name_index = clean_field_names.index('Source Name')

    total = len(field_names)

    def follows(offset, label):
        # True when a column exists `offset` places after the factor column
        # and its header contains `label`.
        position = factor_index + offset
        return position < total and label in field_names[position]

    # `span` is the number of adjacent columns that travel with the factor.
    if follows(1, 'Term Source REF') and follows(2, 'Term Accession'):
        log.debug('Moving Factor Value[{}] with term columns'.format(
            factor_name))
        span = 3
    elif follows(1, 'Unit') and follows(2, 'Term Source REF') and \
            follows(3, 'Term Accession'):
        log.debug('Moving Factor Value[{}] with unit term columns'.format(
            factor_name))
        span = 4
    elif follows(1, 'Unit'):
        log.debug(
            'Moving Factor Value[{}] with unit column'.format(factor_name))
        span = 2
    else:
        log.debug('Moving Factor Value[{}]'.format(factor_name))
        span = 1

    # Permute column positions so the cell values move with their headers.
    order = list(range(total))
    moved = order[factor_index:factor_index + span]
    del order[factor_index:factor_index + span]
    if source_name_index < factor_index:
        target = source_name_index + 1
    else:
        # Source Name shifted left when the span was removed.
        target = source_name_index - span + 1
    order[target:target] = moved

    table_file_df = table_file_df.iloc[:, order]

    # Rename Factor Value column to Characteristics column
    field_names_modified = list(self.clean_isatab_field_names(
        [field_names[position] for position in order]))
    field_names_modified[target] = field_names_modified[target].replace(
        'Factor Value', 'Characteristics')
    table_file_df.columns = self.clean_isatab_field_names(
        field_names_modified)

    with open(self.path, 'w') as out_fp:
        table_file_df.to_csv(path_or_buf=out_fp, index=False, sep='\t',
                             encoding='utf-8')
def load_df(path):
    """Read a table file and turn its empty-string cells into NaN.

    :param path: Path handed to ``ISATAB.read_tfile``
    :return: The object returned by ``ISATAB.read_tfile``, with every ``''``
        value replaced by ``numpy.nan``
    """
    table = ISATAB.read_tfile(path)
    # Replaced in place so the object from read_tfile itself is returned.
    table.replace(to_replace='', value=numpy.nan, inplace=True)
    return table
def get_study_df(input_dir, study):
    """Read the table file of a study.

    :param input_dir: Directory containing the study file
    :param study: Object whose ``filename`` attribute names the study file
    :return: Whatever ``ISATAB.read_tfile`` returns for that file
    """
    study_path = os.path.join(input_dir, study.filename)
    return ISATAB.read_tfile(study_path)
def get_measures_df(input_dir, assay):
    """Read the data file associated with an assay.

    The file name is obtained from ``get_data_file(assay)``.

    :param input_dir: Directory containing the data file
    :param assay: Assay passed to ``get_data_file`` to find the file name
    :return: Whatever ``ISATAB.read_tfile`` returns for that file
    """
    measures_path = os.path.join(input_dir, get_data_file(assay))
    return ISATAB.read_tfile(measures_path)
def get_assay_df(input_dir, assay):
    """Read the table file of an assay.

    :param input_dir: Directory containing the assay file
    :param assay: Object whose ``filename`` attribute names the assay file
    :return: Whatever ``ISATAB.read_tfile`` returns for that file
    """
    assay_path = os.path.join(input_dir, assay.filename)
    return ISATAB.read_tfile(assay_path)
def replace_factor_with_source_characteristic(self, factor_name):
    """Moves a factor's columns next to the Source Name column

    The ``Factor Value[factor_name]`` column of the table at ``self.path``,
    together with any ``Unit`` / ``Term Source REF`` / ``Term Accession``
    columns directly following it, is moved (headers and cell values
    alike) to sit right after the ``Source Name`` column. The table is
    then written back to ``self.path``.

    This variant does not relabel the moved header; it stays
    ``Factor Value[...]``.

    :param factor_name: The factor whose columns are moved
    :return: None
    :raises ValueError: if the factor column or ``Source Name`` is not
        among the cleaned field names (raised by the ``index`` lookup)
    """
    table_file_df = isatab.read_tfile(self.path)

    field_names = list(table_file_df.columns)
    clean_field_names = self.clean_isatab_field_names(field_names)

    factor_index = clean_field_names.index(
        'Factor Value[{}]'.format(factor_name))
    source_name_index = clean_field_names.index('Source Name')

    total = len(field_names)

    def follows(offset, label):
        # True when a column exists `offset` places after the factor column
        # and its header contains `label`.
        position = factor_index + offset
        return position < total and label in field_names[position]

    # `span` is the number of adjacent columns that travel with the factor.
    if follows(1, 'Term Source REF') and follows(2, 'Term Accession'):
        log.debug('Moving Factor Value[{}] with term columns'.format(
            factor_name))
        span = 3
    elif follows(1, 'Unit') and follows(2, 'Term Source REF') and \
            follows(3, 'Term Accession'):
        log.debug('Moving Factor Value[{}] with unit term columns'.format(
            factor_name))
        span = 4
    elif follows(1, 'Unit'):
        log.debug(
            'Moving Factor Value[{}] with unit column'.format(factor_name))
        span = 2
    else:
        log.debug('Moving Factor Value[{}]'.format(factor_name))
        span = 1

    # Permute column positions so the cell values move with their headers.
    order = list(range(total))
    moved = order[factor_index:factor_index + span]
    del order[factor_index:factor_index + span]
    if source_name_index < factor_index:
        target = source_name_index + 1
    else:
        # Source Name shifted left when the span was removed.
        target = source_name_index - span + 1
    order[target:target] = moved

    table_file_df = table_file_df.iloc[:, order]
    table_file_df.columns = self.clean_isatab_field_names(
        [field_names[position] for position in order])

    with open(self.path, 'w') as out_fp:
        table_file_df.to_csv(path_or_buf=out_fp, index=False, sep='\t',
                             encoding='utf-8')