def remap_expn_sample_names(expn):
    """
    Super-specific method to remap the sample names.

    Each condition name on expn is swapped for its entry in the module-level
    sample_description mapping. A name with no entry is kept, but prefixed
    with '?' so that unmapped samples are easy to spot.

    The new names are written back through expn.setConditionNames();
    nothing is returned.
    """
    # dict.get() performs the lookup and the '?'-prefixed fallback in one step.
    new_names = [sample_description.get(n, '?%s' % n) for n in expn.getConditionNames()]
    expn.setConditionNames(new_names)


if __name__ == "__main__":
    # This should later be changed to mm10v79:
    expn = glload("../te_counts/genes_cpm_expression.glb") # Using the published one for now
    print(expn.getConditionNames())

    print("Testing coherence")
    print("\nSample names:")
    # Test that all desc names are actually in the expn and vice versa:
    expn_names = expn.getConditionNames()
    desc_names = list(sample_description)
    ss1 = set(expn_names) - set(desc_names)  # in expn, but no description
    ss2 = set(desc_names) - set(expn_names)  # described, but not in expn

    # sep='\n' puts every name on its own line. Passing the joined names as a
    # second print() argument would insert a stray space before the first name.
    if ss1:
        print(">>>Missing in sample_description:", *sorted(ss1), sep='\n')
        print()
    if ss2:
        print(">>>Missing in exp:", *sorted(ss2), sep='\n')
'white matter glia': 'White-matter glia', 'X2clc': '2C-like cells (MERVL+ Zscan+)', 'X2clc D1 rp3': '2C-like cells (MERVL+)', 'X2C embryo': '2C Embryo', 'X4C embryo': '4C Embryo', 'X8C embryo': '8C Embryo', 'XEN': 'XEN cells', 'zona limitans intrathalamica': 'Zona limitans intrathalamica', } if __name__ == '__main__': from glbase3 import glload from .gene_layer_name import gene_layer_name # Check all sample names are actually used: expn = glload("../../te_counts/genes_cpm_expression.glb") cond_names = expn.getConditionNames() print('\nFound in "sample_description" but not in expn:') for c in sorted(sample_description.keys()): if c not in cond_names: print('! %s' % (c, )) print('\nFound in "expn" but not in sample_description:') for c in cond_names: if c not in list(sample_description.keys()): print("'%s': ," % (c, )) print() print('Spell check') # For spell check:
def bind_genome(self, genelist_glb_filename):
    """
    Load a file with glbase3.glload() and attach the result to this object
    as self.genome, then print a one-line confirmation.

    genelist_glb_filename is handed to glbase3.glload() unchanged;
    presumably the path of a glbase3 binary (.glb) genelist - confirm
    against the callers.

    Nothing is returned.
    """
    loaded_genome = glbase3.glload(genelist_glb_filename)
    self.genome = loaded_genome

    confirmation = 'Loaded %s' % genelist_glb_filename
    print(confirmation)