def genes_from_sites(sites, rho):
    """Group each gene's rho values by site value, ready to pass to class Data.

    ``sites`` and ``rho`` are walked in lockstep; iteration stops at the
    shorter of the two. Every entry of ``sites`` must unpack into exactly
    three items, of which only the last (``x_g``) is used. That ``x_g`` is
    in turn paired element-wise with the matching entry of ``rho``.

    Returns a list with one entry per gene. Each entry is a list of
    ``(x, [rho, ...])`` pairs gathering the rho values that share the same
    ``x``, in whatever order the grouping mapping iterates.
    """
    genes = []
    for site, gene_rho in zip(sites, rho):
        # Only the third component of a site entry takes part in the grouping.
        _unused_v, _unused_z, gene_x = site
        # NOTE(review): DictOf is defined elsewhere; presumably it builds an
        # empty list for a missing key (like defaultdict(list)) - confirm.
        grouped = DictOf(list)
        for key, value in zip(gene_x, gene_rho):
            grouped[key].append(value)
        genes.append(list(grouped.iteritems()))
    return genes
'REFSEQ', 'UNIGENE', 'ENSMUSG' ]: print ref_type, len(filter(has_ref_fn(ref_type), mouse_promoters)) print map(mgi_id_for, mouse_promoters[:20]) mgi_to_ensembl = get_mgi_to_ensembl_map() ensembl_promoters = DictOf(list) for p in mouse_promoters: ensembl = ensembl_for(p) if ensembl: ensembl_promoters[ensembl].append(p) sequence_analyser = get_sequence_analyser() analysis = DictOf(list) for ensembl, remos in ensembl_promoters.iteritems(): for remo in remos: analysis[ensembl].append(sequence_analyser(remo.sequence)) fasta = open('mouse-promoters.fa', 'w') for ensembl, remos in ensembl_promoters.iteritems(): for i, remo in enumerate(remos): fasta.write('> %s - %d\n' % (ensembl, i)) fasta.write(remo.sequence) fasta.write('\n') fasta.close()