# Script fragment (whitespace-collapsed; the last statement is cut off at the
# end of this view).
#
# - Adds a 'length_5_utr' column: the absolute difference between
#   'utr_start' and 'utr_end'.
# - Splits `data` by the per-transcript 'relevant_sets' value in `grouped`:
#     repr_data -- rows whose glass_transcript_id has relevant_sets >= thresh
#     rest_data -- rows whose glass_transcript_id has relevant_sets < thresh
#     all_data  -- rows whose glass_transcript_id appears in `grouped` at all
# - The `if False:` guards disable the yzer.prep_files_for_homer calls they
#   cover; how many calls each guard covers cannot be determined from the
#   collapsed indentation.
# NOTE(review): 'all_transcripts_preceding'.format(thresh) and
#   'all_transcripts_promoter'.format(thresh) contain no placeholders, so
#   .format() returns the string unchanged and `thresh` is not part of the
#   name.
# `grouped`, `thresh`, `yzer` and `dirpath` are defined outside this view.
data['length_5_utr'] = (data['utr_start'] - data['utr_end']).apply(abs) ids = grouped[ grouped['relevant_sets'] >= thresh]['glass_transcript_id'] repr_data = data[data['glass_transcript_id'].isin(ids)] rest_data = data[data['glass_transcript_id'].isin( grouped[grouped['relevant_sets'] < thresh]['glass_transcript_id'])] all_data = data[data['glass_transcript_id'].isin( grouped['glass_transcript_id'])] if False: yzer.prep_files_for_homer( all_data, 'all_transcripts_preceding'.format(thresh), yzer.get_filename(dirpath, 'from_genes'), center=False, reverse=False, preceding=True, size=200) yzer.prep_files_for_homer( all_data, 'all_transcripts_promoter'.format(thresh), yzer.get_filename(dirpath, 'from_genes'), center=False, reverse=False, preceding=False, size=200) if False: yzer.prep_files_for_homer( repr_data, 'repressed_in_{0}_kla_{1}_promoter_200'.format(
# Script fragment (whitespace-collapsed; the last call is cut off at the end
# of this view). Uses Python 2 `print` statements.
#
# - Prints the gene names (with add_quotes=True) for refseq rows where
#   'kla_1_lfc' >= 1, then the gene names for rows that additionally have
#   'dex_over_kla_1_lfc' < -.58.
#   NOTE(review): -.58 is presumably log2(1/1.5), i.e. a 1.5-fold decrease --
#   confirm.
# - The `if False:` guard disables the motif preparation that follows it: a
#   MotifAnalyzer is created, rows with data['distal'] == 't' are selected,
#   and subsets with 'kla_lfc', 'kla_1_lfc' and 'kla_2_lfc' >= 1 are each
#   passed to yzer.prep_files_for_homer (preceding=True, size=200) under
#   'motifs/size_200'. How far the guard extends cannot be determined from
#   the collapsed indentation.
# `grapher`, `refseq`, `data`, `dirpath` and `MotifAnalyzer` are defined
# outside this view.
print grapher.get_gene_names(refseq[(refseq['kla_1_lfc'] >= 1)], add_quotes=True) print grapher.get_gene_names( refseq[(refseq['kla_1_lfc'] >= 1) & (refseq['dex_over_kla_1_lfc'] < -.58)]) if False: yzer = MotifAnalyzer() motif_dirpath = yzer.get_filename(dirpath, 'motifs/size_200') distal = data[data['distal'] == 't'] dataset = distal[(distal['kla_lfc'] >= 1)] yzer.prep_files_for_homer(dataset, 'distal_up_in_kla_all', motif_dirpath, center=False, reverse=False, preceding=True, size=200) dataset = distal[(distal['kla_1_lfc'] >= 1)] yzer.prep_files_for_homer(dataset, 'distal_up_in_kla_1', motif_dirpath, center=False, reverse=False, preceding=True, size=200) dataset = distal[(distal['kla_2_lfc'] >= 1)] yzer.prep_files_for_homer(dataset,
# Script fragment (whitespace-collapsed; the loop body may continue past this
# view, and which statements sit inside the loop cannot be determined from
# the collapsed indentation).
#
# - Loads 'feature_vectors.txt' into `pausing_data` and
#   'transcript_vectors.txt' into `data`.
# - Reads `min_ratio` from the first command-line argument as a float and
#   falls back to 1.5 when no argument is given (IndexError). A non-numeric
#   argument raises ValueError, which is not caught here.
# - The `if False:` guard disables the 'all_transcripts_promoter' call to
#   yzer.prep_files_for_homer.
# - For each replicate id ('' and 1..4): builds the column name
#   'kla_dex_<rep>pausing_ratio' and stores in it the 'kla_dex_<rep>bucket_score'
#   column divided by the 'kla_<rep>bucket_score' column. NaNs in
#   `pausing_data` are replaced with 0, the glass_transcript_ids whose ratio
#   is >= min_ratio are collected, and `dataset` becomes the rows of `data`
#   whose 'id' is among them.
# NOTE(review): a zero denominator would presumably produce inf rather than
#   NaN, which fillna(0) does not replace and which passes the >= min_ratio
#   filter -- confirm this is intended.
# `yzer`, `dirpath` and `get_rep_string` are defined outside this view.
pausing_data = yzer.import_file( yzer.get_filename(dirpath, 'feature_vectors.txt')) data = yzer.import_file( yzer.get_filename(dirpath, 'transcript_vectors.txt')) try: min_ratio = float(sys.argv[1]) except IndexError: min_ratio = 1.5 if False: yzer.prep_files_for_homer(data, 'all_transcripts_promoter', dirpath, center=False, reverse=False, preceding=True, size=200) for replicate_id in ('', 1, 2, 3, 4): rep_str = get_rep_string(replicate_id) key = 'kla_dex_{0}pausing_ratio'.format(rep_str) pausing_data[key] = pausing_data['kla_dex_{0}bucket_score'.format(rep_str)] \ /pausing_data['kla_{0}bucket_score'.format(rep_str)] pausing_data = pausing_data.fillna(0) pausing_ids = pausing_data[ pausing_data[key] >= min_ratio]['glass_transcript_id'] dataset = data[data['id'].isin(pausing_ids)]
# Load the promoter vectors for this peak type; the peak id doubles as the
# row id.
vectors_path = yzer.get_filename(
    dirpath, 'from_peaks/{0}_promoter_vectors.txt'.format(peak_type))
data = yzer.import_file(vectors_path)
data['id'] = data['peak_id']

# Keep only the peaks whose transcript has at least `thresh` primary
# relevant sets in `grouped`.
thresh = 2
meets_thresh = grouped['relevant_sets_primary'] >= thresh
ids = grouped[meets_thresh]['glass_transcript_id']
dataset = data[data['glass_transcript_id'].isin(ids)]

# First pass: every peak of this type.
all_label = 'all_{0}_200'.format(peak_type)
yzer.prep_files_for_homer(
    data,
    all_label,
    yzer.get_and_create_path(dirpath, 'from_peaks', peak_type),
    center=True, reverse=False, preceding=False, size=200)

# Second pass: only the thresholded subset, written to the same directory
# under a name that records the thresholds used.
subset_label = (
    'paused_in_{0}_at_least_{1}_min_{2}_down_in_dex_{3}_200'.format(
        thresh, min_ratio, secondary_min_ratio, peak_type))
yzer.prep_files_for_homer(
    dataset,
    subset_label,
    yzer.get_and_create_path(dirpath, 'from_peaks', peak_type),
    center=True, reverse=False, preceding=False, size=200)