예제 #1
0
        # Add a 'length_5_utr' column holding the absolute difference between
        # the 'utr_start' and 'utr_end' columns.
        data['length_5_utr'] = (data['utr_start'] - data['utr_end']).apply(abs)

        # Transcript ids from grouped whose 'relevant_sets' value is at least
        # thresh (grouped and thresh are defined outside this excerpt).
        ids = grouped[
            grouped['relevant_sets'] >= thresh]['glass_transcript_id']

        # Row subsets of data, selected by 'glass_transcript_id':
        #   repr_data -- ids with relevant_sets >= thresh
        #   rest_data -- ids with relevant_sets < thresh
        #   all_data  -- every id present in grouped
        repr_data = data[data['glass_transcript_id'].isin(ids)]
        rest_data = data[data['glass_transcript_id'].isin(
            grouped[grouped['relevant_sets'] < thresh]['glass_transcript_id'])]
        all_data = data[data['glass_transcript_id'].isin(
            grouped['glass_transcript_id'])]

        # Disabled on purpose (`if False`): change the condition to run the
        # two prep_files_for_homer calls for all_data again. The calls differ
        # only in the file prefix and in the `preceding` flag.
        if False:
            # The prefixes contain no replacement fields, so they are passed
            # as plain literals; formatting them with thresh changed nothing
            # and wrongly implied the names depend on the threshold.
            yzer.prep_files_for_homer(
                all_data,
                'all_transcripts_preceding',
                yzer.get_filename(dirpath, 'from_genes'),
                center=False,
                reverse=False,
                preceding=True,
                size=200)
            yzer.prep_files_for_homer(
                all_data,
                'all_transcripts_promoter',
                yzer.get_filename(dirpath, 'from_genes'),
                center=False,
                reverse=False,
                preceding=False,
                size=200)
        if False:
            yzer.prep_files_for_homer(
                repr_data,
                'repressed_in_{0}_kla_{1}_promoter_200'.format(
예제 #2
0
        # Python 2 print statements.
        # First: gene names for refseq rows with kla_1_lfc >= 1, requested
        # with add_quotes=True.
        print grapher.get_gene_names(refseq[(refseq['kla_1_lfc'] >= 1)],
                                     add_quotes=True)
        # Second: same kla_1_lfc filter, additionally requiring
        # dex_over_kla_1_lfc < -.58; add_quotes is left at its default.
        print grapher.get_gene_names(
            refseq[(refseq['kla_1_lfc'] >= 1)
                   & (refseq['dex_over_kla_1_lfc'] < -.58)])

    # Disabled on purpose (`if False`): prep_files_for_homer calls for
    # subsets of the rows flagged distal. Change the condition to run them.
    if False:
        yzer = MotifAnalyzer()
        # Output location passed to every call below.
        motif_dirpath = yzer.get_filename(dirpath, 'motifs/size_200')
        # Rows whose 'distal' column holds the string 't'.
        distal = data[data['distal'] == 't']

        # Distal rows with kla_lfc >= 1.
        dataset = distal[(distal['kla_lfc'] >= 1)]
        yzer.prep_files_for_homer(dataset,
                                  'distal_up_in_kla_all',
                                  motif_dirpath,
                                  center=False,
                                  reverse=False,
                                  preceding=True,
                                  size=200)

        # Distal rows with kla_1_lfc >= 1.
        dataset = distal[(distal['kla_1_lfc'] >= 1)]
        yzer.prep_files_for_homer(dataset,
                                  'distal_up_in_kla_1',
                                  motif_dirpath,
                                  center=False,
                                  reverse=False,
                                  preceding=True,
                                  size=200)

        dataset = distal[(distal['kla_2_lfc'] >= 1)]
        yzer.prep_files_for_homer(dataset,
예제 #3
0
    # Load the two input tables from dirpath: feature vectors (used for the
    # pausing ratios) and transcript vectors.
    pausing_data = yzer.import_file(
        yzer.get_filename(dirpath, 'feature_vectors.txt'))
    data = yzer.import_file(
        yzer.get_filename(dirpath, 'transcript_vectors.txt'))

    # Minimum pausing ratio: first command-line argument, or 1.5 when none
    # is given. A non-numeric argument is not handled here; float() raises
    # ValueError and it propagates.
    try:
        min_ratio = float(sys.argv[1])
    except IndexError:
        min_ratio = 1.5

    # Disabled on purpose (`if False`): prep_files_for_homer call over the
    # full transcript table.
    if False:
        yzer.prep_files_for_homer(data,
                                  'all_transcripts_promoter',
                                  dirpath,
                                  center=False,
                                  reverse=False,
                                  preceding=True,
                                  size=200)

    # One pass per replicate id; '' is handled by get_rep_string (defined
    # outside this excerpt), which supplies the infix used in column names.
    for replicate_id in ('', 1, 2, 3, 4):
        rep_str = get_rep_string(replicate_id)
        key = 'kla_dex_{0}pausing_ratio'.format(rep_str)
        # New column: kla_dex bucket score divided by kla bucket score.
        pausing_data[key] = pausing_data['kla_dex_{0}bucket_score'.format(rep_str)] \
                            /pausing_data['kla_{0}bucket_score'.format(rep_str)]
        # Replaces NaN with 0 across the whole frame, not just the new column.
        # NOTE(review): fillna(0) does not touch inf; a zero denominator with
        # a nonzero numerator would presumably yield inf and pass the
        # >= min_ratio test below -- confirm that is intended.
        pausing_data = pausing_data.fillna(0)

        # Transcript ids whose ratio reaches min_ratio, then the matching
        # rows of data (matched on data['id']).
        pausing_ids = pausing_data[
            pausing_data[key] >= min_ratio]['glass_transcript_id']
        dataset = data[data['id'].isin(pausing_ids)]
예제 #4
0
        # Load the promoter vectors for this peak_type from the from_peaks
        # subdirectory of dirpath (peak_type is defined outside this excerpt).
        data = yzer.import_file(
            yzer.get_filename(
                dirpath,
                'from_peaks/{0}_promoter_vectors.txt'.format(peak_type)))
        # Copy 'peak_id' into an 'id' column.
        data['id'] = data['peak_id']

        # Keep rows whose transcript id has relevant_sets_primary >= thresh
        # in grouped (grouped is defined outside this excerpt).
        thresh = 2
        ids = grouped[
            grouped['relevant_sets_primary'] >= thresh]['glass_transcript_id']
        dataset = data[data['glass_transcript_id'].isin(ids)]

        # Always-on toggle (`if True`): this call receives the full,
        # unfiltered data table for the peak type.
        if True:
            yzer.prep_files_for_homer(data,
                                      'all_{0}_200'.format(peak_type),
                                      yzer.get_and_create_path(
                                          dirpath, 'from_peaks', peak_type),
                                      center=True,
                                      reverse=False,
                                      preceding=False,
                                      size=200)

        # Same call for the thresholded subset. The file prefix encodes
        # thresh, min_ratio, secondary_min_ratio and peak_type (the two
        # ratio values are defined outside this excerpt).
        yzer.prep_files_for_homer(
            dataset,
            'paused_in_{0}_at_least_{1}_min_{2}_down_in_dex_{3}_200'.format(
                thresh, min_ratio, secondary_min_ratio, peak_type),
            yzer.get_and_create_path(dirpath, 'from_peaks', peak_type),
            center=True,
            reverse=False,
            preceding=False,
            size=200)