Ejemplo n.º 1
0
'''
from glasslab.dataanalysis.misc.rodrigo.samples import get_breed_sets
from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer
from glasslab.utils.database import get_engine, dataframe_from_query
if __name__ == '__main__':
    yzer = MotifAnalyzer()

    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/' +\
        'Miscellaneous_Collaborations/Rodrigo_CD8s_2014_09/Enhancers_set2'
    dirpath = yzer.get_path(dirpath)

    # Get DB engine
    engine = get_engine(uri='ec2-23-20-125-153.compute-1.amazonaws.com',
                        password='******')

    breed_sets = get_breed_sets()

    # Run and save output from sql queries
    for k, (samples, short_names) in enumerate(breed_sets):
        oth_breed = breed_sets[1 - k]
        for i, sample in enumerate(samples):
            curr_name = short_names[i]
            others = samples[:i] + samples[i + 1:]
            oth_names = short_names[:i] + short_names[i + 1:]
            sql = '''-- {}
        select distinct on (p1.id)
        chr."name" as chr_name, p1."start", p1."end", p1.tag_count,
        p1.*,
        '''.format(sample)
            selects, joins = [], []
            for j, other_sample in enumerate(others):
Ejemplo n.º 2
0
Created on Oct 6, 2014

@author: karmel
'''
from glasslab.dataanalysis.misc.rodrigo.samples import get_threshold,\
    get_breed_sets
from glasslab.dataanalysis.motifs.motif_analyzer import MotifAnalyzer
if __name__ == '__main__':
    # Script entry point: for every sample in the first breed set, load its
    # enhancer table, replace missing values with 0, and keep only the rows
    # whose tag_count is at or above the 'atac' threshold.
    yzer = MotifAnalyzer()

    # Project-relative directory, resolved through the analyzer's get_path.
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/' +\
        'Miscellaneous_Collaborations/Rodrigo_CD8s_2014_09/Enhancers_set2'
    dirpath = yzer.get_path(dirpath)

    # Filtered table for each sample, keyed by the sample's short name.
    datasets = {}
    # Only the first entry returned by get_breed_sets() is processed here.
    samples, short_names = get_breed_sets()[0]

    # The threshold is requested with a constant key, so it is looked up once
    # here rather than on every pass through the loop.
    # NOTE(review): assumes get_threshold has no side effects -- confirm.
    min_thresh = get_threshold('atac')

    for j, sample_prefix in enumerate(short_names):
        # get_filename is applied twice: first with the sample's short name,
        # then with '<short name>_enhancers.txt' under that result.
        sample_dirpath = yzer.get_filename(dirpath, sample_prefix)
        filename = yzer.get_filename(sample_dirpath,
                                     sample_prefix + '_enhancers.txt')

        # NOTE(review): import_file presumably returns a pandas DataFrame
        # (fillna and boolean-mask indexing are used below) -- confirm.
        data = yzer.import_file(filename)
        # Fill missing values with 0 before applying the tag_count filter.
        data = data.fillna(0)

        # Drop rows whose tag_count falls below the threshold.
        data = data[data['tag_count'] >= min_thresh]

        datasets[sample_prefix] = data