コード例 #1
0
ファイル: tag.py プロジェクト: karmel/vespucci
    def _translate_from_prep(cls, chr_list, flip):
        '''
        Take uploaded values and streamline into ints and sequence ends.
        '''
        # Some files come in with the reads flipped.
        # Allow for insertion of flipped strands.
        if flip:
            strand_char = '+'
        else:
            strand_char = '-'

        for chr_id in chr_list:
            update_query = """
            INSERT INTO "{0}_{chr_id}" (chromosome_id, 
                                        strand, 
                                        "start", 
                                        "end", 
                                        start_end, 
                                        refseq)
            SELECT * FROM (
                SELECT {chr_id}, 
                (CASE WHEN prep.strand_char = '{strand_char}' THEN 1 ELSE 0 END), 
                prep."start", 
                (prep."start" + char_length(prep.sequence_matched)),
                int8range(prep."start", 
                (prep."start" + char_length(prep.sequence_matched)), '[]'),
                NULL::boolean
            FROM "{1}" prep
            JOIN "{2}" chr ON chr.name = prep.chromosome
            WHERE chr.id = {chr_id}) derived;
            """.format(cls._meta.db_table, cls.prep_table,
                       Chromosome._meta.db_table,
                       strand_char=strand_char,
                       chr_id=chr_id)
            execute_query(update_query)
コード例 #2
0
ファイル: tag.py プロジェクト: karmel/vespucci
    def create_parent_table(cls, name):
        '''
        Create table that will be used for these tags,
        dynamically named.
        '''
        name = 'tag_' + name
        cls.set_table_name(name)
        table_sql = """
        CREATE TABLE "{0}" (
            id integer NOT NULL,
            chromosome_id integer default NULL,
            strand smallint default NULL,
            "start" bigint,
            "end" bigint,
            start_end int8range,
            refseq boolean default false
        );
        CREATE SEQUENCE "{0}_id_seq"
            START WITH 1
            INCREMENT BY 1
            NO MINVALUE
            NO MAXVALUE
            CACHE 1;
        ALTER SEQUENCE "{0}_id_seq" OWNED BY "{0}".id;
        ALTER TABLE "{0}" ALTER COLUMN id 
            SET DEFAULT nextval('"{0}_id_seq"'::regclass);
        ALTER TABLE ONLY "{0}" ADD CONSTRAINT {1}_pkey PRIMARY KEY (id);
        """.format(cls._meta.db_table, cls.name)
        execute_query(table_sql)

        cls.table_created = True
コード例 #3
0
ファイル: add_short_reads.py プロジェクト: karmel/vespucci
def create_schema():
    try:
        execute_query("""
                        CREATE SCHEMA "{0}"; 
                        GRANT Create,Usage ON SCHEMA "{0}" TO  "{1}";
                        """.format(current_settings.CURRENT_SCHEMA,
                                   current_settings.DATABASES['default']['USER']))
    except utils.DatabaseError:
        _print('Schema already created.')
コード例 #4
0
ファイル: tag.py プロジェクト: karmel/vespucci
 def _add_indices(cls, chr_list):
     for chr_id in chr_list:
         update_query = """
         CREATE INDEX {0}_{chr_id}_pkey_idx ON "{1}_{chr_id}" USING btree (id);
         CREATE INDEX {0}_{chr_id}_chr_idx ON "{1}_{chr_id}" USING btree (chromosome_id);
         CREATE INDEX {0}_{chr_id}_strand_idx ON "{1}_{chr_id}" USING btree (strand);
         CREATE INDEX {0}_{chr_id}_start_end_idx ON "{1}_{chr_id}" USING gist (start_end);
         ANALYZE "{1}_{chr_id}";
         """.format(cls.name, cls._meta.db_table, chr_id=chr_id)
         execute_query(update_query)
コード例 #5
0
ファイル: tag.py プロジェクト: karmel/vespucci
    def delete_prep_columns(cls):
        '''
        .. warning:: DELETES the associated sequence and strand_char columns to conserve space.

        '''
        table_sql = """ 
        ALTER TABLE "{0}" DROP COLUMN sequence_matched;
        ALTER TABLE "{0}" DROP COLUMN strand_char;
        ALTER TABLE "{0}" DROP COLUMN chromosome;
        """.format(cls._meta.db_table)
        execute_query(table_sql)

        cls.table_created = False
コード例 #6
0
ファイル: transcript.py プロジェクト: karmel/vespucci
 def _set_scores(cls, chr_list):
     try:
         for chr_id in chr_list:
             print('Scoring transcripts for chromosome {}'.format(chr_id))
             query = """
                 SELECT atlas_{0}_{1}{2}.calculate_scores({chr_id});
                 SELECT atlas_{0}_{1}{2}.calculate_standard_error({chr_id});
                 """.format(current_settings.GENOME,
                            current_settings.CELL_TYPE.lower(),
                            current_settings.STAGING, chr_id=chr_id)
             execute_query(query)
     except Exception as e:
         raise e
コード例 #7
0
ファイル: tag.py プロジェクト: karmel/vespucci
    def create_partition_tables(cls):
        '''
        Can't be multiprocessed; too many attempts to ANALYZE at once.
        '''

        for chr_id in current_settings.GENOME_CHOICES[current_settings.GENOME]['chromosomes']:
            table_sql = """
            CREATE TABLE "{0}_{1}" (
                CHECK ( chromosome_id = {1} )
            ) INHERITS ("{0}");""".format(cls._meta.db_table, chr_id)
            execute_query(table_sql)

        trigger_sql = '''
            CREATE OR REPLACE FUNCTION {0}.atlas_tag_insert_trigger()
            RETURNS TRIGGER AS $$
            DECLARE
                refseq_string text;
                start_end_string text;
            BEGIN
                IF NEW.refseq IS NULL THEN
                    refseq_string := 'NULL';
                ELSE
                    refseq_string := NEW.refseq::text;
                END IF;
                
                IF NEW.start_end IS NULL THEN
                    start_end_string := 'NULL';
                ELSE
                    start_end_string := 'int8range(' || quote_literal(NEW.start) || ',' 
                    || quote_literal(NEW."end") || ', ''[]'')';
                END IF;
                EXECUTE 'INSERT INTO "{1}_' || NEW.chromosome_id || '" VALUES ('
                || quote_literal(NEW.id) || ','
                || quote_literal(NEW.chromosome_id) || ','
                || quote_literal(NEW.strand) || ','
                || quote_literal(NEW.start) || ','
                || quote_literal(NEW."end") || ','
                || start_end_string || ','
                || refseq_string || ')';
                RETURN NULL;
            END;
            $$
            LANGUAGE 'plpgsql';
            
            -- Trigger function for inserts on main table
            CREATE TRIGGER atlas_tag_trigger
                BEFORE INSERT ON "{1}"
                FOR EACH ROW EXECUTE PROCEDURE {0}.atlas_tag_insert_trigger();
        '''.format(current_settings.CURRENT_SCHEMA, cls._meta.db_table,)
        execute_query(trigger_sql)
コード例 #8
0
ファイル: transcript.py プロジェクト: karmel/vespucci
 def _draw_transcript_edges(cls, chr_list):
     try:
         for chr_id in chr_list:
             print(
                 'Drawing edges for transcripts for chr_id {}'.format(chr_id))
             query = """
                 SELECT atlas_{0}_{1}{2}.draw_transcript_edges({chr_id},{3},{4});
                 """.format(current_settings.GENOME,
                            current_settings.CELL_TYPE.lower(),
                            current_settings.STAGING,
                            MIN_ONE_RUN_TAGS, current_settings.MAX_EDGE,
                            chr_id=chr_id)
             execute_query(query)
     except Exception as e:
         raise e
コード例 #9
0
ファイル: tag.py プロジェクト: karmel/vespucci
 def create_prep_table(cls, name):
     '''
     Create table that will be used for to upload preparatory tag information,
     dynamically named.
     '''
     cls.set_prep_table('prep_' + name)
     table_sql = """
     CREATE TABLE "{}" (
         strand_char character(1) default NULL,
         chromosome varchar(50),
         "start" bigint,
         sequence_matched varchar(100)
     );
     """.format(cls.prep_table)
     execute_query(table_sql)
コード例 #10
0
ファイル: transcript.py プロジェクト: karmel/vespucci
 def _add_transcripts_from_groseq(cls, chr_list, sequencing_run):
     try:
         for chr_id in chr_list:
             print('Adding transcripts for chromosome {}'.format(chr_id))
             query = """
                 SELECT atlas_{}_{}_prep.save_transcripts_from_sequencing_run(
                     {}, {}, '{}', {}, {}, {}, {}, {}, NULL, NULL);
                 """.format(current_settings.GENOME,
                            current_settings.CELL_TYPE.lower(),
                            sequencing_run.id,
                            chr_id,
                            sequencing_run.source_table.strip(),
                            MAX_GAP,
                            TAG_EXTENSION,
                            current_settings.MAX_EDGE,
                            EDGE_SCALING_FACTOR,
                            current_settings.DENSITY_MULTIPLIER)
             execute_query(query)
     except Exception as e:
         raise e
コード例 #11
0
ファイル: tag.py プロジェクト: karmel/vespucci
    def _set_refseq(cls, chr_list):
        '''
        Set refseq status for segmentation during stitching later on by 
        overlapping with consolidated refseq transcripts.
        '''
        for chr_id in chr_list:
            print('Setting Refseq status for chromosome {0}'.format(chr_id))
            update_query = """
            UPDATE "{0}_{1}" tag 
            SET refseq = true 
            FROM atlas_{2}_refseq_prep.atlas_transcript_{1} ref
            WHERE ref.start_end && tag.start_end
            AND ref.strand = tag.strand
            AND tag.refseq IS NULL;

            UPDATE "{0}_{1}" tag 
            SET refseq = false 
            WHERE refseq IS NULL;
            """.format(cls._meta.db_table, chr_id, current_settings.GENOME)
            execute_query(update_query)
コード例 #12
0
ファイル: peak.py プロジェクト: karmel/vespucci
    def create_table(cls, name):
        '''
        Create table that will be used for these peaks,
        dynamically named.
        '''
        cls.set_table_name('peak_' + name)

        table_sql = """
        CREATE TABLE "{0}" (
            id int4,
            chromosome_id int4,
            "start" int8,
            "end" int8,
            "strand" int2,
            start_end int8range,
            "length" int4,
            summit int8,
            tag_count decimal(10,2) default NULL,
            raw_tag_count decimal(10,2) default NULL,
            score decimal(8,2) default NULL,
            p_value decimal(6,4) default NULL,
            p_value_exp int4 default NULL,
            log_ten_p_value decimal(10,2) default NULL,
            fold_enrichment decimal(10,2) default NULL,
            fdr_threshold decimal(6,4) default NULL,
            fdr_threshold_exp int4 default NULL
            );
        CREATE SEQUENCE "{0}_id_seq"
            START WITH 1
            INCREMENT BY 1
            NO MINVALUE
            NO MAXVALUE
            CACHE 1;
        ALTER SEQUENCE "{0}_id_seq" OWNED BY "{0}".id;
        ALTER TABLE "{0}" ALTER COLUMN id 
            SET DEFAULT nextval('"{0}_id_seq"'::regclass);
        ALTER TABLE ONLY "{0}" ADD CONSTRAINT {1}_pkey PRIMARY KEY (id);
        """.format(cls._meta.db_table, cls.name)
        execute_query(table_sql)

        cls.table_created = True
コード例 #13
0
ファイル: transcript.py プロジェクト: karmel/vespucci
 def _set_density(cls, chr_list,
                  allow_extended_gaps=True,
                  extension_percent='.2',
                  null_only=True):
     '''
     Force reset average tags for prep DB.
     '''
     try:
         for chr_id in chr_list:
             print(
                 'Setting density for transcripts for chr_id {}'.format(chr_id))
             query = """
                 SELECT atlas_{}_{}_prep.set_density({},{},{},{},{},{},{});
                 """.format(current_settings.GENOME,
                            current_settings.CELL_TYPE.lower(),
                            chr_id, current_settings.MAX_EDGE,
                            EDGE_SCALING_FACTOR,
                            current_settings.DENSITY_MULTIPLIER,
                            allow_extended_gaps and 'true' or 'false',
                            extension_percent,
                            null_only and 'true' or 'false')
             execute_query(query)
     except Exception as e:
         raise e
コード例 #14
0
ファイル: transcript.py プロジェクト: karmel/vespucci
 def _stitch_together_transcripts(cls, chr_list,
                                  allow_extended_gaps=True,
                                  extension_percent='.2',
                                  set_density=False,
                                  null_only=True):
     '''
     This is tag-level agnostic, stitching based on gap size alone.
     '''
     try:
         for chr_id in chr_list:
             print(
                 'Stitching together transcripts for chr_id {}'.format(chr_id))
             query = """
                 SELECT atlas_{}_{}_prep.save_transcripts_from_existing({}, {});
                 """.format(current_settings.GENOME,
                            current_settings.CELL_TYPE.lower(),
                            chr_id,
                            MAX_STITCHING_GAP)
             execute_query(query)
             if set_density:
                 print(
                     'Setting density for transcripts for chr_id {}'.format(chr_id))
                 query = """
                     SELECT atlas_{}_{}_prep.set_density({},{},{},{},{},{},{});
                     """.format(current_settings.GENOME,
                                current_settings.CELL_TYPE.lower(),
                                chr_id,
                                current_settings.MAX_EDGE,
                                EDGE_SCALING_FACTOR,
                                current_settings.DENSITY_MULTIPLIER,
                                allow_extended_gaps and 'true' or 'false',
                                extension_percent,
                                null_only and 'true' or 'false')
                 execute_query(query)
     except Exception as e:
         raise e
コード例 #15
0
ファイル: set_up_database.py プロジェクト: karmel/vespucci
        make_option('-g', '--genome', action='store',
                    type='string', dest='genome', default='mm9',
                    help='Currently supported: mm9, dm3'),
        make_option('-c', '--cell_type', action='store',
                    type='string', dest='cell_type',
                    help='Cell type for this run?'),
        make_option('-f', '--final', action='store_true',
                    dest='final', default=False,
                    help='Generate only the final schema and tables, without the prep schema?'),
        make_option('-p', '--prep', action='store_true',
                    dest='prep', default=False,
                    help='Generate only the prep schema and tables, without the final schema?'),
    ]

if __name__ == '__main__':
    django_setup()

    parser = SetUpDatabaseParser()
    options, args = parser.parse_args()

    genome = parser.set_genome(options)
    cell_type, cell_base = parser.set_cell(options)

    generator = AtlasSqlGenerator(cell_type=cell_type, genome=genome)

    print('Creating database schema and tables...')
    q = (options.final and generator.final_set()) \
        or (options.prep and generator.prep_set()) \
        or generator.all_sql()
    execute_query(q)
コード例 #16
0
ファイル: set_up_database.py プロジェクト: karmel/vespucci
        make_option('-c', '--cell_type', action='store',
                    type='string', dest='cell_type',
                    help='Cell type for this run?'),
        make_option('--schema_only', action='store_true',
                    dest='schema_only', default=False,
                    help='Only create schema and stop before importing all the data?'),
    ]

if __name__ == '__main__':
    django_setup()

    parser = SetUpDatabaseParser()
    options, args = parser.parse_args()

    cell_type, cell_base = parser.set_cell(options)
    genome = parser.set_genome(options)

    generator = GenomeResourcesSqlGenerator(cell_type=cell_type, genome=genome)

    print('Creating genome resources database schema and tables...')
    q = generator.all_sql()
    execute_query(q)
    if not options.schema_only and genome in current_settings.GENOME_CHOICES.keys():
        q_set = generator.fill_tables()
        for q in q_set:
            if q:
                execute_query(q)

    # And cleanup the import tables:
    execute_query(generator.cleanup())
コード例 #17
0
ファイル: peak.py プロジェクト: karmel/vespucci
 def add_indices(cls):
     update_query = """
     CREATE INDEX {0}_chr_idx ON "{1}" USING btree (chromosome_id);
     CREATE INDEX {0}_start_end_idx ON "{1}" USING gist (start_end);
     """.format(cls.name, cls._meta.db_table)
     execute_query(update_query)
コード例 #18
0
ファイル: tag.py プロジェクト: karmel/vespucci
 def delete_prep_table(cls):
     table_sql = """
     DROP TABLE "{0}" CASCADE;
     """.format(cls.prep_table)
     execute_query(table_sql)