Beispiel #1
0
 def new_selection(self, project_id, s, job_name, job_description, *args, **kw):
     '''
     Called by the browser. Transform a selection into a new track.

     ``s`` is a JSON document mapping each chromosome name to a list of
     marquees; every marquee must provide ``start`` and ``end``. The
     selection is written to a temporary 'sql' track, handed to
     ``handler.track.create_track`` and a selection job is created for it.

     Returns a dict: ``{'error': message}`` when the project does not exist
     or the datatype is not supported, otherwise the job id, job name and
     job description together with the status ``'RUNNING'``.
     '''
     user = handler.user.get_user_in_session(request)
     sels = json.loads(s)

     project = DBSession.query(Project).filter(Project.id == project_id).first()
     if project is None:
         return {'error': "project id %s doesn't exist" % project_id}
     path = track.common.temporary_path()

     with track.new(path, 'sql') as t:
         t.fields = simple_fields
         for chromosome in sels:
             # Only start/end come from the selection; the other four
             # columns are filled with constant placeholders.
             t.write(chromosome, ((marquee['start'], marquee['end'], 0, '', 0, '')
                                  for marquee in sels[chromosome]))
         t.datatype = constants.FEATURES
         t.assembly = project.sequence.name

     # The second element of the returned pair (the track id) is not needed here.
     task_id, _track_id = handler.track.create_track(
         user.id, project.sequence, f=path,
         trackname='%s %s' % (job_name, job_description), project=project)
     if task_id == constants.NOT_SUPPORTED_DATATYPE:
         # Plain message: it has no placeholder, so it must not be
         # %-formatted (doing so raises TypeError).
         return {'error': "not supported datatype"}

     job_id = handler.job.new_sel(user.id, project.id, job_description, job_name, task_id=task_id)
     return {'job_id': job_id,
             'job_name': job_name,
             'job_description': job_description,
             'status': 'RUNNING'}
Beispiel #2
0
 def export(self, path, format=None):
     """Copy this track into a new track at ``path``, then close this one.

     Every chromosome yielded by iterating ``self`` is read and written to
     the new track; ``chrmeta`` and ``info`` are copied over afterwards.
     ``format`` is passed straight through to ``track.new``.
     """
     with track.new(path, format) as destination:
         for chromosome in self:
             destination.write(chromosome, self.read(chromosome))
         # Metadata is assigned once all chromosomes have been written.
         destination.chrmeta = self.chrmeta
         destination.info = self.info
     # The source track is closed only after the destination context exits.
     self.close()
Beispiel #3
0
 def runTest(self):
     """Create a track, then add and fill an extra table through its cursor.

     There are no explicit assertions: the test passes when the writes, the
     raw SQL statements and the exit of the ``with`` block raise nothing.
     """
     sql_path = temporary_path('.sql')
     with track.new(sql_path) as new_track:
         # Five chromosomes named '0'..'4', each holding one feature.
         for index in range(5):
             name = str(index)
             new_track.write(name, [(0, 10, 'A', 0.0, -1)])
         cursor = new_track.cursor()
         cursor.execute("CREATE table tmp (koopa text,troopa text)")
         cursor.execute("INSERT into  tmp values (?,?)", (1, 2))
Beispiel #4
0
def create_repeat_annotation(sequence, fname):
    """Write a "repeat" BED annotation for ``sequence`` to ``fname``.

    Nothing is done when ``checkfile(fname)`` is truthy (presumably the
    file is already there -- confirm against ``checkfile``).  Otherwise
    each character's ``isupper()`` flag is fed, together with the sequence,
    through ``simulator.get_sequence`` and
    ``simulator.sequence_to_intervals``, and the resulting intervals are
    written for chromosome "chr1".
    """
    if not checkfile(fname):
        # Lazy per-character flags: True where the character is upper case.
        uppercase_flags = (char.isupper() for char in sequence)
        flagged = simulator.get_sequence(uppercase_flags, sequence)
        repeat_intervals = simulator.sequence_to_intervals(flagged, "repeat")
        with track.new(fname, "bed") as bed:
            bed.fields = ["start", "end", "name"]
            bed.write("chr1", repeat_intervals)
Beispiel #5
0
 def runTest(self):
     """Check that a fresh track accepts writes and raw SQL via its cursor.

     The test has no assertions; it succeeds when nothing raises, including
     when the ``with`` block is left.
     """
     destination = temporary_path('.sql')
     with track.new(destination) as trk:
         # Chromosomes are simply named after the numbers 0 to 4.
         for number in range(5):
             trk.write(str(number), [(0, 10, 'A', 0.0, -1)])
         db = trk.cursor()
         create_stmt = "CREATE table tmp (koopa text,troopa text)"
         insert_stmt = "INSERT into  tmp values (?,?)"
         db.execute(create_stmt)
         db.execute(insert_stmt, (1, 2))
Beispiel #6
0
 def runTest(self):
     """Copy a sample signal track chromosome by chromosome and compare 'chrI'."""
     source_path = samples['small_signals'][7]['sql']
     copy_path = temporary_path('.sql')
     with track.load(source_path) as source:
         with track.new(copy_path) as copy:
             for chromosome in source:
                 copy.write(chromosome, source.read(chromosome))
             # Compare while both tracks are still open.
             copied = list(copy.read('chrI'))
             original = list(source.read('chrI'))
             self.assertEqual(copied, original)
     os.remove(copy_path)
Beispiel #7
0
 def runTest(self):
     """Round-trip test: a copied track must return the same 'chrI' rows."""
     sample_sql = samples['small_signals'][7]['sql']
     tmp_sql = temporary_path('.sql')
     with track.load(sample_sql) as reader:
         with track.new(tmp_sql) as writer:
             # Transfer every chromosome from the sample into the new track.
             for name in reader:
                 rows = reader.read(name)
                 writer.write(name, rows)
             written = list(writer.read('chrI'))
             self.assertEqual(written, list(reader.read('chrI')))
     # Remove the temporary copy once both tracks are closed.
     os.remove(tmp_sql)
Beispiel #8
0
def create_repeat_annotation(sequence, fname):
    """Create the 'repeat' BED track ``fname`` derived from ``sequence``.

    Returns immediately when ``checkfile(fname)`` is truthy (presumably
    meaning the file already exists -- verify against ``checkfile``).
    The annotation passed to the simulator is the upper-case flag of every
    character of ``sequence``; the intervals it yields are stored under
    chromosome "chr1".
    """
    if checkfile(fname):
        return
    # Generator of booleans, one per character of the sequence.
    is_upper = (symbol.isupper() for symbol in sequence)
    annotated = simulator.get_sequence(is_upper, sequence)
    intervals = simulator.sequence_to_intervals(annotated, 'repeat')
    with track.new(fname, 'bed') as out:
        out.fields = ['start', 'end', 'name']
        out.write("chr1", intervals)
Beispiel #9
0
def ucsc_geneid_fix(in_gtf, out_gtf, remote=None, local=None):
    """Updates 'gene_id' entries in GTF files downloaded from UCSC
    Table Browser to contain gene IDs instead of transcript IDs.

    Exactly one of `remote` or `local` must be supplied; it determines
    where the transcript-gene mapping is taken from.

    If the output GTF file name already exists, it will be overwritten.

    :param in_gtf: path to input GTF file
    :type in_gtf: str
    :param out_gtf: path to output GTF file
    :type out_gtf: str
    :param remote: UCSC database (key ``'db'``) and annotation source
            (key ``'annot'``, which must be a key of ``QUERIES``) to use
    :type remote: dict('db': str, 'annot': str)
    :param local: two-column file name containing transcript-gene mapping,
            only when `remote` is None
    :type local: str
    :returns: None
    :raises ValueError: if neither or both of `remote` and `local` are
            given, if `remote` lacks the ``'db'`` or ``'annot'`` key, or if
            the annotation source is not known to ``QUERIES``

    """
    # remote not defined
    if remote is None:
        # then local must be defined
        if local is None:
            raise ValueError("Missing `remote` or `local` arguments")
        mapping = get_local_transcript_gene_mapping(local)
    # remote defined
    else:
        # then local can not be defined
        if local is not None:
            raise ValueError("Only supply `remote` or `local` argument, not both.")
        # remote must have 'db'
        if "db" not in remote:
            raise ValueError("Missing remote database name")
        # and 'annot'
        if "annot" not in remote:
            raise ValueError("Missing remote annotation source name")

        db = remote["db"]
        annot = remote["annot"]
        if annot not in QUERIES:
            raise ValueError("Invalid annotation source name: {0}".format(annot))

        mapping = get_ucsc_transcript_gene_mapping(annot, db, cred=CRED)

    # remove output file if it exists, so the new track starts from scratch
    if os.path.exists(out_gtf):
        os.remove(out_gtf)

    with track.load(in_gtf, readonly=True) as in_track, track.new(out_gtf, format="gtf") as out_track:
        # since GTF has custom fields, need to set the out_track to use
        # in_track's fields
        out_track.fields = in_track.fields
        for chrom in in_track.chromosomes:
            chrom_rec = in_track.read(chrom)
            out_track.write(chrom, update_gene_id_attr(chrom_rec, mapping))
Beispiel #10
0
 def newTrack(self, info=None, name=None):
     """Start a new output track and make it the current one.

     Any track that is currently open is closed first. The next path is
     taken from ``self.file_paths``, recorded in ``self.tracks`` and used
     to create the track. ``info``, when given, is merged into the new
     track's ``info`` mapping. ``name`` is accepted but not used here.
     """
     # Close previous track #
     if self.current_track:
         self.closeCurrentTrack()
     # Get a file name #
     # Builtin next() works on both Python 2 and Python 3 iterators.
     path = next(self.file_paths)
     # Add it to the result #
     self.tracks.append(path)
     # Create it #
     self.current_track = track.new(path)
     # Add the metadata #
     if info:
         self.current_track.info.update(info)
Beispiel #11
0
 def runTest(self):
     """Writing only start/end must leave the other default fields as None."""
     in_path = samples['small_features'][1]['sql']
     out_path = temporary_path('.sql')
     chrom = 'chrI'
     with track.load(in_path) as i:
         with track.new(out_path) as o:
             o.fields = track.default_fields
             # Copy just two of the columns from the sample track.
             o.write(chrom, i.read(chrom, ('start', 'end')))
             # Builtin next() fetches the first row on Python 2 and 3 alike.
             got = tuple(next(o.read(chrom)))
             expected = (0, 10, None, None, None)
     self.assertEqual(got, expected)
     os.remove(out_path)
Beispiel #12
0
 def runTest(self):
     """Check that fields missing from the written rows are read back as None.

     Only 'start' and 'end' are copied from the sample track into a track
     that uses ``track.default_fields``; the first row read back is
     expected to be ``(0, 10, None, None, None)``.
     """
     in_path = samples['small_features'][1]['sql']
     out_path = temporary_path('.sql')
     chrom = 'chrI'
     with track.load(in_path) as i:
         with track.new(out_path) as o:
             o.fields = track.default_fields
             o.write(chrom, i.read(chrom, ('start', 'end')))
             # next(iterator) rather than iterator.next(), which exists
             # only on Python 2.
             first_row = next(o.read(chrom))
             got = tuple(first_row)
             expected = (0, 10, None, None, None)
     self.assertEqual(got, expected)
     os.remove(out_path)
Beispiel #13
0
    def new_selection(self, project_id, s, job_name, job_description, *args,
                      **kw):
        '''
        Called by the browser. Transform a selection into a new track.

        ``s`` is a JSON string; once decoded it maps chromosome names to
        lists of marquees, each of which carries ``start`` and ``end``.
        A temporary 'sql' track is built from it, passed to
        ``handler.track.create_track``, and a selection job is recorded.

        Returns ``{'error': message}`` if the project id is unknown or the
        datatype is not supported; otherwise a dict with ``job_id``,
        ``job_name``, ``job_description`` and ``status`` ('RUNNING').
        '''
        user = handler.user.get_user_in_session(request)
        sels = json.loads(s)

        project = DBSession.query(Project).filter(
            Project.id == project_id).first()
        if project is None:
            return {'error': "project id %s doesn't exist" % project_id}
        path = track.common.temporary_path()

        with track.new(path, 'sql') as t:
            t.fields = simple_fields
            for chromosome in sels:
                # Start/end are taken from each marquee; the remaining
                # four columns are constant placeholders.
                t.write(chromosome,
                        ((marquee['start'], marquee['end'], 0, '', 0, '')
                         for marquee in sels[chromosome]))
            t.datatype = constants.FEATURES
            t.assembly = project.sequence.name

        # Only the task id of the returned pair is used below.
        task_id, _track_id = handler.track.create_track(
            user.id,
            project.sequence,
            f=path,
            trackname='%s %s' % (job_name, job_description),
            project=project)
        if task_id == constants.NOT_SUPPORTED_DATATYPE:
            # The message contains no placeholder, so it is returned as-is;
            # %-formatting it with an argument raises TypeError.
            return {'error': "not supported datatype"}

        job_id = handler.job.new_sel(user.id,
                                     project.id,
                                     job_description,
                                     job_name,
                                     task_id=task_id)
        return {
            'job_id': job_id,
            'job_name': job_name,
            'job_description': job_description,
            'status': 'RUNNING'
        }
Beispiel #14
0
 def runTest(self):
     """Smooth a small score track with ``window_smoothing(t, 2)`` and check 'chrI'.

     The smoothed result is exported to a second temporary file, loaded
     again and compared row by row with the expected values.
     """
     in_path = temporary_path('.sql')
     out_path = temporary_path('.sql')
     with track.new(in_path) as t:
         t.fields = ('start', 'end', 'score')
         t.assembly = 'sacCer2'
         t.write('chrI', [(0, 2, 10), (2, 4, 20), (6, 8, 10)])
         result = window_smoothing(t, 2)
         result.export(out_path)
     with track.load(out_path) as t:
         data = t.read('chrI')
         # Build a real list while the track is open: on Python 3 ``map``
         # is lazy and a map object never compares equal to a list.
         got = [tuple(row) for row in data]
     expected = [(0,  1,   8.0),
                 (1,  3,  12.0),
                 (3,  5,  10.0),
                 (5,  6,   8.0),
                 (6,  9,   4.0),
                 (9,  10,  2.0)]
     self.assertEqual(got, expected)
     os.remove(in_path)
     os.remove(out_path)
Beispiel #15
0
def main(n, datadir='data/train_sequences/', fname='simulated_alignment'):
    """Simulate an annotated alignment and write it to ``datadir``.

    Three sequences (called human, mouse and horse in the code) are built
    position by position, each with a per-position gene flag. Outputs,
    all named after ``fname``:

    * a FASTA file with the first two sequences,
    * one BED 'gene' annotation file for each of the first two sequences,
    * a JSON config file produced from an ``Annotations`` object.

    The third (horse) sequence is simulated, and a stale annotation file
    for it is removed, but writing its outputs is currently commented out.

    :param n: number of positions to simulate; replaced by
        ``int(sys.argv[1])`` when a command-line argument is present
    :param datadir: directory the output files are written to
    :param fname: base name of the output files; replaced by
        ``sys.argv[2]`` when a second command-line argument is present
    """
    s1name = "sequence1"
    s2name = "sequence2"
    s3name = "sequence3"
    annotation_name = 'gene'

    alignment_extension = ".fa"
    annotations_extension = ".bed"
    config_extension = ".js"

    # Command-line arguments, when present, take precedence over the
    # values passed to the function.
    if len(sys.argv) > 1:
        n = int(sys.argv[1])
    if len(sys.argv) > 2:
        fname = sys.argv[2]

    # State generators: one for the shared gene flag and one deletion
    # process per sequence (presumably two-state chains parameterised by
    # start/stop probabilities -- confirm against MarkovChain).
    master_gene_sequence = MarkovChain(P_START_GENE, P_STOP_GENE)
    human_delete_sequence = MarkovChain(P_START_DELETE, P_STOP_DELETE)
    mouse_delete_sequence = MarkovChain(P_START_DELETE, P_STOP_DELETE)
    horse_delete_sequence = MarkovChain(P_START_DELETE, P_STOP_DELETE)
    mutator_coin = BiasedCoin(P_NOT_MUTATE_GENE)

    # Per-position gene flags (master and one per sequence).
    master_gene = list()
    human_gene = list()
    mouse_gene = list()
    horse_gene = list()

    # Per-position characters of the three sequences.
    human_dna = list()
    mouse_dna = list()
    horse_dna = list()

    for i in range(n):
        # create master_gene item
        # (all three per-sequence flags start out equal to the master flag)
        g = g2 = g3 = g4 = master_gene_sequence.get_state()

        # mutate master_gene item
        # (only when the master flag is truthy; each sequence gets its own flip)
        if g:
            g2 = mutator_coin.flip()
            g3 = mutator_coin.flip()
            g4 = mutator_coin.flip()

        # Coins built from the sum of two gene flags; how the sum maps to
        # a probability is defined by create_dna_mutation_coin.
        dna_mutation_coin = create_dna_mutation_coin(g2 + g3)
        dna_mutation_coin2 = create_dna_mutation_coin(g2 + g4)

        # create DNA item
        # (the three sequences start from the same random index 0..3)
        c = c2 = c3 = random.randint(0, 3)
        c2 = mutate(c2, g2 + g3)
        # Translate indices to characters. NOTE: the comprehension variable
        # reuses the name ``i`` of the enclosing loop; on Python 2 it
        # overwrites the loop variable, which is not read afterwards.
        c, c2, c3 = [DNA_CHARS[i] for i in (c, c2, c3)]
        if not dna_mutation_coin.flip():
            # Pick one of the first three characters; if it equals the
            # current one, use the fourth instead, so the replacement
            # differs from the current character.
            char_index = random.randint(0, 2)
            if DNA_CHARS[char_index] == c2:
                char_index = 3
            c2 = DNA_CHARS[char_index]

        if not dna_mutation_coin2.flip():
            # Same replacement scheme for the third sequence.
            char_index = random.randint(0, 2)
            if DNA_CHARS[char_index] == c3:
                char_index = 3
            c3 = DNA_CHARS[char_index]

        # delete DNA item
        # (a truthy deletion state turns the character into a gap '-')
        if human_delete_sequence.get_state():
            c = '-'
        if mouse_delete_sequence.get_state():
            c2 = '-'
        if horse_delete_sequence.get_state():
            c3 = '-'

        # add items to sequence
        master_gene.append(g)
        human_gene.append(g2)
        mouse_gene.append(g3)
        horse_gene.append(g4)

        human_dna.append(c)
        mouse_dna.append(c2)
        horse_dna.append(c3)

    # output
    # Annotation file paths; an existing file at a path is deleted first.
    s1fname = os.path.join(
        datadir,
        fname + '_' + s1name + '_' + annotation_name + annotations_extension)
    if os.path.isfile(s1fname):
        os.remove(s1fname)
    s2fname = os.path.join(
        datadir,
        fname + '_' + s2name + '_' + annotation_name + annotations_extension)
    if os.path.isfile(s2fname):
        os.remove(s2fname)
    s3fname = os.path.join(
        datadir,
        fname + '_' + s3name + '_' + annotation_name + annotations_extension)
    if os.path.isfile(s3fname):
        os.remove(s3fname)

    # Turn the gene flags plus characters of each sequence into intervals
    # labelled with the annotation name.
    intervals1 = sequence_to_intervals(get_sequence(human_gene, human_dna),
                                       annotation_name)
    intervals2 = sequence_to_intervals(get_sequence(mouse_gene, mouse_dna),
                                       annotation_name)
    # intervals3 is computed but not written: the third track is disabled below.
    intervals3 = sequence_to_intervals(get_sequence(horse_gene, horse_dna),
                                       annotation_name)

    # Config object: all three sequence names are registered, but only the
    # first two get an annotation file.
    annotations = Annotations()
    annotations.setAnnotations([annotation_name])
    annotations.addSequences([s1name, s2name, s3name])
    annotations.addAnnotationFile(s1name, annotation_name, s1fname)
    annotations.addAnnotationFile(s2name, annotation_name, s2fname)
    # annotations.addAnnotationFile(s3name, annotation_name,  s3fname)

    # Alignment file with the first two sequences only.
    Fasta.save(
        [
            (s1name, ''.join(human_dna)),
            (s2name, ''.join(mouse_dna)),
            # (s3name, ''.join(horse_dna))
        ],
        os.path.join(datadir, fname + alignment_extension))

    # BED annotation tracks, everything stored under chromosome "chr1".
    with track.new(s1fname, 'bed') as t:
        t.fields = ['start', 'end', 'name']
        t.write("chr1", intervals1)
    with track.new(s2fname, 'bed') as t:
        t.fields = ['start', 'end', 'name']
        t.write("chr1", intervals2)
    # with track.new(s3fname, 'bed') as t:
    #     t.fields = ['start', 'end', 'name']
    #     t.write("chr1", intervals3)

    # JSON config describing the annotations (``Open`` is a project helper,
    # not the builtin ``open``).
    with Open(os.path.join(datadir, fname + config_extension), "w") as f:
        json.dump(annotations.toJSON(), f)