Example #1
0
 def get(record):
     """
     Look up the index entry for this record and combine its descriptions.

     The lookup key is the record attribute named by join_field; the entry
     found under that key is handed to annotation_tools.join_descriptions
     with '/' as the second argument.
     """
     join_key = getattr(record, join_field)
     matches = index[join_key]
     return annotation_tools.join_descriptions(matches, '/')
 def get(record):
     """
     Return the combined descriptions indexed under this record's join key.

     The key is read from the record attribute named by join_field, used to
     index into index, and the result is passed to
     annotation_tools.join_descriptions together with '/'.
     """
     key = getattr(record, join_field)
     descriptions = index[key]
     return annotation_tools.join_descriptions(descriptions, '/')
Example #3
0
    def run(self):
        """
        Build a reference from a UCSC table and the genome's chromFa archive.
        
        Every row of the table named by self.table becomes an mRNA feature.
        The mRNAs are grouped by _grouped_features; each group gets one gene
        feature spanning the group, followed by each mRNA with its exon
        features and, where an exon overlaps the cdsStart..cdsEnd range, a
        CDS feature clipped to that overlap.
        
        If self.download is set, chromFa.tar.gz is first fetched with rsync.
        The archive is unpacked into a temporary workspace, the features are
        written there as reference.gff, and Make_tt_reference is run on the
        unpacked files plus that GFF.
        
        Raises AssertionError if self.ucsc_name is empty, and KeyError if a
        row has a strand other than '+' or '-'.
        """
        assert self.ucsc_name, 'Need a UCSC genome name'
        
        scratch = _ucsc_scratch(self)
        
        # Load annotations
        
        source = 'tt-ucsc-%s-%s' % (self.ucsc_name, self.table)
        
        table = scratch.get_table(self.table)
        get_name = scratch.getter(self.name)
        get_product = scratch.getter(self.product)
        
        strand_codes = {'+':1, '-':-1}
        
        # One mRNA per table row. The row is kept on the feature so that its
        # exon and CDS columns can be read once the features are grouped.
        transcripts = [ ]
        for row in table:
            transcript = annotation.Annotation(
                seqid = row.chrom,
                source = source,
                type = 'mRNA',
                strand = strand_codes[row.strand],
                start = int(row.txStart),
                end = int(row.txEnd),
                attr = {
                    'ID' : row.name,
                    'Name' : get_name(row),
                    'Product' : get_product(row),
                    #'UCSC_name2' : row.name2,
                    },
                )
            transcript.record = row
            transcripts.append(transcript)
        
        _uniquify_ids(transcripts)
        
        features = [ ]
        
        for group in _grouped_features(transcripts):
            # The gene ID is assembled from the members' IDs *before* those
            # IDs are given their '-mRNA' suffix.
            gene_id = '/'.join([ member.attr['ID'] for member in group ])
            for member in group:
                member.attr['Parent'] = gene_id
                member.attr['ID'] += '-mRNA'
            
            first = group[0]
            gene = annotation.Annotation(
                source = source,
                type = 'gene',
                seqid = first.seqid,
                strand = first.strand,
                start = min(member.start for member in group),
                end = max(member.end for member in group),
                attr = {
                    'ID' : gene_id,
                    'Name' : annotation_tools.join_descriptions([ member.attr['Name'] for member in group ], '/'),
                    'Product' : annotation_tools.join_descriptions([ member.attr['Product'] for member in group ], '/'),
                    #'UCSC_name2' : annotation_tools.join_descriptions([ member.attr['UCSC_name2'] for member in group ], '/'),
                    },
                )
            features.append(gene)
            
            # Each mRNA is followed directly by its own exon and CDS features.
            for member in group:
                features.append(member)
                
                record = member.record
                exon_starts = _parse_ints(record.exonStarts)
                exon_ends = _parse_ints(record.exonEnds)
                cds_start = int(record.cdsStart)
                cds_end = int(record.cdsEnd)
                parent_id = member.attr['ID']
                
                for exon_start, exon_end in zip(exon_starts, exon_ends):
                    exon = annotation.Annotation(
                        source = source,
                        type = 'exon',
                        seqid = member.seqid,
                        strand = member.strand,
                        start = exon_start,
                        end = exon_end,
                        attr = {
                            'Parent' : parent_id,
                            },
                        )
                    features.append(exon)
                    
                    # Clip the coding range to this exon; an empty
                    # intersection means the exon carries no CDS.
                    coding_start = max(cds_start, exon_start)
                    coding_end = min(cds_end, exon_end)
                    if coding_start < coding_end:
                        coding = annotation.Annotation(
                            source = source,
                            type = 'CDS',
                            seqid = member.seqid,
                            strand = member.strand,
                            start = coding_start,
                            end = coding_end,
                            #TODO: phase
                            attr = {
                                'Parent' : parent_id,
                                },
                            )
                        features.append(coding)
        
        # Load sequence
        
        if self.download:
            url = 'rsync://hgdownload.cse.ucsc.edu/goldenPath/'+self.ucsc_name+'/bigZips/chromFa.tar.gz'
            io.execute(['rsync','-P',url,scratch.ucsc/'chromFa.tar.gz'])
        
        with workspace.tempspace() as temp:
            io.execute(['tar','-C',temp.working_dir,'-zxf',scratch.ucsc/'chromFa.tar.gz'])
            
            # The directory is listed before reference.gff is created.
            sequences = [ ]
            for filename in natural_sorted(os.listdir(temp.working_dir)):
                sequences.append(temp/filename)
            
            gff_filename = temp/'reference.gff'
            with open(gff_filename,'wb') as f:
                annotation.write_gff3_header(f)
                for feature in features:
                    print >> f, feature.as_gff()
            
            builder = Make_tt_reference(
                self.output_dir,
                filenames = sequences + [ gff_filename ],
                index = self.index,
                )
            builder.run()
    def run(self):
        """
        Create a reference from a UCSC annotation table and chromFa sequences.

        Steps, in order:

        1. Turn every row of the table named by self.table into an mRNA
           feature and pass the list to _uniquify_ids.
        2. For each group from _grouped_features, emit a gene feature that
           spans the group, then each mRNA followed by its exons and, for
           exons overlapping the cdsStart..cdsEnd range, the clipped CDS.
        3. If self.download is set, rsync chromFa.tar.gz from UCSC.
        4. Unpack the archive into a temporary workspace, write the features
           to reference.gff there, and run Make_tt_reference on the lot.

        Raises AssertionError if self.ucsc_name is empty, and KeyError if a
        row has a strand other than '+' or '-'.
        """
        assert self.ucsc_name, 'Need a UCSC genome name'

        scratch = _ucsc_scratch(self)

        # Load annotations

        source = 'tt-ucsc-%s-%s' % (self.ucsc_name, self.table)

        table = scratch.get_table(self.table)
        get_name = scratch.getter(self.name)
        get_product = scratch.getter(self.product)

        strand_codes = {'+': 1, '-': -1}

        def sub_feature(kind, parent, start, end):
            # Exon and CDS features copy seqid and strand from their mRNA and
            # point back at it through the Parent attribute.
            return annotation.Annotation(
                source=source,
                type=kind,
                seqid=parent.seqid,
                strand=parent.strand,
                start=start,
                end=end,
                attr={
                    'Parent': parent.attr['ID'],
                })

        transcripts = []

        for row in table:
            transcript = annotation.Annotation(
                seqid=row.chrom,
                source=source,
                type='mRNA',
                strand=strand_codes[row.strand],
                start=int(row.txStart),
                end=int(row.txEnd),
                attr={
                    'ID': row.name,
                    'Name': get_name(row),
                    'Product': get_product(row),
                    #'UCSC_name2' : row.name2,
                })

            # Keep the source row so exon/CDS columns can be read later.
            transcript.record = row
            transcripts.append(transcript)

        _uniquify_ids(transcripts)

        features = []

        for group in _grouped_features(transcripts):
            # Build the gene ID first: the members' IDs are rewritten with a
            # '-mRNA' suffix immediately afterwards.
            gene_id = '/'.join(member.attr['ID'] for member in group)
            for member in group:
                member.attr['Parent'] = gene_id
                member.attr['ID'] = member.attr['ID'] + '-mRNA'

            leader = group[0]
            features.append(
                annotation.Annotation(
                    source=source,
                    type='gene',
                    seqid=leader.seqid,
                    strand=leader.strand,
                    start=min(member.start for member in group),
                    end=max(member.end for member in group),
                    attr={
                        'ID':
                        gene_id,
                        'Name':
                        annotation_tools.join_descriptions(
                            [member.attr['Name'] for member in group], '/'),
                        'Product':
                        annotation_tools.join_descriptions(
                            [member.attr['Product'] for member in group],
                            '/'),
                        #'UCSC_name2' : annotation_tools.join_descriptions([ member.attr['UCSC_name2'] for member in group ], '/'),
                    }))

            for member in group:
                features.append(member)

                record = member.record
                exon_starts = _parse_ints(record.exonStarts)
                exon_ends = _parse_ints(record.exonEnds)
                cds_start = int(record.cdsStart)
                cds_end = int(record.cdsEnd)

                for exon_start, exon_end in zip(exon_starts, exon_ends):
                    features.append(
                        sub_feature('exon', member, exon_start, exon_end))

                    # Intersect the coding range with the exon; only a
                    # non-empty intersection yields a CDS feature.
                    #TODO: phase
                    clipped_start = max(cds_start, exon_start)
                    clipped_end = min(cds_end, exon_end)
                    if clipped_start < clipped_end:
                        features.append(
                            sub_feature('CDS', member, clipped_start,
                                        clipped_end))

        # Load sequence

        if self.download:
            remote = ('rsync://hgdownload.cse.ucsc.edu/goldenPath/' +
                      self.ucsc_name + '/bigZips/chromFa.tar.gz')
            io.execute(
                ['rsync', '-P', remote, scratch.ucsc / 'chromFa.tar.gz'])

        with workspace.tempspace() as temp:
            unpack_command = [
                'tar', '-C', temp.working_dir, '-zxf',
                scratch.ucsc / 'chromFa.tar.gz'
            ]
            io.execute(unpack_command)

            # Listed before reference.gff is written.
            unpacked = natural_sorted(os.listdir(temp.working_dir))
            sequences = [temp / name for name in unpacked]

            gff_filename = temp / 'reference.gff'
            with open(gff_filename, 'wb') as f:
                annotation.write_gff3_header(f)
                for feature in features:
                    print >> f, feature.as_gff()

            reference_maker = Make_tt_reference(
                self.output_dir,
                filenames=sequences + [gff_filename],
                index=self.index,
            )
            reference_maker.run()