Example 1
 def run(self):
     # Launch IGV on self.genome, loading each file in self.files,
     # via a generated IGV batch script run with "igv.jar -b".
     genome = self.genome
     if os.path.isdir(genome):
         # A genome directory implies a .genome file named after it.
         genome = os.path.join(genome, os.path.split(genome)[1]+'.genome')
         print genome
     
     #pref_filename = os.path.join(os.path.expanduser('~'),'igv','prefs.properties')
     #if os.path.exists(pref_filename):
     #    with open(pref_filename,'rb') as f:
     #        lines = f.readlines()
     #    with open(pref_filename,'wb') as f:
     #        for line in lines:
     #            if line.startswith('DEFAULT_GENOME_KEY='):
     #                #line = 'DEFAULT_GENOME_KEY=\n'
     #                continue
     #            f.write(line)
     
     with workspace.tempspace() as temp:
         with open(temp/'batch.txt','wb') as f:
             print >> f, 'new'
             print >> f, 'preference LAST_TRACK_DIRECTORY', os.getcwd()
             print >> f, 'preference LAST_GENOME_IMPORT_DIRECTORY', os.getcwd()
             print >> f, 'genome '+os.path.abspath(genome)
             for filename in self.files:
                 print >> f, 'load '+os.path.abspath(filename)
         
         io.execute(['java','-Xmx32000m',
                     #Flags from IGV's igv.sh launcher script:
                     '-Dproduction=true','-Dapple.laf.useScreenMenuBar=true','-Djava.net.preferIPv4Stack=true',
                     '-jar',io.find_jar('igv.jar'),'-b',temp/'batch.txt'])
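
For reference, the file written above uses IGV's batch command syntax
(new, preference, genome, load). Assuming a hypothetical genome
directory hg19/ and two data files under /home/user/project (paths
purely illustrative), temp/'batch.txt' would come out roughly as:

    new
    preference LAST_TRACK_DIRECTORY /home/user/project
    preference LAST_GENOME_IMPORT_DIRECTORY /home/user/project
    genome /home/user/project/hg19/hg19.genome
    load /home/user/project/sample1.bam
    load /home/user/project/sample2.bam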
Example 2
    def run(self):
        # Build a nesoni reference directory from selected genes: parse the
        # extraction specs and renames, write filtered GFF3 and FASTA into a
        # temp space, then index them with Make_tt_reference.
        extractions = [ ]
        for item in self.genes.split(','):
            extraction = item.split('/')
            assert len(extraction) == 4
            extractions.append(extraction)
            
        rename = { }
        if self.rename:
            for item in self.rename.split(','):
                old,new = item.split('=')
                rename[old] = new

        work = self.get_workspace()        
        
        with workspace.tempspace() as temp:
            items = list(annotation.read_annotations(self.annotation))
            for item in items:
                item.seqid = rename.get(item.seqid, item.seqid)
            annotation.write_gff3(temp/'temp.gff', get_genes(items, extractions, self.log))
            del items
            
            with open(temp/'temp.fa','wb') as f:
                for name,seq in io.read_sequences(self.genome):
                    name = name.split()[0]
                    name = rename.get(name,name)
                    io.write_fasta(f, name, seq)
            
            reference_directory.Make_tt_reference(
                self.output_dir,
                filenames = [ temp/'temp.fa', temp/'temp.gff' ] + self.extra,
                index = self.index, shrimp = self.shrimp, 
                bowtie = self.bowtie, star = self.star
                ).run()
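
The genes argument is parsed as a comma-separated list in which each
item must have exactly four '/'-separated fields (the assert enforces
this; the fields' meaning is defined by get_genes, which is not shown),
and rename maps old sequence names to new ones. A sketch with made-up
values:

    # Illustrative only -- the four fields are whatever get_genes() expects.
    genes = 'geneA/a/b/c,geneB/d/e/f'
    extractions = [item.split('/') for item in genes.split(',')]
    # -> [['geneA', 'a', 'b', 'c'], ['geneB', 'd', 'e', 'f']]
    rename = dict(item.split('=') for item in 'chr1=NC_000001'.split(','))
    # -> {'chr1': 'NC_000001'}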
Example 3
    def run(self):
        extractions = [ ]
        for item in self.genes.split(','):
            extraction = item.split('/')
            assert len(extraction) == 4
            extractions.append(extraction)
            
        rename = { }
        if self.rename:
            for item in self.rename.split(','):
                old,new = item.split('=')
                rename[old] = new

        work = self.get_workspace()        
        
        with workspace.tempspace() as temp:
            items = list(annotation.read_annotations(self.annotation))
            for item in items:
                item.seqid = rename.get(item.seqid, item.seqid)
            annotation.write_gff3(temp/'temp.gff', get_genes(items, extractions, self.log))
            del items
            
            with open(temp/'temp.fa','wb') as f:
                for name,seq in io.read_sequences(self.genome):
                    name = name.split()[0]
                    name = rename.get(name,name)
                    io.write_fasta(f, name, seq)
            
            reference_directory.Make_tt_reference(
                self.output_dir,
                filenames = [ temp/'temp.fa', temp/'temp.gff' ],
                index = self.index,
                ).run()
Example 4
 def run(self):
     # Minimal IGV launcher: write a batch script, then run igv.jar on it.
     with workspace.tempspace() as temp:
         with open(temp/'batch.txt','wb') as f:
             print >> f, 'new'
             print >> f, 'genome '+os.path.abspath(self.genome)
             for filename in self.files:
                 print >> f, 'load '+os.path.abspath(filename)
         
         io.execute(['java','-jar',io.find_jar('igv.jar'),'-b',temp/'batch.txt'])
Example 5
 def tryout(self, ref, variants):
     # Run the templated job in a temp space, mine its log, and return
     # (nesoni_count, nesoni_good, vcf_count, vcf_good).
     with workspace.tempspace() as temp:
         job = self.template(temp.working_dir, ref=ref, variants=variants)            
         job.run()            
         
         result = dict( tuple(item.values()) for item in reporting.mine_logs([job.log_filename()]) )
         nesoni_count = int(result['changes found by "nesoni consensus:"'])
         nesoni_good = {'yes':True,'no':False}[result['is correctly patched by "nesoni consensus:"']]
         vcf_count = int(result['variants after filtering'])
         vcf_good = {'yes':True,'no':False}[result['is correctly patched by VCF pipeline']]
         return nesoni_count, nesoni_good, vcf_count, vcf_good
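
tryout assumes the job's log reports exactly the four keys used above;
reporting.mine_logs turns them into key/value pairs. A hypothetical
mined result and the corresponding return value:

    result = {
        'changes found by "nesoni consensus:"': '3',
        'is correctly patched by "nesoni consensus:"': 'yes',
        'variants after filtering': '2',
        'is correctly patched by VCF pipeline': 'no',
    }
    # tryout would then return (3, True, 2, False)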
Example 6
 def run(self):
     with workspace.tempspace() as temp:
         with open(temp/'batch.txt','wb') as f:
             print >> f, 'new'
             print >> f, 'preference LAST_TRACK_DIRECTORY', os.getcwd()
             print >> f, 'preference LAST_GENOME_IMPORT_DIRECTORY', os.getcwd()
             print >> f, 'genome '+os.path.abspath(self.genome)
             for filename in self.files:
                 print >> f, 'load '+os.path.abspath(filename)
         
         io.execute(['java','-jar',io.find_jar('igv.jar'),'-b',temp/'batch.txt'])
Example 7
    def run(self):
        # Fetch the genome FASTA and GFF3 annotation from Ensembl via rsync,
        # then build a reference directory from the selected genes.
        assert self.release
        assert self.species
        assert self.assembly
        assert self.dna
        
        extractions = [ ]
        for item in self.genes.split(','):
            extraction = item.split('/')
            assert len(extraction) == 4
            extractions.append(extraction)
            
        rename = { }
        if self.rename:
            for item in self.rename.split(','):
                old,new = item.split('=')
                rename[old] = new

        work = self.get_workspace()        
        ensembl = workspace.Workspace(work/'ensembl')
        
        genome_filename = self.species+"."+self.assembly+"."+self.dna+".fa.gz"
        genome_url = "rsync://ftp.ensembl.org/ensembl/pub/release-"+self.release+"/fasta/"+self.species.lower()+"/dna/"+genome_filename
        
        gff_filename = self.species+"."+self.assembly+"."+self.release+".gff3.gz"
        gff_url = "rsync://ftp.ensembl.org/ensembl/pub/release-"+self.release+"/gff3/"+self.species.lower()+"/"+gff_filename
        
        
        if self.download:
            self.log.log("Fetching "+genome_url+"\n")
            io.execute(['rsync','-aP',genome_url, ensembl/genome_filename])
            self.log.log("Fetching "+gff_url+"\n")
            io.execute(['rsync','-aP',gff_url, ensembl/gff_filename])
        
        with workspace.tempspace() as temp:
            items = list(annotation.read_annotations(ensembl/gff_filename))
            for item in items:
                item.seqid = rename.get(item.seqid, item.seqid)
            annotation.write_gff3(temp/'temp.gff', get_genes(items, extractions, self.log))
            del items
            
            with open(temp/'temp.fa','wb') as f:
                for name,seq in io.read_sequences(ensembl/genome_filename):
                    name = name.split()[0]
                    name = rename.get(name,name)
                    io.write_fasta(f, name, seq)
            
            reference_directory.Make_tt_reference(
                self.output_dir,
                filenames = [ temp/'temp.fa', temp/'temp.gff' ],
                index = self.index,
                ).run()
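
The URL strings above follow Ensembl's public FTP layout. With
hypothetical parameters release='98', species='Saccharomyces_cerevisiae',
assembly='R64-1-1' and dna='dna_sm.toplevel', the two constructed URLs
would be:

    rsync://ftp.ensembl.org/ensembl/pub/release-98/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna_sm.toplevel.fa.gz
    rsync://ftp.ensembl.org/ensembl/pub/release-98/gff3/saccharomyces_cerevisiae/Saccharomyces_cerevisiae.R64-1-1.98.gff3.gz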
Example 8
    def tryout(self, ref, variants):
        with workspace.tempspace() as temp:
            job = self.template(temp.working_dir, ref=ref, variants=variants)
            job.run()

            #result = dict( tuple(item.values()) for item in reporting.mine_logs([job.log_filename()]) )
            [result] = reporting.mine_logs([job.log_filename()]).values()
            nesoni_count = int(result['changes found by "nesoni consensus:"'])
            nesoni_good = {
                'yes': True,
                'no': False
            }[result['is correctly patched by "nesoni consensus:"']]
            vcf_count = int(result['variants after filtering'])
            vcf_good = {
                'yes': True,
                'no': False
            }[result['is correctly patched by VCF pipeline']]
            return nesoni_count, nesoni_good, vcf_count, vcf_good
Example 9
    def run(self):
        # Align single, paired and interleaved reads with bowtie2, piping
        # the SAM output through samtools into a name-sorted BAM.
        assert self.reads or self.pairs or self.interleaved, 'No reads given'
    
        io.check_name_uniqueness(self.reads, self.pairs, self.interleaved)
        
        working = self.get_workspace()
        working.setup_reference(self.references, bowtie=True)
        working.update_param(snp_cost=2.0)        
        reference = working.get_reference()
        
        log_file = open(self.log_filename(),'wb')
              
        with workspace.tempspace(dir=working.working_dir) as temp:
            n = [ 0 ]
            def tempname():
                n[0] += 1
                return temp/('%d.fq'%n[0])
            def convert(filename):
                info = io.get_file_info(filename)
                ok = selection.matches('type-fastq:[compression-none/compression-gzip/compression-bzip2]', info)
                if ok:
                    return filename            
                result_name = tempname()
                with open(result_name,'wb') as f:
                    for name, seq, qual in io.read_sequences(filename, qualities='required'):
                        io.write_fastq(f, name, seq, qual)
                return result_name
            
            ones = [ ]
            twos = [ ]
            singles = [ ]
            
            for pair in self.pairs:
                assert len(pair) == 2, 'Need two files in each "pair:" section.'
                ones.append(convert(pair[0]))
                twos.append(convert(pair[1]))
            
            for item in self.interleaved:
                left_name = tempname()
                right_name = tempname()
                ones.append(left_name)
                twos.append(right_name)
                with open(left_name,'wb') as left, \
                     open(right_name,'wb') as right:
                    reader = io.read_sequences(item, qualities='required')
                    while True:
                        try:
                            name, seq, qual = reader.next()
                        except StopIteration:
                            break
                        io.write_fastq(left, name,seq,qual)
                        
                        try:
                            name, seq, qual = reader.next()
                        except StopIteration:
                            raise grace.Error('Interleaved file contains odd number of sequences')
                        io.write_fastq(right, name,seq,qual)
            
            for item in self.reads:
                singles.append(convert(item))

            cores = min(self.cores, legion.coordinator().get_cores())

            command = (
                [ 'bowtie2', 
                    '--threads', str(cores),
                    '--rg-id', '1',
                    '--rg', 'SM:'+working.name,                    
                    ] + 
                self.bowtie_options + 
                [ '-x', reference.get_bowtie_index_prefix() ]
                )
            commands = [ ]
            if ones:
                commands.append(command + [ '-1', ','.join(ones), '-2', ','.join(twos) ])
            if singles:
                commands.append(command + [ '-U', ','.join(singles) ])
            
            temp_bam_name = temp/'temp.bam'

            with io.pipe_to(
                     ['samtools', 'view', '-S', '-b', '-'],
                     stdout=open(temp_bam_name,'wb'),
                     stderr=log_file
                     ) as f:
                header_sent = False
                for command in commands:
                    self.log.log('Running:\n' + ' '.join(command) + '\n')            
                    with io.pipe_from(
                        command,
                        stderr=log_file,
                        cores=cores
                        ) as f_out:
                        for line in f_out:
                            if not header_sent or not line.startswith('@'):
                                f.write(line)
                    header_sent = True

            #io.execute([
            #    'samtools', 'sort', '-n', temp_bam_name, working/'alignments'
            #    ])
            
            sam.sort_bam(temp_bam_name, working/'alignments', by_name=True, cores=self.cores)
            
        log_file.close()
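
Ignoring the FASTQ conversion and header deduplication, the pipeline
built above is roughly equivalent to this shell command (file names
hypothetical, self.bowtie_options omitted):

    bowtie2 --threads 4 --rg-id 1 --rg SM:sample \
        -x ref/bowtie_index -1 a_1.fq,b_1.fq -2 a_2.fq,b_2.fq \
        | samtools view -S -b - > temp.bam

run once for the paired files and once with -U for the single files,
with the combined BAM then name-sorted into working/alignments.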
Example 10
    def run(self):
        assert self.reads or self.pairs or self.interleaved, 'No reads given'

        io.check_name_uniqueness(self.reads, self.pairs, self.interleaved)

        working = self.get_workspace()
        working.setup_reference(self.references, bowtie=True)
        working.update_param(snp_cost=2.0)
        reference = working.get_reference()

        log_file = open(self.log_filename(), 'wb')

        with workspace.tempspace(dir=working.working_dir) as temp:
            n = [0]

            def tempname():
                n[0] += 1
                return temp / ('%d.fq' % n[0])

            def convert(filename):
                info = io.get_file_info(filename)
                ok = selection.matches(
                    'type-fastq:[compression-none/compression-gzip/compression-bzip2]',
                    info)
                if ok:
                    return filename
                result_name = tempname()
                with open(result_name, 'wb') as f:
                    for name, seq, qual in io.read_sequences(
                            filename, qualities='required'):
                        io.write_fastq(f, name, seq, qual)
                return result_name

            ones = []
            twos = []
            singles = []

            for pair in self.pairs:
                assert len(pair) == 2, 'Need two files in each "pair:" section.'
                ones.append(convert(pair[0]))
                twos.append(convert(pair[1]))

            for item in self.interleaved:
                left_name = tempname()
                right_name = tempname()
                ones.append(left_name)
                twos.append(right_name)
                with open(left_name,'wb') as left, \
                     open(right_name,'wb') as right:
                    reader = io.read_sequences(item, qualities='required')
                    while True:
                        try:
                            name, seq, qual = reader.next()
                        except StopIteration:
                            break
                        io.write_fastq(left, name, seq, qual)

                        try:
                            name, seq, qual = reader.next()
                        except StopIteration:
                            raise grace.Error(
                                'Interleaved file contains odd number of sequences'
                            )
                        io.write_fastq(right, name, seq, qual)

            for item in self.reads:
                singles.append(convert(item))

            cores = min(self.cores, legion.coordinator().get_cores())

            command = ([
                'bowtie2',
                '--threads', str(cores),
                '--rg-id', '1',
                '--rg', 'SM:' + working.name,
            ] + self.bowtie_options +
                ['-x', reference.get_bowtie_index_prefix()])
            commands = []
            if ones:
                commands.append(command +
                                ['-1', ','.join(ones), '-2', ','.join(twos)])
            if singles:
                commands.append(command + ['-U', ','.join(singles)])

            temp_bam_name = temp / 'temp.bam'

            with io.pipe_to(['samtools', 'view', '-S', '-b', '-'],
                            stdout=open(temp_bam_name, 'wb'),
                            stderr=log_file) as f:
                header_sent = False
                for command in commands:
                    self.log.log('Running:\n' + ' '.join(command) + '\n')
                    with io.pipe_from(command, stderr=log_file,
                                      cores=cores) as f_out:
                        for line in f_out:
                            if not header_sent or not line.startswith('@'):
                                f.write(line)
                    header_sent = True

            #io.execute([
            #    'samtools', 'sort', '-n', temp_bam_name, working/'alignments'
            #    ])

            sam.sort_bam(temp_bam_name,
                         working / 'alignments',
                         by_name=True,
                         cores=self.cores)

        log_file.close()
Example 11
    def run(self):
        # Build a reference from a UCSC genome: convert a UCSC gene table
        # into GFF3 gene/mRNA/exon/CDS features and unpack the chromosome
        # FASTA archive.
        assert self.ucsc_name, 'Need a UCSC genome name'
        
        scratch = _ucsc_scratch(self)
        
        # Load annotations
        
        source = 'tt-ucsc-%s-%s' % (self.ucsc_name, self.table)
        
        table = scratch.get_table(self.table)
        get_name = scratch.getter(self.name)
        get_product = scratch.getter(self.product)

        mrnas = [ ]
        
        for item in table:
            ann = annotation.Annotation(
                seqid = item.chrom,
                source = source,
                type = 'mRNA',
                strand = {'+':1, '-':-1}[item.strand],
                start = int(item.txStart),
                end = int(item.txEnd),
                attr = {
                    'ID' : item.name,
                    'Name' : get_name(item),
                    'Product' : get_product(item),
                    #'UCSC_name2' : item.name2,
                    }
                )
            
            ann.record = item
            mrnas.append(ann)

        _uniquify_ids(mrnas)
        
        annotations = [ ]
        
        for group in _grouped_features(mrnas):
            ID = '/'.join(item.attr['ID'] for item in group)
            for item in group:
                item.attr['Parent'] = ID
                item.attr['ID'] = item.attr['ID'] + '-mRNA'
            
            annotations.append(annotation.Annotation(
                source = source,
                type = 'gene',
                seqid = group[0].seqid,
                strand = group[0].strand,
                start = min(item.start for item in group),
                end = max(item.end for item in group),
                attr = {
                    'ID' : ID,
                    'Name' : annotation_tools.join_descriptions([ item.attr['Name'] for item in group ], '/'),
                    'Product' : annotation_tools.join_descriptions([ item.attr['Product'] for item in group ], '/'),
                    #'UCSC_name2' : annotation_tools.join_descriptions([ item.attr['UCSC_name2'] for item in group ], '/'),
                    }
                ))
            for item in group:
                annotations.append(item)
                
                exonStarts = _parse_ints(item.record.exonStarts)
                exonEnds = _parse_ints(item.record.exonEnds)
                cdsStart = int(item.record.cdsStart)
                cdsEnd = int(item.record.cdsEnd)
                for start,end in zip(exonStarts,exonEnds):
                    annotations.append(annotation.Annotation(
                        source = source,
                        type = 'exon',
                        seqid = item.seqid,
                        strand = item.strand,
                        start = start,
                        end = end,
                        attr = {
                            'Parent' : item.attr['ID'],
                            }
                        ))
                    if max(cdsStart,start) < min(cdsEnd,end):
                        annotations.append(annotation.Annotation(
                            source = source,
                            type = 'CDS',
                            seqid = item.seqid,
                            strand = item.strand,
                            start = max(cdsStart,start),
                            end = min(cdsEnd,end),
                            #TODO: phase
                            attr = {
                                'Parent' : item.attr['ID'],
                                }
                            ))

        # Load sequence
        
        if self.download:
            io.execute(['rsync','-P','rsync://hgdownload.cse.ucsc.edu/goldenPath/'+self.ucsc_name+'/bigZips/chromFa.tar.gz',scratch.ucsc/'chromFa.tar.gz'])
        
        with workspace.tempspace() as temp:
            io.execute(['tar','-C',temp.working_dir,'-zxf',scratch.ucsc/'chromFa.tar.gz'])
            sequences = [ temp/item for item in natural_sorted(os.listdir(temp.working_dir)) ]
            
            with open(temp/'reference.gff','wb') as f:
                annotation.write_gff3_header(f)
                for item in annotations:
                    print >> f, item.as_gff()
            
            Make_tt_reference(
                self.output_dir,
                filenames = sequences + [ temp/'reference.gff' ],
                index = self.index,
                ).run()
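
Each gene group is written as a gene feature with its mRNAs, exons and
CDS features nested beneath it via Parent attributes. A schematic slice
of the resulting reference.gff, assuming a hypothetical ucsc_name of
hg19 and table refGene (coordinates made up, attributes abbreviated):

    chr1  tt-ucsc-hg19-refGene  gene  1001  9000  .  +  .  ID=NM_0001
    chr1  tt-ucsc-hg19-refGene  mRNA  1001  9000  .  +  .  ID=NM_0001-mRNA;Parent=NM_0001
    chr1  tt-ucsc-hg19-refGene  exon  1001  1200  .  +  .  Parent=NM_0001-mRNA
    chr1  tt-ucsc-hg19-refGene  CDS   1051  1200  .  +  .  Parent=NM_0001-mRNA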
Example 12
    def run(self):
        # Call variants with FreeBayes: collect sample BAMs (depth-limited
        # and merged if necessary), then stream the VCF out with sample tag
        # lines spliced into its header.
        bams = [ ]
        reference = None
        reference2 = None
        
        extra = [ ]
        
        for sample in self.samples:
            if sam.is_bam(sample):
                bams.append(sample)
            elif os.path.isdir(sample):
                working = working_directory.Working(sample,True)
                bams.append( working.get_filtered_sorted_bam() )
                extra.append( '##sampleTags=' + ','.join(working.get_tags()) )
                if reference2 is None:
                    reference2 = working.get_reference().reference_fasta_filename()
            elif io.is_sequence_file(sample):
                assert reference is None, 'Only one reference FASTA file allowed.'
                reference = sample
        
        if reference is None:
            reference = reference2
        if reference is None:
            raise grace.Error('No reference FASTA file given.')
        
        with nesoni.Stage() as stage:
            tempspace = stage.enter( workspace.tempspace() )
            if self.depth_limit:
                with nesoni.Stage() as stage2:
                    for i in xrange(len(bams)):
                        sam.Bam_depth_limit(
                            tempspace/('%d'%i), 
                            bams[i], 
                            depth=self.depth_limit
                            ).process_make(stage2)
                        bams[i] = tempspace/('%d.bam'%i)
            
            # FreeBayes claims to handle multiple bams, but it doesn't actually work
            if len(bams) > 1:
                sam.Bam_merge(tempspace/'merged', bams=bams, index=False).run()
                bams = [ tempspace/'merged.bam' ]
        
            command = [ 
                'freebayes',
                '-f', reference,
                '--ploidy',str(self.ploidy),
                '--pvar',str(self.pvar),
                ] + self.freebayes_options + bams
            
            self.log.log('Running: '+' '.join(command)+'\n')
        
            f_out = stage.enter( open(self.prefix+'.vcf','wb') )
            f_in  = stage.enter( io.pipe_from(command) )
            done_extra = False
            for line in f_in:
                if not done_extra and not line.startswith('##'):
                    for extra_line in extra:
                        f_out.write(extra_line+'\n')
                    done_extra = True
                f_out.write(line)

        index_vcf(self.prefix+'.vcf')
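
After any depth limiting and merging, only a single FreeBayes
invocation remains; with hypothetical values it reduces to roughly:

    freebayes -f reference.fa --ploidy 1 --pvar 0.9 merged.bam > prefix.vcf

with the collected ##sampleTags= lines spliced into the VCF header just
before the first non-## line.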
Example 13
    def run(self):
        bams = []
        reference = None
        reference2 = None

        extra = []

        for sample in self.samples:
            if sam.is_bam(sample):
                bams.append(sample)
            elif os.path.isdir(sample):
                working = working_directory.Working(sample, True)
                bams.append(working.get_filtered_sorted_bam())
                extra.append('##sampleTags=' + ','.join(working.get_tags()))
                if reference2 is None:
                    reference2 = working.get_reference().reference_fasta_filename()
            elif io.is_sequence_file(sample):
                assert reference is None, 'Only one reference FASTA file allowed.'
                reference = sample

        if reference is None:
            reference = reference2
        if reference is None:
            raise grace.Error('No reference FASTA file given.')

        with nesoni.Stage() as stage:
            tempspace = stage.enter(workspace.tempspace())
            if self.depth_limit:
                with nesoni.Stage() as stage2:
                    for i in xrange(len(bams)):
                        sam.Bam_depth_limit(
                            tempspace / ('%d' % i),
                            bams[i],
                            depth=self.depth_limit).process_make(stage2)
                        bams[i] = tempspace / ('%d.bam' % i)

            # FreeBayes claims to handle multiple bams, but it doesn't actually work
            if len(bams) > 1:
                sam.Bam_merge(tempspace / 'merged', bams=bams,
                              index=False).run()
                bams = [tempspace / 'merged.bam']

            command = [
                'freebayes',
                '-f', reference,
                '--ploidy', str(self.ploidy),
                '--pvar', str(self.pvar),
            ] + self.freebayes_options + bams

            self.log.log('Running: ' + ' '.join(command) + '\n')

            f_out = stage.enter(open(self.prefix + '.vcf', 'wb'))
            f_in = stage.enter(io.pipe_from(command))
            done_extra = False
            for line in f_in:
                if not done_extra and not line.startswith('##'):
                    for extra_line in extra:
                        f_out.write(extra_line + '\n')
                    done_extra = True
                f_out.write(line)

        index_vcf(self.prefix + '.vcf')
Example 14
    def run(self):
        assert self.ucsc_name, 'Need a UCSC genome name'

        scratch = _ucsc_scratch(self)

        # Load annotations

        source = 'tt-ucsc-%s-%s' % (self.ucsc_name, self.table)

        table = scratch.get_table(self.table)
        get_name = scratch.getter(self.name)
        get_product = scratch.getter(self.product)

        mrnas = []

        for item in table:
            ann = annotation.Annotation(
                seqid=item.chrom,
                source=source,
                type='mRNA',
                strand={'+': 1, '-': -1}[item.strand],
                start=int(item.txStart),
                end=int(item.txEnd),
                attr={
                    'ID': item.name,
                    'Name': get_name(item),
                    'Product': get_product(item),
                    #'UCSC_name2' : item.name2,
                })

            ann.record = item
            mrnas.append(ann)

        _uniquify_ids(mrnas)

        annotations = []

        for group in _grouped_features(mrnas):
            ID = '/'.join(item.attr['ID'] for item in group)
            for item in group:
                item.attr['Parent'] = ID
                item.attr['ID'] = item.attr['ID'] + '-mRNA'

            annotations.append(
                annotation.Annotation(
                    source=source,
                    type='gene',
                    seqid=group[0].seqid,
                    strand=group[0].strand,
                    start=min(item.start for item in group),
                    end=max(item.end for item in group),
                    attr={
                        'ID': ID,
                        'Name': annotation_tools.join_descriptions(
                            [item.attr['Name'] for item in group], '/'),
                        'Product': annotation_tools.join_descriptions(
                            [item.attr['Product'] for item in group], '/'),
                        #'UCSC_name2' : annotation_tools.join_descriptions([ item.attr['UCSC_name2'] for item in group ], '/'),
                    }))
            for item in group:
                annotations.append(item)

                exonStarts = _parse_ints(item.record.exonStarts)
                exonEnds = _parse_ints(item.record.exonEnds)
                cdsStart = int(item.record.cdsStart)
                cdsEnd = int(item.record.cdsEnd)
                for start, end in zip(exonStarts, exonEnds):
                    annotations.append(
                        annotation.Annotation(source=source,
                                              type='exon',
                                              seqid=item.seqid,
                                              strand=item.strand,
                                              start=start,
                                              end=end,
                                              attr={
                                                  'Parent': item.attr['ID'],
                                              }))
                    if max(cdsStart, start) < min(cdsEnd, end):
                        annotations.append(
                            annotation.Annotation(
                                source=source,
                                type='CDS',
                                seqid=item.seqid,
                                strand=item.strand,
                                start=max(cdsStart, start),
                                end=min(cdsEnd, end),
                                #TODO: phase
                                attr={
                                    'Parent': item.attr['ID'],
                                }))

        # Load sequence

        if self.download:
            io.execute([
                'rsync', '-P', 'rsync://hgdownload.cse.ucsc.edu/goldenPath/' +
                self.ucsc_name + '/bigZips/chromFa.tar.gz',
                scratch.ucsc / 'chromFa.tar.gz'
            ])

        with workspace.tempspace() as temp:
            io.execute([
                'tar', '-C', temp.working_dir, '-zxf',
                scratch.ucsc / 'chromFa.tar.gz'
            ])
            sequences = [
                temp / item
                for item in natural_sorted(os.listdir(temp.working_dir))
            ]

            with open(temp / 'reference.gff', 'wb') as f:
                annotation.write_gff3_header(f)
                for item in annotations:
                    print >> f, item.as_gff()

            Make_tt_reference(
                self.output_dir,
                filenames=sequences + [temp / 'reference.gff'],
                index=self.index,
            ).run()