Ejemplo n.º 1
0
def main(args, options):
    """Parse alignments, filter them, and optionally write them out as a track.

    Args:
        args: positional command-line arguments; exactly two are required.
            args[0] is the input handed to parse() (or, with options.batch,
            handed to batch() to be expanded into several inputs). args[1]
            is forwarded unchanged to parse() and batch() -- presumably the
            alignment format; TODO confirm against those helpers.
        options: parsed option values. Attributes read here: bestn,
            identity, match_percent, unique, query_len, target, fasta, k,
            ref, annodir, genome, batch, out, track_name, color, by_fasta
            and gzip.

    With any other number of arguments the error is reported through the
    module-level ``parser`` and nothing else is done.
    """
    # Imported locally so the block does not depend on a file-level import.
    import subprocess

    if len(args) != 2:
        parser.error("incorrect number of arguments")
        return

    # Collect only the filters the user actually asked for.
    filters = {}
    if options.bestn:
        filters['bestn'] = int(options.bestn)
    if options.identity:
        filters['identity'] = float(options.identity)
    if options.match_percent:
        filters['match'] = float(options.match_percent)
    if options.unique:
        filters['unique'] = True
    if options.query_len:
        filters['qlen'] = int(options.query_len)
    if options.target:
        # An existing path is read through get_targets(); anything else is
        # taken as a single literal target.
        if os.path.exists(options.target):
            filters['target'] = get_targets(options.target)
        else:
            filters['target'] = [options.target]

    # extracts contigs
    contigs = None
    if options.fasta:
        contigs = get_contigs(options.fasta, options.k)

    # extracts reference sequence
    refseq = None
    if options.ref:
        refseq = tools.get_refseq_from_2bit(options.annodir, options.genome)

    if options.batch:
        # contig sequences are in batches
        inputs = batch(args[1], args[0])
        # Batch output is appended to, so drop any file left by an earlier run.
        if options.out and os.path.exists(options.out):
            os.remove(options.out)
    else:
        inputs = [args[0]]

    for index, infile in enumerate(inputs):
        aligns = parse(infile, args[1], filters)
        if options.out:
            output_aligns(options.out, aligns, options.track_name,
                          contigs=contigs,
                          append=options.batch,
                          # only the first input emits the header
                          header=(index == 0),
                          genome=options.genome, refseq=refseq,
                          color=options.color, by_fasta=options.by_fasta,
                          annodir=options.annodir)

    # zip up track
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from being misinterpreted. Without an output file there
    # is nothing to compress, so the step is skipped.
    if options.gzip and options.out:
        subprocess.call(['gzip', '--force', '--best', options.out])
Ejemplo n.º 2
0
    def prepare_annotation(self):
        """Load the genome-specific resources used for overlapping annotations.

        Nothing is loaded unless ``self.genome`` is set. Otherwise this
        populates ``chrom_proper``, ``refseq`` and ``repeat_overlaps``, plus
        ``splice_motif_file`` when that file exists and ``ff`` when a gene
        model is configured.
        """
        if not self.genome:
            return

        genome = self.genome
        annodir = self.annodir

        # chromosome-name conversion between FASTA and annotation (hg19)
        self.chrom_proper = tools.ucsc_chroms(genome, annodir)

        # remember the splice motif file only when it is actually present
        motif_path = os.path.join(annodir, genome, 'splice_motifs.txt')
        if os.path.isfile(motif_path):
            self.splice_motif_file = motif_path

        # reference sequence, used to construct cDNA sequences for
        # in/out frame determination
        self.refseq = tools.get_refseq_from_2bit(annodir, genome)

        # feature lookup (genes, exons, etc), only with a gene model
        if self.gene_model:
            self.ff = FeatureFinder(genome, self.gene_model, annodir, self.mmcfg)

        # used to discard contigs that mapped entirely within repeats
        self.repeat_overlaps = repeat.prepare_overlap(genome, annodir)