コード例 #1
0
    def __init__(self, *filenames, **kwargs):  #data_format=None,printer=None):
        """Create a |TabixGenomeHash|

        Parameters
        ----------
        filenames : str or list of str
            Filename or list of filenames of `Tabix`_-compressed files

        data_format : str
            Format of tabix-compressed file(s). Choices are:
            `'GTF2'`,`'GFF3'`,`'BED'`,`'PSL'` (Default: `GTF2`)

        printer : file-like, optional
            Object with a ``write()`` method to which the error message is
            written before it is raised (Default: a ``NullWriter``)

        Raises
        ------
        ValueError
            If `data_format` is not a key of ``TabixGenomeHash._READERS``
        """
        from pysam import Tabixfile

        # Allow both TabixGenomeHash("a", "b") and TabixGenomeHash(["a", "b"])
        if len(filenames) == 1 and isinstance(filenames[0], list):
            filenames = filenames[0]

        self.filenames = list(multiopen(filenames))
        self.printer = kwargs.get("printer", NullWriter())
        data_format = kwargs.get("data_format", "GTF2")
        try:
            self._reader_class = TabixGenomeHash._READERS[data_format]
        except KeyError:
            # A failed dict lookup raises KeyError (not ValueError); translate
            # it into the documented ValueError with a helpful message.
            msg = "Supported file formats for TabixGenomeHash are: %s" % ", ".join(
                sorted(TabixGenomeHash._READERS.keys()))
            self.printer.write(msg)
            raise ValueError(msg)

        self.tabix_readers = [Tabixfile(X) for X in self.filenames]
コード例 #2
0
    def __iter__(self):
        """Iterate over the tabix file, yielding one tuple per row.

        The first item yielded is the header: ``self.header`` when it is not
        ``None``, otherwise the last line of the file's own header split on
        tabs (nothing is yielded for the header if the file has none).
        Data rows are then fetched using ``self.reference``, ``self.start``,
        ``self.stop`` and ``self.region``. The file is closed when iteration
        finishes or is abandoned.
        """
        from pysam import Tabixfile, asTuple
        tabix = Tabixfile(self.filename, mode='r')
        try:
            if self.header is None:
                # No explicit header given: assume the last header line in
                # the file carries the field names.
                header_lines = list(tabix.header)
                if header_lines:
                    decoded = text_type(header_lines[-1], encoding='ascii')
                    yield tuple(decoded.split('\t'))
            else:
                yield self.header

            records = tabix.fetch(
                reference=self.reference,
                start=self.start,
                end=self.stop,
                region=self.region,
                parser=asTuple(),
            )
            for record in records:
                yield tuple(record)
        finally:
            tabix.close()
コード例 #3
0
ファイル: variant_consumer.py プロジェクト: tzhughes/genmod
    def __init__(self, task_queue, results_queue, family, args):
        """Set up the worker process from its queues, family and parsed options.

        Reads ``verbose``, ``phased``, ``cadd_file`` (first element only) and
        ``chr_prefix`` from ``args``. When a CADD file is given (truthy),
        ``self.cadd_file`` is replaced by an open ``Tabixfile`` whose rows are
        parsed as tuples; otherwise it keeps the original falsy value.
        """
        multiprocessing.Process.__init__(self)

        # Communication channels and the family being analysed.
        self.task_queue = task_queue
        self.family = family
        self.results_queue = results_queue

        # Options taken from the parsed command-line arguments.
        self.verbosity = args.verbose
        self.phased = args.phased
        cadd_path = args.cadd_file[0]
        self.cadd_file = cadd_path
        self.chr_prefix = args.chr_prefix

        if cadd_path:
            self.cadd_file = Tabixfile(cadd_path, parser=asTuple())
コード例 #4
0
 def __init__(self, chromosome, position, annotation_table_file):
     """Load the annotation record for a single genomic position.

     Opens `annotation_table_file` with ``Tabixfile``, takes the first record
     returned for `chromosome` over ``start=position - 1, end=position``,
     stores the raw line in ``self.line`` and each of its 32 tab-separated
     columns in the correspondingly named attribute. ``self.position`` is
     converted to ``int``; all other fields stay strings.

     NOTE(review): the ``position - 1`` start suggests `position` is 1-based
     and the tabix interval is 0-based, half-open -- confirm against callers.

     Raises
     ------
     StopIteration
         If no record is found for the requested position.
     ValueError
         If the record does not contain exactly 32 columns.
     """
     annotation_table = Tabixfile(annotation_table_file)
     try:
         # Builtin next() works on both Python 2 and Python 3 iterators,
         # unlike the Python 2-only ``.next()`` method.
         self.line = next(annotation_table.fetch(reference=chromosome,
                                                 start=position - 1,
                                                 end=position))
     finally:
         # Always release the file handle, even when the lookup fails.
         annotation_table.close()

     (self.chromosome,
      self.position,
      self.reference_base,
      self.genic,
      self.exonic,
      self.intronic,
      self.intergenic,
      self.utr5,
      self.utr3,
      self.fold0,
      self.fold4,
      self.fold2,
      self.fold3,
      self.CDS,
      self.mRNA,
      self.rRNA,
      self.tRNA,
      self.feature_names,
      self.feature_types,
      self.feature_ID,
      self.cds_position,
      self.strand,
      self.frame,
      self.codon,
      self.aa,
      self.degen,
      self.FPKM,
      self.rho,
      self.FAIRE,
      self.recombination,
      self.mutability,
      self.quebec_alleles) = self.line.split('\t')
     self.position = int(self.position)
コード例 #5
0
 def __iter__(self):
     """Iterate over the tabix file, yielding one tuple per row.

     Raises ``UnsatisfiedDependency`` if pysam cannot be imported. The first
     item yielded is the header: ``self.header`` when it is not ``None``,
     otherwise the last line of the file's own header split on tabs (no
     header is yielded if the file has none). Data rows are then fetched
     using ``self.reference``, ``self.start``, ``self.end`` and
     ``self.region``. The file is closed when iteration ends.
     """
     try:
         from pysam import Tabixfile, asTuple
     except ImportError as e:
         raise UnsatisfiedDependency(e, dep_message)

     tabix = Tabixfile(self.filename, mode='r')
     try:
         if self.header is None:
             # No explicit header given: assume the last header line in the
             # file carries the field names.
             header_lines = list(tabix.header)
             if header_lines:
                 yield tuple(header_lines[-1].split('\t'))
         else:
             yield self.header

         records = tabix.fetch(
             reference=self.reference,
             start=self.start,
             end=self.end,
             region=self.region,
             parser=asTuple(),
         )
         for record in records:
             yield tuple(record)
     finally:
         tabix.close()
コード例 #6
0
# Collect the second whitespace-separated field of every line in genofinfile.
genofins = [line.strip().split()[1] for line in genofinfile]

genofinfile.close()

# Column index in the genotype table for each entry of officialfindivs:
# position in genofins, shifted past the 6 leading columns kept below.
genoinds = [genofins.index(x) + 6 for x in officialfindivs]
y = {}
#t0 = time.time()
# "with" guarantees the output file is closed even if a lookup fails part-way.
with open(currfiles + '.bimbam','w') as currbimbam:
	for snp in masterdic:
	#for snp in masterdic.keys()[0:1000]:
		chrm = masterdic[snp][0]
		# Skip the header entry.
		if chrm == 'chrm':
			continue
		tabixer = Tabixfile('/mnt/lustre/home/cusanovich/500HT/Imputed1415/ByChr/hutt.all.imputed.' + chrm + '.txt.gz')
		try:
			# First record overlapping the SNP interval, split into columns.
			tempgenos = [x.split('\t') for x in tabixer.fetch(chrm,int(masterdic[snp][1])-1,int(masterdic[snp][2]))][0]
		finally:
			tabixer.close()
		# list(range(...)) keeps the concatenation valid on Python 2 and 3.
		genos = [tempgenos[x] for x in list(range(0,6)) + genoinds]
		y[snp] = [genos[3], 'A', 'G'] + genos[6:]
		# Write this SNP's own record; joining the dict `y` itself would emit
		# the keys of every SNP seen so far instead of the genotype row.
		currbimbam.write(", ".join(y[snp]) + "\n")

#t1 = time.time()
#print t1-t0

#genomat = matrix_reader(genodir + 'hutt.imputed.dhssnps.bimbam',sep=",")
print("Running GEMMA...")
gemmer = (hmdir + 'Programs/gemma0.94 -g ' + currfiles + '.bimbam -p ' + currfiles + '.pheno -k ' + currfiles + '.square.txt -c ' + currfiles + '.covariates -lmm 4 -maf 0.05 -o curr_' + pheno)
t0 = time.time()
ifier(gemmer)