def __init__(self, *filenames, **kwargs):
    """Create a |TabixGenomeHash|

    Parameters
    ----------
    *filenames : str or list of str
        Filename or list of filenames of `Tabix`_-compressed files.
        A single list passed as the only positional argument is unpacked.

    data_format : str, optional (keyword)
        Format of tabix-compressed file(s). Choices are:
        `'GTF2'`,`'GFF3'`,`'BED'`,`'PSL'` (Default: `GTF2`)

    printer : object with a ``write`` method, optional (keyword)
        Receives the error message if `data_format` is unsupported
        (Default: a new `NullWriter`)

    Raises
    ------
    ValueError
        If `data_format` is not a key of `TabixGenomeHash._READERS`
    """
    from pysam import Tabixfile

    # Accept both ``TabixGenomeHash(a, b)`` and ``TabixGenomeHash([a, b])``.
    if len(filenames) == 1 and isinstance(filenames[0], list):
        filenames = filenames[0]

    self.filenames = list(multiopen(filenames))
    self.printer = kwargs.get("printer", NullWriter())
    data_format = kwargs.get("data_format", "GTF2")
    try:
        self._reader_class = TabixGenomeHash._READERS[data_format]
    except KeyError:
        # A failed dict lookup raises KeyError (not ValueError); catch it so
        # the caller gets the list of supported formats as a ValueError.
        msg = "Supported file formats for TabixGenomeHash are: %s" % ", ".join(
            sorted(TabixGenomeHash._READERS.keys()))
        self.printer.write(msg)
        raise ValueError(msg)

    self.tabix_readers = [Tabixfile(X) for X in self.filenames]
def __iter__(self):
    """Yield a header tuple (if any) followed by one tuple per tabix record.

    The header is ``self.header`` when that is set; otherwise the last
    header line of the file, decoded as ASCII and split on tabs, is used
    (nothing is yielded for the header if the file has no header lines).
    Data rows come from ``fetch`` restricted by ``self.reference``,
    ``self.start``, ``self.stop`` and ``self.region``. The file is closed
    when iteration finishes or is abandoned.
    """
    from pysam import Tabixfile, asTuple

    tabix = Tabixfile(self.filename, mode='r')
    try:
        if self.header is None:
            # No explicit header: assume the last header line names the fields.
            header_lines = list(tabix.header)
            if header_lines:
                decoded = text_type(header_lines[-1], encoding='ascii')
                yield tuple(decoded.split('\t'))
        else:
            yield self.header

        records = tabix.fetch(
            reference=self.reference,
            start=self.start,
            end=self.stop,
            region=self.region,
            parser=asTuple(),
        )
        for record in records:
            yield tuple(record)
    finally:
        tabix.close()
def __init__(self, task_queue, results_queue, family, args):
    """Set up the worker process and copy its settings from ``args``.

    Parameters:
        task_queue: stored as ``self.task_queue``.
        results_queue: stored as ``self.results_queue``.
        family: stored as ``self.family``.
        args: object providing ``verbose``, ``phased``, ``chr_prefix`` and
            an indexable ``cadd_file`` whose first element is either falsy
            or a path that is opened with ``Tabixfile``.
    """
    multiprocessing.Process.__init__(self)

    # Work queues and the family handed to this worker.
    self.task_queue = task_queue
    self.results_queue = results_queue
    self.family = family

    # Settings copied from the argument object.
    self.verbosity = args.verbose
    self.phased = args.phased
    cadd_path = args.cadd_file[0]
    self.chr_prefix = args.chr_prefix

    # Keep the falsy value as-is; otherwise replace the path with an open
    # tabix handle whose rows are parsed as tuples.
    if cadd_path:
        self.cadd_file = Tabixfile(cadd_path, parser=asTuple())
    else:
        self.cadd_file = cadd_path
def __init__(self, chromosome, position, annotation_table_file):
    """Load the annotation-table row for a single position.

    The first record returned by
    ``fetch(reference=chromosome, start=position - 1, end=position)`` is
    stored in ``self.line`` and its 32 tab-separated columns are unpacked
    into attributes; ``self.position`` is converted to ``int``.

    Parameters:
        chromosome: reference name passed to ``fetch``.
        position: integer coordinate; ``position - 1`` is used as the
            fetch start, so this is presumably 1-based -- confirm against
            the table's coordinate system.
        annotation_table_file: path of the tabix-indexed annotation table.

    Raises:
        StopIteration: if the query returns no record.
        ValueError: if the record does not have exactly 32 columns or the
            position column is not an integer.
    """
    annotation_table = Tabixfile(annotation_table_file)
    try:
        records = annotation_table.fetch(reference=chromosome,
                                         start=position - 1,
                                         end=position)
        # Builtin next() works on both Python 2 and 3 iterators.
        self.line = next(records)
    finally:
        # Always release the file handle, including when the query is
        # empty or fetch() itself fails.
        annotation_table.close()

    (self.chromosome,
     self.position,
     self.reference_base,
     self.genic,
     self.exonic,
     self.intronic,
     self.intergenic,
     self.utr5,
     self.utr3,
     self.fold0,
     self.fold4,
     self.fold2,
     self.fold3,
     self.CDS,
     self.mRNA,
     self.rRNA,
     self.tRNA,
     self.feature_names,
     self.feature_types,
     self.feature_ID,
     self.cds_position,
     self.strand,
     self.frame,
     self.codon,
     self.aa,
     self.degen,
     self.FPKM,
     self.rho,
     self.FAIRE,
     self.recombination,
     self.mutability,
     self.quebec_alleles) = self.line.split('\t')
    self.position = int(self.position)
def __iter__(self):
    """Yield a header tuple (if any) followed by one tuple per tabix record.

    pysam is imported lazily; a failed import is re-raised as
    ``UnsatisfiedDependency``. The header is ``self.header`` when that is
    set; otherwise the last header line of the file, split on tabs, is used
    (nothing is yielded for the header if the file has no header lines).
    Data rows come from ``fetch`` restricted by ``self.reference``,
    ``self.start``, ``self.end`` and ``self.region``. The file is closed
    when iteration finishes or is abandoned.
    """
    try:
        from pysam import Tabixfile, asTuple
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)

    tabix = Tabixfile(self.filename, mode='r')
    try:
        if self.header is None:
            # No explicit header: assume the last header line names the fields.
            header_lines = list(tabix.header)
            if header_lines:
                yield tuple(header_lines[-1].split('\t'))
        else:
            yield self.header

        records = tabix.fetch(
            reference=self.reference,
            start=self.start,
            end=self.end,
            region=self.region,
            parser=asTuple(),
        )
        for record in records:
            yield tuple(record)
    finally:
        tabix.close()
# Build a per-SNP genotype file (<currfiles>.bimbam) from tabix-indexed
# per-chromosome genotype tables, then assemble and launch a GEMMA command.
# NOTE(review): this is a Python 2 script fragment whose original indentation
# was lost; the loop extents below are a best-effort reconstruction -- confirm
# against the original file before relying on them.

# Collect the second whitespace-separated field of every line of genofinfile.
genofins = []
for line in genofinfile:
    genofins.append(line.strip().split()[1])
genofinfile.close()

# Position of each entry of officialfindivs within genofins, shifted by 6
# because the first six columns of a genotype row are copied separately below
# (range(0,6)).
genoinds = [genofins.index(x) + 6 for x in officialfindivs]
y = {}
currbimbam = open(currfiles + '.bimbam','w')
#t0 = time.time()
for snp in masterdic.keys():
#for snp in masterdic.keys()[0:1000]:
    chrm = masterdic[snp][0]
    # Skip the entry whose chromosome field is the literal 'chrm'
    # (presumably a header row -- verify).
    if chrm == 'chrm':
        continue
    # A new tabix handle is opened (and closed) for every SNP.
    tabixer = Tabixfile('/mnt/lustre/home/cusanovich/500HT/Imputed1415/ByChr/hutt.all.imputed.' + chrm + '.txt.gz')
    # First record overlapping [masterdic[snp][1] - 1, masterdic[snp][2]),
    # split into columns; raises IndexError if the query returns nothing.
    tempgenos = [x.split('\t') for x in tabixer.fetch(chrm,int(masterdic[snp][1])-1,int(masterdic[snp][2]))][0]
    # Keep the six leading columns plus the columns of the selected individuals.
    genos = [tempgenos[x] for x in range(0,6) + genoinds]
    tabixer.close()
    y[snp] = [genos[3], 'A', 'G'] + genos[6:]
    # NOTE(review): y is a dict, so this joins its keys (the SNP names seen so
    # far) rather than the row just stored in y[snp] -- confirm whether
    # ", ".join(y[snp]) was intended.
    print >> currbimbam, ", ".join(y)
#t1 = time.time()
#print t1-t0
currbimbam.close()
#genomat = matrix_reader(genodir + 'hutt.imputed.dhssnps.bimbam',sep=",")
print "Running GEMMA..."
# Command line for GEMMA built from the current file prefix and phenotype name.
gemmer = (hmdir + 'Programs/gemma0.94 -g ' + currfiles + '.bimbam -p ' + currfiles + '.pheno -k ' + currfiles + '.square.txt -c ' + currfiles + '.covariates -lmm 4 -maf 0.05 -o curr_' + pheno)
t0 = time.time()
# ifier is defined elsewhere; presumably it executes the command string -- verify.
ifier(gemmer)