Beispiel #1
0
                SNPs_index, keys = mapper.get()
            else:
                ch = mapper.chunk_pop()
                if ch is None:
                    SNPs_index = None
                    break
                SNPs_index, keys = mapper.get(chunk_number=ch)

            if isinstance(SNPs_index, type(None)):
                break

            Analyser.rsid = keys
            if np.sum(PD) == 0:
                genotype = np.array([])
                with Timer() as t_g:
                    genotype = merge_genotype(gen, SNPs_index, mapper)
                    genotype = genotype[:, row_index[0]]
                print "Time to get G {}s".format(t_g.secs)
            #TODO (low) add interaction

            a_test = np.array([])
            b_cov = np.array([])
            C = np.array([])
            a_cov = np.array([])
            b4 = np.array([])

            if args.protocol is not None:
                if protocol.enable:
                    regression_model = protocol.regression_model()
            else:
                regression_model = None
Beispiel #2
0
def haseregression(phen,
                   gen,
                   cov,
                   mapper,
                   Analyser,
                   maf,
                   intercept=True,
                   interaction=None):
    """Run the HASE regression for every genotype chunk and phenotype batch.

    The outer loop pulls genotype chunks (through ``mapper`` when one is
    given, otherwise straight from ``merge_genotype``); the inner loop pulls
    phenotype batches from ``phen``.  For each (genotype chunk, phenotype
    batch) pair the t-statistics and standard errors returned by ``HASE``
    are stored on ``Analyser`` and flushed with ``Analyser.save_result``.

    Args:
        phen: phenotype reader; must provide ``get_next(index=...)``,
            ``permutation`` and ``folder`` (with ``_data`` and ``processed``).
        gen: iterable of genotype readers, each exposing ``folder._data``.
        cov: covariates reader providing ``get_next(index=...)`` and
            ``folder._data``.
        mapper: SNP mapper or ``None``.  When given, ``mapper.cluster``
            selects between sequential reads (``'n'``) and chunked reads
            via ``chunk_pop()``.
        Analyser: result sink; its ``rsid``, ``MAF``, ``t_stat``, ``SE``
            (and in cluster mode ``cluster``, ``chunk``, ``node``)
            attributes are written here.
        maf: frequency threshold; ``0`` disables the filter.
        intercept: forwarded to ``A_covariates``, ``B_covariates`` and
            ``A_tests``.
        interaction: accepted for interface compatibility; currently unused.
    """
    g = tuple(i.folder._data for i in gen)

    row_index, _ = study_indexes(phenotype=phen.folder._data,
                                 genotype=g,
                                 covariates=cov.folder._data)

    # Progress counters: rows read, rows kept after filtering, rows expected.
    n_read = 0
    n_kept = 0
    n_total = mapper.n_keys if mapper is not None else 'unknown'

    covariates = cov.get_next(index=row_index[2])
    a_cov = A_covariates(covariates, intercept=intercept)

    while True:
        gc.collect()
        if mapper is not None:
            if mapper.cluster == 'n':
                SNPs_index, keys = mapper.get()
            else:
                ch = mapper.chunk_pop()
                if ch is None:
                    # No chunks left for this node.
                    break
                SNPs_index, keys = mapper.get(chunk_number=ch)
            if SNPs_index is None:
                break
            Analyser.rsid = keys
        else:
            SNPs_index = None

        with Timer() as t:
            genotype = merge_genotype(gen, SNPs_index, mapper)
        print('time to read and merge genotype {}s'.format(t.secs))
        gc.collect()
        if genotype is None:
            print('All genotype processed!')
            break
        n_read += genotype.shape[0]
        # Keep only the columns selected by study_indexes for the genotype.
        genotype = genotype[:, row_index[0]]

        if mapper is None:
            # Without a mapper, rows are identified by their position.
            Analyser.rsid = np.arange(genotype.shape[0])

        # Per-row mean divided by two.
        # NOTE(review): presumably rows are SNPs coded 0/1/2 -- confirm.
        MAF = np.mean(genotype, axis=1) / 2

        if maf != 0:
            keep = (MAF > maf) & (MAF < 1 - maf) & (MAF != 0.5)
            genotype = genotype[keep, :]
            Analyser.MAF = MAF[keep]
            Analyser.rsid = Analyser.rsid[keep]

            if genotype.shape[0] == 0:
                print('NO SNPs > MAF')
                continue
        else:
            Analyser.MAF = MAF

        n_kept += genotype.shape[0]

        while True:
            phenotype = phen.get_next(index=row_index[1])

            if phenotype is None:
                # Rewind the phenotype reader for the next genotype chunk.
                phen.folder.processed = 0
                print('All phenotypes processed!')
                break

            if phen.permutation:
                np.random.shuffle(phenotype)

            b_cov = B_covariates(covariates, phenotype, intercept=intercept)
            C = C_matrix(phenotype)

            # NOTE(review): a_test and a_inv take no phenotype input; they
            # could be hoisted out of this loop if A_tests / A_inverse are
            # pure -- confirm before changing.
            a_test = A_tests(covariates, genotype, intercept=intercept)
            a_inv = A_inverse(a_cov, a_test)

            N_con = a_inv.shape[1] - 1
            DF = phenotype.shape[0] - a_inv.shape[1]

            b4 = B4(phenotype, genotype)

            t_stat, SE = HASE(b4, a_inv, b_cov, C, N_con, DF)
            print('Read {}, processed {}, total {}'.format(
                n_read, n_kept, n_total))
            Analyser.t_stat = t_stat
            Analyser.SE = SE
            if mapper is not None and mapper.cluster == 'y':
                Analyser.cluster = True
                Analyser.chunk = ch
                Analyser.node = mapper.node[1]
            if phen.permutation:
                Analyser.permutation = True

            phen_data = phen.folder._data
            Analyser.save_result(
                phen_data.names[phen_data.start:phen_data.finish])

            # Drop the per-batch intermediates before the next iteration so
            # the collector can reclaim them.
            Analyser.t_stat = None
            del b4, C, b_cov, a_inv, a_test, t_stat
            gc.collect()

    if Analyser.cluster:
        np.save(
            os.path.join(Analyser.out,
                         str(Analyser.node) + '_node_RSID.npy'),
            Analyser.rsid_dic)
Beispiel #3
0
	while True:
		if args.cluster=='n':
			SNPs_index, keys=mapper.get_next()
		else:
			chunk=mapper.chunk_pop()
			if chunk is None:
				SNPs_index=None
				break
			print chunk
			SNPs_index, keys=mapper.get_chunk(chunk)

		if SNPs_index is None:
			break
		RSID.append(keys)

		data=merge_genotype(gen, SNPs_index) #TODO (high) add mapper
		print data.shape
		if args.cluster=='n':
			h5_gen_file = tables.openFile(
				os.path.join(args.out,str(hdf5_iter)+'_'+h5_name+'.h5'), 'w', title=args.save_name)
		else:#TODO (high) check!
			h5_gen_file = tables.openFile(
				os.path.join(args.out,str(chunk[0])+'_' +str(chunk[1])+'_'+h5_name+'.h5'), 'w', title=args.save_name)
		hdf5_iter+=1

		atom = tables.Int8Atom()  # TODO (low) check data format
		genotype = h5_gen_file.createCArray(h5_gen_file.root, 'genotype', atom,
											(data.shape),
											title='Genotype',
											filters=pytable_filter)
		genotype[:] = data
Beispiel #4
0
def haseregression(phen, gen, cov, mapper, Analyser, maf, intercept=True):
	"""Run the HASE regression for every genotype chunk and phenotype batch.

	The outer loop pulls genotype chunks (through ``mapper`` when one is
	given, otherwise straight from ``merge_genotype``); the inner loop pulls
	phenotype batches from ``phen``.  For each (genotype chunk, phenotype
	batch) pair the t-statistics and standard errors returned by ``HASE``
	are stored on ``Analyser`` and flushed with ``Analyser.save_result``.

	Args:
		phen: phenotype reader; must provide ``get_next(index=...)``,
			``permutation`` and ``folder`` (with ``_data`` and ``processed``).
		gen: iterable of genotype readers, each exposing ``folder._data``.
		cov: covariates reader providing ``get_next(index=...)`` and
			``folder._data``.
		mapper: SNP mapper or ``None``.  When given, ``mapper.cluster``
			selects between sequential reads (``'n'``) and chunked reads
			via ``chunk_pop()``.
		Analyser: result sink; its ``rsid``, ``MAF``, ``t_stat``, ``SE``
			(and in cluster mode ``cluster``, ``chunk``, ``node``)
			attributes are written here.
		maf: frequency threshold; ``0`` disables the filter.
		intercept: forwarded to ``A_covariates``, ``B_covariates`` and
			``A_tests``.
	"""
	g = tuple(i.folder._data for i in gen)

	row_index, _ = study_indexes(phenotype=phen.folder._data,
								 genotype=g,
								 covariates=cov.folder._data)

	# Progress counters: rows read, rows kept after filtering, rows expected.
	n_read = 0
	n_kept = 0
	n_total = mapper.n_keys if mapper is not None else 'unknown'

	covariates = cov.get_next(index=row_index[2])
	a_cov = A_covariates(covariates, intercept=intercept)

	while True:
		gc.collect()
		if mapper is not None:
			if mapper.cluster == 'n':
				SNPs_index, keys = mapper.get()
			else:
				ch = mapper.chunk_pop()
				if ch is None:
					# No chunks left for this node.
					break
				print(ch)
				SNPs_index, keys = mapper.get(chunk_number=ch)
			if SNPs_index is None:
				break
			Analyser.rsid = keys
		else:
			SNPs_index = None

		with Timer() as t:
			genotype = merge_genotype(gen, SNPs_index, mapper)
		print('time to read and merge genotype {}s'.format(t.secs))
		gc.collect()
		if genotype is None:
			print('All genotype processed!')
			break
		n_read += genotype.shape[0]
		# Keep only the columns selected by study_indexes for the genotype.
		genotype = genotype[:, row_index[0]]

		if mapper is None:
			# Without a mapper, rows are identified by their position.
			Analyser.rsid = np.arange(genotype.shape[0])

		# Per-row mean divided by two.
		# NOTE(review): presumably rows are SNPs coded 0/1/2 -- confirm.
		MAF = np.mean(genotype, axis=1) / 2

		if maf != 0:
			keep = (MAF > maf) & (MAF < 1 - maf) & (MAF != 0.5)
			genotype = genotype[keep, :]
			Analyser.MAF = MAF[keep]
			Analyser.rsid = Analyser.rsid[keep]

			if genotype.shape[0] == 0:
				print('NO SNPs > MAF')
				continue
		else:
			Analyser.MAF = MAF

		n_kept += genotype.shape[0]

		while True:
			phenotype = phen.get_next(index=row_index[1])

			if phenotype is None:
				# Rewind the phenotype reader for the next genotype chunk.
				phen.folder.processed = 0
				print('All phenotypes processed!')
				break

			if phen.permutation:
				np.random.shuffle(phenotype)

			b_cov = B_covariates(covariates, phenotype, intercept=intercept)
			C = C_matrix(phenotype)

			# NOTE(review): a_test and a_inv take no phenotype input; they
			# could be hoisted out of this loop if A_tests / A_inverse are
			# pure -- confirm before changing.
			a_test = A_tests(covariates, genotype, intercept=intercept)
			a_inv = A_inverse(a_cov, a_test)

			N_con = a_inv.shape[1] - 1
			DF = phenotype.shape[0] - a_inv.shape[1]

			b4 = B4(phenotype, genotype)

			t_stat, SE = HASE(b4, a_inv, b_cov, C, N_con, DF)
			print('Read {}, processed {}, total {}'.format(n_read, n_kept, n_total))
			Analyser.t_stat = t_stat
			Analyser.SE = SE
			if mapper is not None and mapper.cluster == 'y':
				Analyser.cluster = True
				Analyser.chunk = ch
				Analyser.node = mapper.node[1]
			if phen.permutation:
				Analyser.permutation = True

			phen_data = phen.folder._data
			Analyser.save_result(phen_data.names[phen_data.start:phen_data.finish])

			# Drop the per-batch intermediates before the next iteration so
			# the collector can reclaim them.
			Analyser.t_stat = None
			del b4, C, b_cov, a_inv, a_test, t_stat
			gc.collect()

	if Analyser.cluster:
		np.save(os.path.join(Analyser.out, str(Analyser.node) + '_node_RSID.npy'), Analyser.rsid_dic)
Beispiel #5
0
				SNPs_index, keys=mapper.get()
			else:
				ch=mapper.chunk_pop()
				if ch is None:
					SNPs_index=None
					break
				print ch
				SNPs_index, keys=mapper.get(chunk_number=ch)

			if isinstance(SNPs_index, type(None)):
				break

			Analyser.rsid=keys
			if np.sum(PD)==0:
				genotype=np.array([])
				genotype=merge_genotype(gen, SNPs_index, mapper, flip_flag=False)
				genotype=genotype[:,row_index[0]]

			#TODO (low) add interaction

			a_test=np.array([])
			b_cov=np.array([])
			C=np.array([])
			a_cov=np.array([])
			b4=np.array([])

			if args.protocol is not None:
				if protocol.enable:
					regression_model=protocol.regression_model()
			else:
				regression_model=None