예제 #1
0
def map_sequence_key(sequence_key):
	"""Add DomainFoldableMap rows for the outfiles of one fold sequence.

	Every FilesystemOutfile whose ``sequence_key`` equals *sequence_key* is
	loaded together with its sequence and its parent sequence. The fold
	sequence is located inside the parent sequence with ``str.find``;
	``fold_start`` is that offset plus one and ``fold_stop`` is
	``fold_start + len(fold sequence)``. Each DomainRegion belonging to a
	Domain on the same parent sequence is then compared against that span
	with ``percent_coverage``, and a DomainFoldableMap is added to the
	session unless the result is reported as non-overlapping.

	Outfiles whose fold sequence does not occur in the parent sequence are
	skipped with a warning, because no valid span can be computed for them.

	Progress is printed to stdout. Returns None.

	sequence_key -- value matched against FilesystemOutfile.sequence_key
	"""
	fold_alias = aliased(Sequence)
	parent_alias = aliased(Sequence)
	session = Session()
	rows = session.query(FilesystemOutfile, fold_alias, parent_alias).join((fold_alias, FilesystemOutfile.sequence), (parent_alias, FilesystemOutfile.parent_sequence)).filter(FilesystemOutfile.sequence_key == sequence_key).all()

	for row in rows:
		print(row)
		outfile = row[0]
		fold_seq = row[1].sequence
		parent_seq = row[2].sequence

		# str.find returns -1 on a miss; adding 1 to that would yield a
		# plausible-looking start of 0, so bail out explicitly instead.
		offset = parent_seq.find(fold_seq)
		if offset < 0:
			print("WARNING: fold sequence %s not found in parent sequence %s, skipping outfile %s" % (outfile.sequence_key, outfile.parent_sequence_key, outfile.id))
			continue

		fold_start = offset + 1
		fold_stop = fold_start + len(fold_seq)
		print("%s %s" % (fold_start, fold_stop))

		domain_regions = session.query(Domain, DomainRegion).join(DomainRegion.domain).filter(Domain.parent_sequence_key == outfile.parent_sequence_key).all()
		for dreg in domain_regions:
			print(dreg)
			domain = dreg[0]
			region = dreg[1]
			domain_start = region.start
			domain_stop = region.stop

			loc_dict = {"fold_start": fold_start, "fold_stop": fold_stop, "domain_start": domain_start, "domain_stop": domain_stop}
			coverage = percent_coverage(loc_dict)
			print(coverage)

			#kdrew: do not add entries which are nonoverlapping
			if coverage['location'] not in ("fold_domain_nonoverlapping", "domain_fold_nonoverlapping"):
				dfm = DomainFoldableMap(
					parent_sequence_key=outfile.parent_sequence_key,
					fold_sequence_key=outfile.sequence_key,
					domain_sequence_key=domain.domain_sequence_key,
					domain_key=domain.id,
					outfile_key=outfile.id,
					fold_start=fold_start,
					fold_stop=fold_stop,
					domain_start=domain_start,
					domain_stop=domain_stop,
					fold_coverage=coverage['fold_coverage'],
					domain_coverage=coverage['domain_coverage'],
				)
				print("dfm:  %s" % (dfm,))

				# NOTE(review): nothing in this function flushes or commits
				# the session -- presumably the Session configuration or the
				# caller takes care of persisting; confirm.
				session.add(dfm)
예제 #2
0
def domain_fasta(id=None, outdir="/Users/kdrew/tmp"):
	"""Write a FASTA file of domain sequences, optionally for one experiment.

	Queries the distinct Sequence rows joined through DomainSCCS.domain,
	Domain.sequence, Domain.proteins and Protein.experiment, excluding rows
	whose DomainSCCS.domain_type is 'psiblast' or 'fold_recognition'. The
	sequences are converted with YRCRecordFactory and written with
	SeqIO.write in "fasta" format.

	id -- experiment id. When truthy, the query is restricted to that
	      experiment, its species() is printed, and the output file is
	      <outdir>/exp_<id>.fasta. When falsy (the default), all
	      experiments are included and the output file is
	      <outdir>/hpf_denovo.fasta.
	outdir -- directory the FASTA file is written to; defaults to the
	      previously hard-coded location.

	Prints the number of sequences found. Returns None. Raises whatever
	Query.one() raises if *id* does not match exactly one Experiment.
	"""
	# Use one session instance throughout; calling query() on the Session
	# factory itself only works when it is a scoped_session.
	session = Session()

	query = session.query(Sequence).join(DomainSCCS.domain).join(Domain.sequence).join(Domain.proteins).join(Protein.experiment).filter(not_(DomainSCCS.domain_type.in_(('psiblast', 'fold_recognition'))))

	if id:
		outfile = "%s/exp_%d.fasta" % (outdir, id)
		experiment = session.query(Experiment).filter(Experiment.id == id).one()
		print(experiment.species())
		query = query.filter(Experiment.id == id)
	else:
		print("all denovo")
		outfile = "%s/hpf_denovo.fasta" % (outdir,)

	sequences = query.distinct().all()
	print(len(sequences))

	bio_records = YRCRecordFactory().create(*sequences)
	# The with-block closes the file; no explicit close() is needed.
	with open(outfile, "w") as handle:
		SeqIO.write(bio_records, handle, "fasta")