Esempio n. 1
0
	def test_run(self):
		"""firstField, secondField and secondOrFirstField on simple headers"""
		two_fields = 'FIRST SECOND'
		# assertEqual reports both operands when it fails, whereas
		# assertTrue(a == b) only reports "False is not true".
		self.assertEqual(biofile.firstField(two_fields), 'FIRST')
		self.assertEqual(biofile.secondField(two_fields), 'SECOND')
		# A header with a single field: secondOrFirstField must return that field.
		one_field = 'FIRST'
		self.assertEqual(biofile.secondOrFirstField(one_field), 'FIRST')
Esempio n. 2
0
	def secondField(h):
		"""Return biofile.secondField(h), or biofile.firstField(h) if that fails.

		Any ordinary exception raised by biofile.secondField triggers the
		fallback (presumably a header with only one field -- confirm against
		biofile).  KeyboardInterrupt and SystemExit are no longer swallowed,
		which a bare ``except:`` would do.
		"""
		try:
			return biofile.secondField(h)
		except Exception:
			return biofile.firstField(h)
Esempio n. 3
0
	def secondField(h):
		"""Return biofile.secondField(h), or biofile.firstField(h) if that fails.

		Any ordinary exception raised by biofile.secondField triggers the
		fallback (presumably a header with only one field -- confirm against
		biofile).  KeyboardInterrupt and SystemExit are no longer swallowed,
		which a bare ``except:`` would do.
		"""
		try:
			return biofile.secondField(h)
		except Exception:
			return biofile.firstField(h)
Esempio n. 4
0
	# Set up some output
	# outs receives the results; params_outs wraps outs, so parameter lines
	# end up wherever the results go (the output file if given, else stdout).
	info_outs = util.OutStreams(sys.stdout)
	outs = util.OutStreams()
	params_outs = util.OutStreams([outs])
	if options.out_fname is not None:
		outf = open(os.path.expanduser(options.out_fname),'w')
		outs.addStream(outf)
	else:
		outs.addStream(sys.stdout)

	# Optional FASTA input: build ORF -> sequence and gene -> ORF lookups
	# keyed on the first and second header fields respectively.
	orf_dict = None
	gene_orf_map = None
	if options.fasta_fname is not None:
		fname = os.path.expanduser(options.fasta_fname)
		(headers, sequences) = biofile.readFASTA(fname)
		orf_dict = dict(zip([biofile.firstField(h) for h in headers], sequences))
		gene_orf_map = dict([(biofile.secondField(h), biofile.firstField(h)) for h in headers])

	
	# Set the weight matrix
	try:
		matrix = motif.weight_matrices[options.pssm_name]
	except KeyError:
		outs.write("# Unable to find weight matrix {}; try one of {}\n".format(options.pssm_name, ','.join(motif.weight_matrices.keys())))
		# Without a matrix nothing below can run; re-raise rather than fall
		# through to a confusing NameError on the undefined 'matrix'.
		raise
	# Window size is taken from the length of the 'A' entry of the matrix.
	window_size = len(matrix['A'])
	# for associating windows with residues, center them
	mid_window = int(math.floor(window_size/2.0))
	
	# Write out parameters
	params_outs.write("# Run started {}\n".format(util.timestamp()))
	params_outs.write("# Command: {}\n".format(' '.join(sys.argv)))
Esempio n. 5
0
			f = biofile.secondField(h)
		except:
			f = biofile.firstField(h)
		return f

	# Read input
	if not os.path.isfile(options.in_fname):
		raise IOError("# Error: file {} does not exist".format(options.in_fname))
	# open() works on Python 2 and 3 (file() is Python-2 only), and the
	# with-block closes the handle instead of leaking it.
	# NOTE(review): assumes readFASTA consumes the stream before returning -- confirm.
	with open(options.in_fname, 'r') as inf:
		(headers, seqs) = biofile.readFASTA(inf) #, key_fxn=biofile.secondField)
	if options.translate_sequences:
		seqs = [translate.translate(s) for s in seqs]
	if not options.dont_align_sequences:
		aligned_seqs = muscle.alignSequences(seqs)
		seqs = aligned_seqs
	# Drop entries whose sequence is None, keeping headers and sequences paired.
	zhs = [(h,s) for (h,s) in zip(headers,seqs) if s is not None]
	all_keys = [biofile.firstField(h) for (h,s) in zhs]
	(headers, seqs) = zip(*zhs)
	# Lookups: ORF -> sequence, gene -> ORF, and the inverse ORF -> gene.
	prot_dict = dict([(biofile.firstField(h), s) for (h,s) in zhs])
	gene_orf_dict = dict([(secondField(h), biofile.firstField(h)) for h in headers])
	orf_gene_dict = dict([(v,k) for (k,v) in gene_orf_dict.items()])
	
	# Write output
	n_written = 0
	data_outs.write("header\n")
	for orf in query_keys:
		n_written += 1

	# Write out stopping time
	data_outs.write("# Run finished {}\n".format(util.timestamp()))

	# Shut down output
Esempio n. 6
0
             continue
     # Build and write one output record for the current (h, seq) pair.
     # Optional preprocessing: translate the sequence, then strip gap characters.
     if options.translate:
         seq = translate.translateRaw(seq)
     if options.degap:
         seq = seq.replace(gap, '')
     if not options.exclude:
         # Keep 1-based positions begin_aa..end_aa inclusive.  When end_aa is
         # None or larger than the sequence, the tail is left untruncated.
         if not options.end_aa is None and options.end_aa <= len(seq):
             seq = seq[0:(options.end_aa)]
         seq = seq[(options.begin_aa - 1):]
     else:  # Exclude the sequence
         # Cut out 1-based positions begin_aa..end_aa inclusive and join the
         # flanking parts.
         # NOTE(review): these checks are asserts, so they disappear under
         # "python -O"; end_aa must not be None on this path.
         assert options.end_aa < len(seq)
         assert options.begin_aa < options.end_aa
         seq = seq[0:(options.begin_aa - 1)] + seq[(options.end_aa):]
     # All properties below are computed on the gap-free sequence.
     degapped_seq = seq.replace(gap, "")
     # Record layout: a '#'-prefixed copy of the full header on its own line,
     # then tab-separated first header field, length, charge at options.pH,
     # isoelectric point and hydrophobicity.
     line = "#{}\n{}\t{:d}\t{:1.4f}\t{:1.4f}\t{:1.4f}".format(
         h, biofile.firstField(h), pp.getLength(degapped_seq),
         pp.getCharge(degapped_seq, options.pH),
         pp.getIsoelectricPoint(degapped_seq),
         pp.getHydrophobicity(degapped_seq))
     if not aas is None:
         # Two Composition objects from the same sequence: counts is used
         # as-is, freqs is used after normalize().
         counts = Composition()
         counts.initFromSequence(degapped_seq)
         freqs = Composition()
         freqs.initFromSequence(degapped_seq)
         freqs.normalize()
         # Append one freqs column per entry of aas, then one counts column
         # per entry of aas, in the same order.
         line += '\t' + '\t'.join([
             "{:1.4f}".format(freqs[aa]) for aa in aas
         ]) + '\t' + '\t'.join(["{:d}".format(counts[aa]) for aa in aas])
     data_outs.write(line + '\n')
     #print("# Wrote line\n")
 if not options.out_fname is None:
Esempio n. 7
0
    else:
        # Load the composition from the user-supplied file ("~" is expanded).
        fname = os.path.expanduser(options.composition_fname)
        if not os.path.isfile(fname):
            raise IOError("# Error: file {} does not exist".format(fname))
        # open() instead of file(): file() was removed in Python 3, while
        # open() behaves the same on Python 2 and 3.
        with open(fname, 'r') as inf:
            composition.read(inf)

    # Read input
    if not os.path.isfile(options.in_fname):
        raise IOError("# Error: file {} does not exist".format(
            options.in_fname))
    # open() works on Python 2 and 3 (file() is Python-2 only), and the
    # with-block closes the handle instead of leaking it.
    # NOTE(review): assumes readFASTA consumes the stream before returning -- confirm.
    with open(options.in_fname, 'r') as inf:
        (headers, seqs) = biofile.readFASTA(inf)
    if options.translate_sequences:
        seqs = [translate.translate(s) for s in seqs]
    # Drop entries whose sequence is None, keeping headers and sequences paired.
    zhs = [(h, s) for (h, s) in zip(headers, seqs) if s is not None]
    all_keys = [biofile.firstField(h) for (h, s) in zhs]
    (headers, seqs) = zip(*zhs)
    # Lookups: ORF -> sequence, gene -> ORF, and the inverse ORF -> gene.
    prot_dict = dict([(biofile.firstField(h), s) for (h, s) in zhs])
    gene_orf_dict = dict([
        (biofile.secondOrFirstField(h), biofile.firstField(h)) for h in headers
    ])
    orf_gene_dict = dict([(v, k) for (k, v) in gene_orf_dict.items()])

    # Select which genes to process
    # Truthiness test: "x is []" compares identity against a brand-new list
    # and is never true, so the old guards never skipped anything (and a
    # None option would have raised TypeError on +=).
    query_keys = []
    if options.query_orf:
        # Specific ORF(s)
        query_keys += options.query_orf
    if options.query_gene:
        # Specific gene(s), mapped to their ORF identifiers
        query_keys += [gene_orf_dict[k] for k in options.query_gene]
Esempio n. 8
0
	# Write out parameters
	# Every parsed option is echoed as a '#'-prefixed line in the output.
	params_outs.write("# Run started {}\n".format(util.timestamp()))
	params_outs.write("# Command: {}\n".format(' '.join(sys.argv)))
	params_outs.write("# Parameters:\n")
	optdict = vars(options)
	for (k,v) in optdict.items():
		params_outs.write("#\t{k}: {v}\n".format(k=k, v=v))

	# Read input
	if not os.path.isfile(options.in_fname):
		raise IOError("# Error: file {} does not exist".format(options.in_fname))
	# open() works on Python 2 and 3 (file() is Python-2 only), and the
	# with-block closes the handle instead of leaking it.
	# NOTE(review): assumes readFASTA consumes the stream before returning -- confirm.
	with open(options.in_fname, 'r') as inf:
		(headers, seqs) = biofile.readFASTA(inf) #, key_fxn=biofile.secondField)
	if options.translate_sequences:
		seqs = [translate.translate(s) for s in seqs]
	# Drop entries whose sequence is None, keeping headers and sequences paired.
	zhs = [(h,s) for (h,s) in zip(headers,seqs) if s is not None]
	all_keys = [biofile.firstField(h) for (h,s) in zhs]
	(headers, seqs) = zip(*zhs)
	# Lookups: ORF -> sequence, gene -> ORF, and the inverse ORF -> gene.
	prot_dict = dict([(biofile.firstField(h), s) for (h,s) in zhs])
	gene_orf_dict = dict([(biofile.secondOrFirstField(h), biofile.firstField(h)) for h in headers])
	orf_gene_dict = dict([(v,k) for (k,v) in gene_orf_dict.items()])

	# Select which genes to process
	# Truthiness test: "x is []" compares identity against a brand-new list
	# and is never true, so the old guards never skipped anything (and a
	# None option would have raised TypeError on +=).
	query_keys = []
	if options.query_orf:
		# Specific ORF(s)
		query_keys += options.query_orf
	if options.query_gene:
		# Specific gene(s), mapped to their ORF identifiers
		query_keys += [gene_orf_dict[k] for k in options.query_gene]
	if len(query_keys) == 0:
		# Go through all proteins in database
Esempio n. 9
0
	def parseHeader(x):
		"""Split a header into its name and its key=value properties.

		Returns (name, props): name is biofile.firstField(x); props maps the
		text before the first '=' to the text after it, for every
		whitespace-separated token of x that contains '='.
		"""
		name = biofile.firstField(x)
		# Split on the first '=' only: a value that itself contains '='
		# (e.g. "note=a=b") would otherwise produce a 3-element entry and make
		# dict() raise ValueError.
		props = dict(token.split('=', 1) for token in x.split() if '=' in token)
		return name, props
Esempio n. 10
0
File: protprop.py Progetto: dad/base
		# Build and write one output record for the current (h, seq) pair.
		# When a query string is given, skip headers that do not contain it.
		if options.query:
			if not options.query in h:
				continue
		# Optional preprocessing: translate the sequence, then strip gap characters.
		if options.translate:
			seq = translate.translateRaw(seq)
		if options.degap:
			seq = seq.replace(gap,'')
		if not options.exclude:
			# Keep 1-based positions begin_aa..end_aa inclusive.  When end_aa is
			# None or larger than the sequence, the tail is left untruncated.
			if not options.end_aa is None and options.end_aa <= len(seq):
				seq = seq[0:(options.end_aa)]
			seq = seq[(options.begin_aa-1):]
		else: # Exclude the sequence
			# Cut out 1-based positions begin_aa..end_aa inclusive and join the
			# flanking parts.
			# NOTE(review): these checks are asserts, so they disappear under
			# "python -O"; end_aa must not be None on this path.
			assert options.end_aa < len(seq)
			assert options.begin_aa < options.end_aa
			seq = seq[0:(options.begin_aa-1)] + seq[(options.end_aa):]
		# All properties below are computed on the gap-free sequence.
		degapped_seq = seq.replace(gap,"")
		# Record layout: a '#'-prefixed copy of the full header on its own line,
		# then tab-separated first header field, length, charge at options.pH,
		# isoelectric point and hydrophobicity.
		line = "#{}\n{}\t{:d}\t{:1.4f}\t{:1.4f}\t{:1.4f}".format(h, biofile.firstField(h), pp.getLength(degapped_seq), pp.getCharge(degapped_seq, options.pH), pp.getIsoelectricPoint(degapped_seq), pp.getHydrophobicity(degapped_seq))
		if not aas is None:
			# Two Composition objects from the same sequence: counts is used
			# as-is, freqs is used after normalize().
			counts = Composition()
			counts.initFromSequence(degapped_seq)
			freqs = Composition()
			freqs.initFromSequence(degapped_seq)
			freqs.normalize()
			# Append one freqs column per entry of aas, then one counts column
			# per entry of aas, in the same order.
			line += '\t' + '\t'.join(["{:1.4f}".format(freqs[aa]) for aa in aas]) + '\t' + '\t'.join(["{:d}".format(counts[aa]) for aa in aas])
		data_outs.write(line + '\n')
		#print("# Wrote line\n")
	# Close the output file only when one was requested via options.out_fname.
	if not options.out_fname is None:
		outf.close()