Exemple #1
0
	def test_run(self):
		"""firstField, secondField and secondOrFirstField on simple headers"""
		# assertEqual (rather than assertTrue(a == b)) reports both the actual
		# and the expected value when a check fails.
		two_fields = 'FIRST SECOND'
		self.assertEqual(biofile.firstField(two_fields), 'FIRST')
		self.assertEqual(biofile.secondField(two_fields), 'SECOND')
		# With a single field present, secondOrFirstField must return that field.
		one_field = 'FIRST'
		self.assertEqual(biofile.secondOrFirstField(one_field), 'FIRST')
Exemple #2
0
        # open() is the portable way to get a file object; the file() builtin
        # exists only in Python 2.
        with open(fname, 'r') as inf:
            composition.read(inf)

    # Read input
    if not os.path.isfile(options.in_fname):
        raise IOError("# Error: file {} does not exist".format(
            options.in_fname))
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the handle instead of leaving it to the garbage collector.
    # NOTE(review): assumes readFASTA consumes the whole stream before
    # returning -- confirm against biofile.
    with open(options.in_fname, 'r') as fasta_file:
        (headers, seqs) = biofile.readFASTA(fasta_file)
    if options.translate_sequences:
        seqs = [translate.translate(s) for s in seqs]
    # Keep only the records whose sequence is not None.
    zhs = [(h, s) for (h, s) in zip(headers, seqs) if s is not None]
    all_keys = [biofile.firstField(h) for (h, s) in zhs]
    (headers, seqs) = zip(*zhs)
    # Sequence lookup keyed by the first header field.
    prot_dict = {biofile.firstField(h): s for (h, s) in zhs}
    # secondOrFirstField(header) -> firstField(header), plus the reverse map.
    gene_orf_dict = {
        biofile.secondOrFirstField(h): biofile.firstField(h) for h in headers
    }
    orf_gene_dict = {v: k for (k, v) in gene_orf_dict.items()}

    # Select which genes to process
    query_keys = []
    # Truthiness test on purpose: the previous `not x is []` compared identity
    # against a brand-new list and was therefore always true, so an unset
    # (None) option crashed on `+=` instead of being skipped.
    if options.query_orf:
        # Specific ORF(s)
        query_keys += options.query_orf
    if options.query_gene:
        # Specific gene(s)
        query_keys += [gene_orf_dict[k] for k in options.query_gene]
    if not query_keys:
        # Go through all proteins in database
        query_keys = all_keys
Exemple #3
0
	# Echo every parsed option to params_outs as '#'-prefixed header lines.
	params_outs.write("# Parameters:\n")
	optdict = vars(options)
	for (k,v) in optdict.items():
		params_outs.write("#\t{k}: {v}\n".format(k=k, v=v))

	# Read input
	if not os.path.isfile(options.in_fname):
		raise IOError("# Error: file {} does not exist".format(options.in_fname))
	# open() replaces the Python-2-only file() builtin, and the with-block
	# closes the handle instead of leaving it to the garbage collector.
	# NOTE(review): assumes readFASTA consumes the whole stream before
	# returning -- confirm against biofile.
	with open(options.in_fname, 'r') as fasta_file:
		(headers, seqs) = biofile.readFASTA(fasta_file)
	if options.translate_sequences:
		seqs = [translate.translate(s) for s in seqs]
	# Keep only the records whose sequence is not None.
	zhs = [(h,s) for (h,s) in zip(headers,seqs) if s is not None]
	all_keys = [biofile.firstField(h) for (h,s) in zhs]
	(headers, seqs) = zip(*zhs)
	# Sequence lookup keyed by the first header field.
	prot_dict = {biofile.firstField(h): s for (h,s) in zhs}
	# secondOrFirstField(header) -> firstField(header), plus the reverse map.
	gene_orf_dict = {biofile.secondOrFirstField(h): biofile.firstField(h) for h in headers}
	orf_gene_dict = {v: k for (k,v) in gene_orf_dict.items()}

	# Select which genes to process
	query_keys = []
	# Truthiness test on purpose: the previous `not x is []` compared identity
	# against a brand-new list and was therefore always true, so an unset
	# (None) option crashed on `+=` instead of being skipped.
	if options.query_orf:
		# Specific ORF(s)
		query_keys += options.query_orf
	if options.query_gene:
		# Specific gene(s)
		query_keys += [gene_orf_dict[k] for k in options.query_gene]
	if not query_keys:
		# Go through all proteins in database
		query_keys = all_keys

	start_position, end_position = scriptutil.findSequencePositions(options.start_position, options.end_position, 
Exemple #4
0
	# Echo every parsed option to data_outs as '#'-prefixed header lines.
	data_outs.write("# Parameters:\n")
	optdict = vars(options)
	for (k,v) in optdict.items():
		data_outs.write("#\t{k}: {v}\n".format(k=k, v=v))

	# Read input
	if not os.path.isfile(options.in_fname):
		raise IOError("# Error: file {} does not exist".format(options.in_fname))
	# open() replaces the Python-2-only file() builtin, and the with-block
	# closes the handle instead of leaving it to the garbage collector.
	# NOTE(review): assumes readFASTA consumes the whole stream before
	# returning -- confirm against biofile.
	with open(options.in_fname, 'r') as fasta_file:
		(headers, seqs) = biofile.readFASTA(fasta_file)
	if options.translate_sequences:
		seqs = [translate.translate(s) for s in seqs]
	# Keep only the records whose sequence is not None.
	zhs = [(h,s) for (h,s) in zip(headers,seqs) if s is not None]
	all_keys = [biofile.firstField(h) for (h,s) in zhs]
	(headers, seqs) = zip(*zhs)
	# Sequence lookup keyed by the first header field.
	prot_dict = {biofile.firstField(h): s for (h,s) in zhs}
	# secondOrFirstField(header) -> firstField(header), plus the reverse map.
	gene_orf_dict = {biofile.secondOrFirstField(h): biofile.firstField(h) for h in headers}
	orf_gene_dict = {v: k for (k,v) in gene_orf_dict.items()}

	# Select which genes to process
	query_keys = []
	# Truthiness test on purpose: the previous `not x is []` compared identity
	# against a brand-new list and was therefore always true, so an unset
	# (None) option crashed on `+=` instead of being skipped.
	if options.query_orf:
		# Specific ORF(s)
		query_keys += options.query_orf
	if options.query_gene:
		# Specific gene(s)
		query_keys += [gene_orf_dict[k] for k in options.query_gene]
	if not query_keys:
		# Go through all proteins in database
		query_keys = all_keys
	# Default window: position 0 through the length of the first kept sequence.
	start_position = 0
	end_position = len(seqs[0])