def test_run(self):
    """secondOrFirstField

    Checks firstField and secondField on a two-field header, then checks
    that secondOrFirstField returns the only field of a one-field header.
    """
    two_fields = 'FIRST SECOND'
    # assertEqual reports both operands on failure, unlike assertTrue(a == b).
    self.assertEqual(biofile.firstField(two_fields), 'FIRST')
    self.assertEqual(biofile.secondField(two_fields), 'SECOND')
    one_field = 'FIRST'
    self.assertEqual(biofile.secondOrFirstField(one_field), 'FIRST')
def secondField(h):
    """Return the second field of header *h*, falling back to the first.

    Calls biofile.secondField(h); if that call raises, the result of
    biofile.firstField(h) is returned instead.
    """
    try:
        return biofile.secondField(h)
    except Exception:
        # Still broad, because the exception type raised by
        # biofile.secondField is not visible here, but no longer swallows
        # KeyboardInterrupt or SystemExit the way a bare ``except:`` did.
        return biofile.firstField(h)
# Set up some output
info_outs = util.OutStreams(sys.stdout)
outs = util.OutStreams()
params_outs = util.OutStreams([outs])
if options.out_fname is not None:
    # outf is intentionally left open here; it is not closed in this section.
    outf = open(os.path.expanduser(options.out_fname), 'w')
    outs.addStream(outf)
else:
    outs.addStream(sys.stdout)

# Optional FASTA input: map first header field -> sequence, and
# second header field -> first header field.
orf_dict = None
gene_orf_map = None
if options.fasta_fname is not None:
    fname = os.path.expanduser(options.fasta_fname)
    (headers, sequences) = biofile.readFASTA(fname)
    orf_dict = {
        biofile.firstField(h): seq for (h, seq) in zip(headers, sequences)
    }
    gene_orf_map = {
        biofile.secondField(h): biofile.firstField(h) for h in headers
    }

# Set the weight matrix
try:
    matrix = motif.weight_matrices[options.pssm_name]
except KeyError:
    outs.write("# Unable to find weight matrix {}; try one of {}\n".format(options.pssm_name, ','.join(motif.weight_matrices.keys())))
    # Nothing below can run without a matrix. Re-raise the lookup error
    # instead of falling through to a confusing NameError on `matrix`.
    raise
window_size = len(matrix['A'])
# for associating windows with residues, center them
mid_window = window_size // 2

# Write out parameters
params_outs.write("# Run started {}\n".format(util.timestamp()))
params_outs.write("# Command: {}\n".format(' '.join(sys.argv)))
f = biofile.secondField(h) except: f = biofile.firstField(h) return f # Read input if not os.path.isfile(options.in_fname): raise IOError("# Error: file {} does not exist".format(options.in_fname)) (headers, seqs) = biofile.readFASTA(file(options.in_fname, 'r')) #, key_fxn=biofile.secondField) if options.translate_sequences: seqs = [translate.translate(s) for s in seqs] if not options.dont_align_sequences: aligned_seqs = muscle.alignSequences(seqs) seqs = aligned_seqs zhs = [(h,s) for (h,s) in zip(headers,seqs) if not s is None] all_keys = [biofile.firstField(h) for (h,s) in zhs] (headers, seqs) = zip(*zhs) prot_dict = dict([(biofile.firstField(h), s) for (h,s) in zhs]) gene_orf_dict = dict([(secondField(h), biofile.firstField(h)) for h in headers]) orf_gene_dict = dict([(v,k) for (k,v) in gene_orf_dict.items()]) # Write output n_written = 0 data_outs.write("header\n") for orf in query_keys: n_written += 1 # Write out stopping time data_outs.write("# Run finished {}\n".format(util.timestamp())) # Shut down output
continue if options.translate: seq = translate.translateRaw(seq) if options.degap: seq = seq.replace(gap, '') if not options.exclude: if not options.end_aa is None and options.end_aa <= len(seq): seq = seq[0:(options.end_aa)] seq = seq[(options.begin_aa - 1):] else: # Exclude the sequence assert options.end_aa < len(seq) assert options.begin_aa < options.end_aa seq = seq[0:(options.begin_aa - 1)] + seq[(options.end_aa):] degapped_seq = seq.replace(gap, "") line = "#{}\n{}\t{:d}\t{:1.4f}\t{:1.4f}\t{:1.4f}".format( h, biofile.firstField(h), pp.getLength(degapped_seq), pp.getCharge(degapped_seq, options.pH), pp.getIsoelectricPoint(degapped_seq), pp.getHydrophobicity(degapped_seq)) if not aas is None: counts = Composition() counts.initFromSequence(degapped_seq) freqs = Composition() freqs.initFromSequence(degapped_seq) freqs.normalize() line += '\t' + '\t'.join([ "{:1.4f}".format(freqs[aa]) for aa in aas ]) + '\t' + '\t'.join(["{:d}".format(counts[aa]) for aa in aas]) data_outs.write(line + '\n') #print("# Wrote line\n") if not options.out_fname is None:
else: fname = os.path.expanduser(options.composition_fname) if not os.path.isfile(fname): raise IOError("# Error: file {} does not exist".format(fname)) with file(fname, 'r') as inf: composition.read(inf) # Read input if not os.path.isfile(options.in_fname): raise IOError("# Error: file {} does not exist".format( options.in_fname)) (headers, seqs) = biofile.readFASTA(file(options.in_fname, 'r')) if options.translate_sequences: seqs = [translate.translate(s) for s in seqs] zhs = [(h, s) for (h, s) in zip(headers, seqs) if not s is None] all_keys = [biofile.firstField(h) for (h, s) in zhs] (headers, seqs) = zip(*zhs) prot_dict = dict([(biofile.firstField(h), s) for (h, s) in zhs]) gene_orf_dict = dict([ (biofile.secondOrFirstField(h), biofile.firstField(h)) for h in headers ]) orf_gene_dict = dict([(v, k) for (k, v) in gene_orf_dict.items()]) # Select which genes to process query_keys = [] if not options.query_orf is []: # Specific ORF(s) query_keys += options.query_orf if not options.query_gene is []: # Specific gene(s) query_keys += [gene_orf_dict[k] for k in options.query_gene]
# Write out parameters params_outs.write("# Run started {}\n".format(util.timestamp())) params_outs.write("# Command: {}\n".format(' '.join(sys.argv))) params_outs.write("# Parameters:\n") optdict = vars(options) for (k,v) in optdict.items(): params_outs.write("#\t{k}: {v}\n".format(k=k, v=v)) # Read input if not os.path.isfile(options.in_fname): raise IOError("# Error: file {} does not exist".format(options.in_fname)) (headers, seqs) = biofile.readFASTA(file(options.in_fname, 'r')) #, key_fxn=biofile.secondField) if options.translate_sequences: seqs = [translate.translate(s) for s in seqs] zhs = [(h,s) for (h,s) in zip(headers,seqs) if not s is None] all_keys = [biofile.firstField(h) for (h,s) in zhs] (headers, seqs) = zip(*zhs) prot_dict = dict([(biofile.firstField(h), s) for (h,s) in zhs]) gene_orf_dict = dict([(biofile.secondOrFirstField(h), biofile.firstField(h)) for h in headers]) orf_gene_dict = dict([(v,k) for (k,v) in gene_orf_dict.items()]) # Select which genes to process query_keys = [] if not options.query_orf is []: # Specific ORF(s) query_keys += options.query_orf if not options.query_gene is []: # Specific gene(s) query_keys += [gene_orf_dict[k] for k in options.query_gene] if len(query_keys) == 0: # Go through all proteins in database
def parseHeader(x):
    """Split a header string into its name and its ``key=value`` properties.

    Args:
        x: header string made of whitespace-separated tokens.

    Returns:
        A ``(name, props)`` tuple. ``name`` is ``biofile.firstField(x)``.
        ``props`` maps, for every token containing ``=``, the text before
        the first ``=`` to the text after it. If a key repeats, the last
        occurrence wins.
    """
    name = biofile.firstField(x)
    props = {}
    for token in x.split():
        # Split on the first '=' only, so a value that itself contains '='
        # (e.g. "note=a=b") is kept whole instead of raising ValueError.
        key, sep, value = token.partition('=')
        if sep:
            props[key] = value
    return name, props
if options.query: if not options.query in h: continue if options.translate: seq = translate.translateRaw(seq) if options.degap: seq = seq.replace(gap,'') if not options.exclude: if not options.end_aa is None and options.end_aa <= len(seq): seq = seq[0:(options.end_aa)] seq = seq[(options.begin_aa-1):] else: # Exclude the sequence assert options.end_aa < len(seq) assert options.begin_aa < options.end_aa seq = seq[0:(options.begin_aa-1)] + seq[(options.end_aa):] degapped_seq = seq.replace(gap,"") line = "#{}\n{}\t{:d}\t{:1.4f}\t{:1.4f}\t{:1.4f}".format(h, biofile.firstField(h), pp.getLength(degapped_seq), pp.getCharge(degapped_seq, options.pH), pp.getIsoelectricPoint(degapped_seq), pp.getHydrophobicity(degapped_seq)) if not aas is None: counts = Composition() counts.initFromSequence(degapped_seq) freqs = Composition() freqs.initFromSequence(degapped_seq) freqs.normalize() line += '\t' + '\t'.join(["{:1.4f}".format(freqs[aa]) for aa in aas]) + '\t' + '\t'.join(["{:d}".format(counts[aa]) for aa in aas]) data_outs.write(line + '\n') #print("# Wrote line\n") if not options.out_fname is None: outf.close()