def test_run(self):
    """Check firstField, secondField and secondOrFirstField on sample headers.

    A two-field, space-separated string exercises ``firstField`` and
    ``secondField``; a single-field string exercises the fallback of
    ``secondOrFirstField``.
    """
    two_fields = 'FIRST SECOND'
    # assertEqual reports both operands on failure, unlike assertTrue(a == b).
    self.assertEqual(biofile.firstField(two_fields), 'FIRST')
    self.assertEqual(biofile.secondField(two_fields), 'SECOND')
    one_field = 'FIRST'
    # With no second field present, the first field is expected back.
    self.assertEqual(biofile.secondOrFirstField(one_field), 'FIRST')
# Load composition data from ``fname``; the handle is closed on exit.
with open(fname, 'r') as inf:
    composition.read(inf)

# Read input
if not os.path.isfile(options.in_fname):
    raise IOError("# Error: file {} does not exist".format(
        options.in_fname))
# Read inside a context manager so the FASTA handle is closed after parsing.
# NOTE(review): assumes biofile.readFASTA fully consumes the handle before
# returning -- confirm.
with open(options.in_fname, 'r') as fasta_in:
    (headers, seqs) = biofile.readFASTA(fasta_in)
if options.translate_sequences:
    seqs = [translate.translate(s) for s in seqs]

# Keep only (header, sequence) pairs whose sequence is not None.
zhs = [(h, s) for (h, s) in zip(headers, seqs) if s is not None]
all_keys = [biofile.firstField(h) for (h, s) in zhs]
# Raises ValueError on unpacking if no pairs survived the filter above.
(headers, seqs) = zip(*zhs)

# Lookup tables keyed on header fields:
#   prot_dict:     first field -> sequence
#   gene_orf_dict: second field (or first, if that is what
#                  secondOrFirstField returns) -> first field
#   orf_gene_dict: inverse of gene_orf_dict
prot_dict = {biofile.firstField(h): s for (h, s) in zhs}
gene_orf_dict = {
    biofile.secondOrFirstField(h): biofile.firstField(h) for h in headers
}
orf_gene_dict = {v: k for (k, v) in gene_orf_dict.items()}

# Select which genes to process
query_keys = []
# Truthiness test: the previous ``not x is []`` compared identity with a
# brand-new list and was therefore always true.
if options.query_orf:
    # Specific ORF(s)
    query_keys += options.query_orf
if options.query_gene:
    # Specific gene(s); raises KeyError for a gene absent from the input.
    query_keys += [gene_orf_dict[k] for k in options.query_gene]
if not query_keys:
    # Go through all proteins in database
    query_keys = all_keys
params_outs.write("# Parameters:\n") optdict = vars(options) for (k,v) in optdict.items(): params_outs.write("#\t{k}: {v}\n".format(k=k, v=v)) # Read input if not os.path.isfile(options.in_fname): raise IOError("# Error: file {} does not exist".format(options.in_fname)) (headers, seqs) = biofile.readFASTA(file(options.in_fname, 'r')) #, key_fxn=biofile.secondField) if options.translate_sequences: seqs = [translate.translate(s) for s in seqs] zhs = [(h,s) for (h,s) in zip(headers,seqs) if not s is None] all_keys = [biofile.firstField(h) for (h,s) in zhs] (headers, seqs) = zip(*zhs) prot_dict = dict([(biofile.firstField(h), s) for (h,s) in zhs]) gene_orf_dict = dict([(biofile.secondOrFirstField(h), biofile.firstField(h)) for h in headers]) orf_gene_dict = dict([(v,k) for (k,v) in gene_orf_dict.items()]) # Select which genes to process query_keys = [] if not options.query_orf is []: # Specific ORF(s) query_keys += options.query_orf if not options.query_gene is []: # Specific gene(s) query_keys += [gene_orf_dict[k] for k in options.query_gene] if len(query_keys) == 0: # Go through all proteins in database query_keys = all_keys start_position, end_position = scriptutil.findSequencePositions(options.start_position, options.end_position,
# Echo every parsed option into the output as '#'-prefixed header lines.
data_outs.write("# Parameters:\n")
optdict = vars(options)
for (k, v) in optdict.items():
    data_outs.write("#\t{k}: {v}\n".format(k=k, v=v))

# Read input
if not os.path.isfile(options.in_fname):
    raise IOError("# Error: file {} does not exist".format(options.in_fname))
# Read inside a context manager so the FASTA handle is closed after parsing.
# NOTE(review): assumes biofile.readFASTA fully consumes the handle before
# returning -- confirm.
with open(options.in_fname, 'r') as fasta_in:
    (headers, seqs) = biofile.readFASTA(fasta_in)  #, key_fxn=biofile.secondField)
if options.translate_sequences:
    seqs = [translate.translate(s) for s in seqs]

# Keep only (header, sequence) pairs whose sequence is not None.
zhs = [(h, s) for (h, s) in zip(headers, seqs) if s is not None]
all_keys = [biofile.firstField(h) for (h, s) in zhs]
# Raises ValueError on unpacking if no pairs survived the filter above.
(headers, seqs) = zip(*zhs)

# Lookup tables keyed on header fields:
#   prot_dict:     first field -> sequence
#   gene_orf_dict: second field (or first, if that is what
#                  secondOrFirstField returns) -> first field
#   orf_gene_dict: inverse of gene_orf_dict
prot_dict = {biofile.firstField(h): s for (h, s) in zhs}
gene_orf_dict = {
    biofile.secondOrFirstField(h): biofile.firstField(h) for h in headers
}
orf_gene_dict = {v: k for (k, v) in gene_orf_dict.items()}

# Select which genes to process
query_keys = []
# Truthiness test: the previous ``not x is []`` compared identity with a
# brand-new list and was therefore always true.
if options.query_orf:
    # Specific ORF(s)
    query_keys += options.query_orf
if options.query_gene:
    # Specific gene(s); raises KeyError for a gene absent from the input.
    query_keys += [gene_orf_dict[k] for k in options.query_gene]
if not query_keys:
    # Go through all proteins in database
    query_keys = all_keys

# Default range: start at 0, end at the length of the first retained sequence.
start_position = 0
end_position = len(seqs[0])