def _build_logodata(options):
    """Build the logo data described by the parsed command-line ``options``.

    The input stream is ``options.fin``.  When that is ``None`` the data is
    read from standard input or, if ``options.upload`` is set, opened through
    ``_from_URL_fileopen``.  The stream is first parsed as a transfac matrix;
    if that raises ``ValueError`` it is parsed as sequence data with
    ``options.input_parser.read`` instead.

    Returns:
        The result of ``LogoData.from_counts`` for matrix input, or of
        ``LogoData.from_seqs`` for sequence input.

    Raises:
        ValueError: If both ``options.fin`` and ``options.upload`` are given,
            if transfac parsing fails while ``options.input_parser`` is
            ``"transfac"``, if ``options.ignore_lower_case`` is combined with
            matrix input, or if a complement is requested for sequences whose
            alphabet is not nucleic.
    """
    source = options.fin
    if options.upload is not None:
        if source is not None:
            raise ValueError("error: options --fin and --upload are incompatible")
        from . import _from_URL_fileopen
        source = _from_URL_fileopen(options.upload)
    elif source is None:
        source = StringIO(sys.stdin.read())

    try:
        # Matrix (transfac) input is attempted before anything else.
        from corebio.matrix import Motif
        motif = Motif.read_transfac(source, alphabet=options.alphabet)
    except ValueError as motif_err:
        # Not a matrix: fall back to multiple sequence data, unless transfac
        # was requested explicitly.  (The parser option carries the plain
        # string "transfac" in that case rather than a parser object.)
        if options.input_parser == "transfac":
            raise motif_err
        seqs = read_seq_data(
            source,
            options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case=options.ignore_lower_case,
        )
    else:
        # Matrix input.
        if options.ignore_lower_case:
            raise ValueError("error: option --ignore-lower-case incompatible with matrix input")
        if options.reverse or options.revcomp:
            motif.reverse()
        if options.complement or options.revcomp:
            motif.complement()
        matrix_prior = parse_prior(options.composition, motif.alphabet, options.weight)
        return LogoData.from_counts(motif.alphabet, motif, matrix_prior)

    # Sequence input.
    if options.reverse or options.revcomp:
        flipped = [item.reverse() for item in seqs]
        seqs = SeqList(flipped, seqs.alphabet)

    if options.complement or options.revcomp:
        if not nucleic_alphabet.alphabetic(seqs.alphabet):
            raise ValueError('non-nucleic sequence cannot be complemented')
        # The complement is computed under the nucleic alphabet; the
        # alphabet the sequences arrived with is put back on the result.
        saved_alphabet = seqs.alphabet
        seqs.alphabet = nucleic_alphabet
        complemented = [Seq(item, seqs.alphabet).complement() for item in seqs]
        seqs = SeqList(complemented, seqs.alphabet)
        seqs.alphabet = saved_alphabet

    seq_prior = parse_prior(options.composition, seqs.alphabet, options.weight)
    return LogoData.from_seqs(seqs, seq_prior)
def create_logo(self, seqs=None):
    """Create a sequence logo for the input sequences.

    Args:
        seqs: Iterable of ``(header, sequence)`` pairs.  Only the sequences
            are used to build the logo.

    Returns:
        The output of the ``wbl`` formatter selected by
        ``self.output_format`` (``'png'``, ``'png_print'`` or ``'jpeg'``);
        any other value falls back to the EPS formatter.

    Raises:
        ValueError: If ``seqs`` is empty or its entries are not pairs.
    """
    if seqs is None:
        seqs = ()

    # Transpose the pairs into a header column and a sequence column.
    columns = [list(column) for column in zip(*seqs)]
    if len(columns) != 2:
        raise ValueError(
            "create_logo expects a non-empty iterable of "
            "(header, sequence) pairs"
        )
    _headers, instances = columns

    # Compare by value: ``is`` tests object identity, which is not reliable
    # for strings and would silently select the DNA alphabet.
    sequence_type = self.options.sequence_type
    if sequence_type == 'rna':
        alphabet = Alphabet('ACGU')
    elif sequence_type == 'protein':
        alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY')
    else:
        alphabet = Alphabet('AGCT')

    motif_corebio = SeqList(alist=instances, alphabet=alphabet)
    data = wbl.LogoData().from_seqs(motif_corebio)
    logo_format = wbl.LogoFormat(data, self.options)

    if self.output_format == 'png':
        return wbl.png_formatter(data, logo_format)
    elif self.output_format == 'png_print':
        return wbl.png_print_formatter(data, logo_format)
    elif self.output_format == 'jpeg':
        return wbl.jpeg_formatter(data, logo_format)
    else:
        return wbl.eps_formatter(data, logo_format)
def _build_logodata(options):
    """Build the logo data described by the parsed command-line ``options``.

    The input stream is ``options.fin``.  When that is ``None`` the data is
    read from standard input or, if ``options.upload`` is set, opened through
    ``_from_URL_fileopen``.  The stream is first parsed as a transfac matrix;
    if that raises ``ValueError`` it is parsed as sequence data with
    ``options.input_parser.read`` instead.

    Returns:
        The result of ``LogoData.from_counts`` for matrix input, or of
        ``LogoData.from_seqs`` for sequence input.

    Raises:
        ValueError: If both ``options.fin`` and ``options.upload`` are given,
            if transfac parsing fails while ``options.input_parser`` is
            ``"transfac"``, if ``options.ignore_lower_case`` is combined with
            matrix input, or if a complement is requested for sequences whose
            alphabet is not nucleic.
    """
    stream = options.fin
    if options.upload is not None:
        if stream is not None:
            raise ValueError(
                "error: options --fin and --upload are incompatible")
        from . import _from_URL_fileopen
        stream = _from_URL_fileopen(options.upload)
    elif stream is None:
        stream = StringIO(sys.stdin.read())

    try:
        # Matrix (transfac) input is attempted before anything else.
        from corebio.matrix import Motif
        motif = Motif.read_transfac(stream, alphabet=options.alphabet)
    except ValueError as motif_err:
        # Not a matrix: fall back to multiple sequence data, unless transfac
        # was requested explicitly.  (The parser option carries the plain
        # string "transfac" in that case rather than a parser object.)
        if options.input_parser == "transfac":
            raise motif_err
        seqs = read_seq_data(
            stream,
            options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case=options.ignore_lower_case,
        )
    else:
        # Matrix input.
        if options.ignore_lower_case:
            raise ValueError(
                "error: option --ignore-lower-case incompatible with matrix input"
            )
        if options.reverse or options.revcomp:
            motif.reverse()
        if options.complement or options.revcomp:
            motif.complement()
        counts_prior = parse_prior(
            options.composition, motif.alphabet, options.weight)
        return LogoData.from_counts(motif.alphabet, motif, counts_prior)

    # Sequence input.
    if options.reverse or options.revcomp:
        backwards = [entry.reverse() for entry in seqs]
        seqs = SeqList(backwards, seqs.alphabet)

    if options.complement or options.revcomp:
        if not nucleic_alphabet.alphabetic(seqs.alphabet):
            raise ValueError('non-nucleic sequence cannot be complemented')
        # The complement is computed under the nucleic alphabet; the
        # alphabet the sequences arrived with is put back on the result.
        incoming_alphabet = seqs.alphabet
        seqs.alphabet = nucleic_alphabet
        paired = [Seq(entry, seqs.alphabet).complement() for entry in seqs]
        seqs = SeqList(paired, seqs.alphabet)
        seqs.alphabet = incoming_alphabet

    seqs_prior = parse_prior(options.composition, seqs.alphabet, options.weight)
    return LogoData.from_seqs(seqs, seqs_prior)
def read(fin, alphabet=None):
    """Read the sequences stored in a nexus file.

    Args:
        fin: Input handed to ``Nexus`` for parsing.
        alphabet: Alphabet given to every ``Seq``.  When ``None``, each
            sequence uses the alphabet of its own matrix entry.

    Returns:
        A ``SeqList`` holding one ``Seq`` per taxon label, in label order.

    Raises:
        ValueError: If the parsed file yields no sequences.
    """
    nexus = Nexus(fin)

    def to_seq(taxon):
        # Build the Seq for one taxon from its matrix entry.
        label = safename(taxon)
        row = nexus.matrix[taxon]
        chosen = row.alphabet if alphabet is None else alphabet
        return Seq(row, name=label, alphabet=chosen)

    records = [to_seq(taxon) for taxon in nexus.taxlabels]

    if not records:
        # No taxa at all means the input was not usable nexus data.
        raise ValueError("Cannot parse file")

    return SeqList(records)
seqs = read_seq_data(fin, options.input_parser.read, alphabet=options.alphabet, ignore_lower_case = options.ignore_lower_case) if motif_flag : if options.ignore_lower_case: raise ValueError("error: option --ignore-lower-case incompatible with matrix input") if options.reverse: motif.reverse() if options.complement: motif.complement() prior = parse_prior( options.composition,motif.alphabet, options.weight) data = LogoData.from_counts(motif.alphabet, motif, prior) else : if options.reverse: seqs = SeqList([s.reverse() for s in seqs], seqs.alphabet) if options.complement : seqs= SeqList( [Seq(s,seqs.alphabet).complement() for s in seqs], seqs.alphabet) prior = parse_prior( options.composition,seqs.alphabet, options.weight) data = LogoData.from_seqs(seqs, prior) return data def _build_logoformat( logodata, opts) :
motif.reverse() if options.complement: motif.complement() if not isCodon: prior, compos = parse_prior(fin_compos, motif.alphabet, fin_weight) data = LogoData.from_counts(motif.alphabet, motif, options.stats_func, prior, compos, second_data) else: raise ValueError("option --sequence-type 'codon' incompatible with matrix input") else: if options.codon_frame < 0 and isCodon: options.reverse = True options.complement = True if options.reverse: seqs = SeqList([s.reverse() for s in seqs], seqs.alphabet) if options.complement: seqs = SeqList([Seq(s, seqs.alphabet).complement() for s in seqs], seqs.alphabet) if isCodon: if abs(options.codon_frame) > 1: beg = abs(options.codon_frame) - 1 end = beg + int((len(seqs[0]) - beg) / 3) * 3 seqs = SeqList([s[beg:end] for s in seqs], seqs.alphabet) if std_alphabets["dna"] == seqs.alphabet: seqs.alphabet = codon_dna_alphabet elif std_alphabets["rna"] == seqs.alphabet: seqs.alphabet = codon_rna_alphabet prior, compos = parse_prior(fin_compos, seqs.alphabet, fin_weight)
def read(fin, alphabet=None):
    """Placeholder reader: check ``fin`` and return an empty ``SeqList``.

    Args:
        fin: Input handle; must not be ``None``.
        alphabet: Accepted but not used.

    Returns:
        A ``SeqList`` built from an empty list.
    """
    assert fin is not None
    # Touch the otherwise unused argument so static checkers stay quiet.
    _ = alphabet
    return SeqList([])