def _build_logodata(options):
    """Build the logo data described by the parsed ``options``.

    The input stream is ``options.fin`` when set; otherwise it is fetched
    from ``options.upload`` when that is set, or read from standard input.
    The stream is first parsed as a TRANSFAC matrix. If that raises
    ``ValueError`` the stream is handed to ``read_seq_data`` using
    ``options.input_parser.read`` instead, unless ``options.input_parser``
    is the string ``"transfac"``, in which case the error is re-raised.

    Reverse / complement requests (``options.reverse``,
    ``options.complement``, ``options.revcomp``) are applied before the
    prior is computed from ``options.composition`` and ``options.weight``.

    Raises:
        ValueError: if both ``options.fin`` and ``options.upload`` are set,
            if ``options.ignore_lower_case`` is set for matrix input, if a
            complement is requested for a non-nucleic sequence alphabet, or
            if the TRANSFAC parse fails while the parser is ``"transfac"``.
    """
    # --- Resolve the input stream ------------------------------------
    source = options.fin
    if options.upload is not None:
        if source is not None:
            raise ValueError(
                "error: options --fin and --upload are incompatible")
        from . import _from_URL_fileopen
        source = _from_URL_fileopen(options.upload)
    elif source is None:
        source = StringIO(sys.stdin.read())

    # --- Parse: matrix first, sequence data as the fallback -----------
    try:
        from corebio.matrix import Motif
        motif = Motif.read_transfac(source, alphabet=options.alphabet)
    except ValueError as transfac_error:
        # Passing "transfac" as a plain string instead of a parser object
        # is a bit of an ugly kludge; there is no fallback in that case.
        if options.input_parser == "transfac":
            raise transfac_error
        parsed_as_matrix = False
        sequences = read_seq_data(
            source,
            options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case=options.ignore_lower_case,
        )
    else:
        parsed_as_matrix = True

    # --- Matrix input -------------------------------------------------
    if parsed_as_matrix:
        if options.ignore_lower_case:
            raise ValueError(
                "error: option --ignore-lower-case incompatible with matrix input")
        if options.reverse or options.revcomp:
            motif.reverse()
        if options.complement or options.revcomp:
            motif.complement()

        background = parse_prior(
            options.composition, motif.alphabet, options.weight)
        return LogoData.from_counts(motif.alphabet, motif, background)

    # --- Sequence input -----------------------------------------------
    if options.reverse or options.revcomp:
        flipped = [entry.reverse() for entry in sequences]
        sequences = SeqList(flipped, sequences.alphabet)

    if options.complement or options.revcomp:
        if not nucleic_alphabet.alphabetic(sequences.alphabet):
            raise ValueError('non-nucleic sequence cannot be complemented')
        # Complement under the nucleic alphabet, then put the alphabet the
        # sequences were read with back on the resulting list.
        declared_alphabet = sequences.alphabet
        sequences.alphabet = nucleic_alphabet
        complemented = [
            Seq(entry, sequences.alphabet).complement() for entry in sequences
        ]
        sequences = SeqList(complemented, sequences.alphabet)
        sequences.alphabet = declared_alphabet

    background = parse_prior(
        options.composition, sequences.alphabet, options.weight)
    return LogoData.from_seqs(sequences, background)
def _build_logodata(options):
    """Turn the parsed command-line ``options`` into logo data.

    Input comes from ``options.fin``, or from ``options.upload`` when only
    that is given, or from standard input when neither is given. A TRANSFAC
    matrix parse is attempted first; on ``ValueError`` the input is read as
    sequence data through ``read_seq_data`` (unless ``options.input_parser``
    equals ``"transfac"``, where the parse error is re-raised).

    ``options.reverse``, ``options.complement`` and ``options.revcomp``
    select the transformations applied before the prior is built from
    ``options.composition`` and ``options.weight``.

    Raises:
        ValueError: when ``options.fin`` and ``options.upload`` are both
            set; when ``options.ignore_lower_case`` is combined with matrix
            input; when complementing a non-nucleic sequence alphabet; or
            when the TRANSFAC parse fails and the parser is ``"transfac"``.
    """
    stream = options.fin
    have_stream = stream is not None

    if options.upload is None:
        if not have_stream:
            # Nothing supplied: slurp standard input into a text buffer.
            stream = StringIO(sys.stdin.read())
    elif have_stream:
        raise ValueError("error: options --fin and --upload are incompatible")
    else:
        from . import _from_URL_fileopen
        stream = _from_URL_fileopen(options.upload)

    is_matrix = False
    try:
        # Matrix (TRANSFAC) format gets the first attempt.
        from corebio.matrix import Motif
        matrix = Motif.read_transfac(stream, alphabet=options.alphabet)
        is_matrix = True
    except ValueError as parse_failure:
        # Not a matrix. Fall back to multiple sequence data, except when
        # the parser is the literal string "transfac" (an admitted kludge:
        # a str stands in for a parser object), which has no fallback.
        if options.input_parser == "transfac":
            raise parse_failure
        records = read_seq_data(
            stream,
            options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case=options.ignore_lower_case,
        )

    if not is_matrix:
        if options.reverse or options.revcomp:
            reversed_records = [record.reverse() for record in records]
            records = SeqList(reversed_records, records.alphabet)

        if options.complement or options.revcomp:
            if not nucleic_alphabet.alphabetic(records.alphabet):
                raise ValueError('non-nucleic sequence cannot be complemented')
            # Swap in the nucleic alphabet for the complement step and
            # restore the previous alphabet on the new list afterwards.
            saved_alphabet = records.alphabet
            records.alphabet = nucleic_alphabet
            complements = [
                Seq(record, records.alphabet).complement()
                for record in records
            ]
            records = SeqList(complements, records.alphabet)
            records.alphabet = saved_alphabet

        seq_prior = parse_prior(
            options.composition, records.alphabet, options.weight)
        return LogoData.from_seqs(records, seq_prior)

    # Matrix input from here on.
    if options.ignore_lower_case:
        raise ValueError("error: option --ignore-lower-case incompatible with matrix input")
    if options.reverse or options.revcomp:
        matrix.reverse()
    if options.complement or options.revcomp:
        matrix.complement()

    matrix_prior = parse_prior(
        options.composition, matrix.alphabet, options.weight)
    return LogoData.from_counts(matrix.alphabet, matrix, matrix_prior)