Example #1
0
def _build_logodata(options):
    """Read the input described by ``options`` and return its LogoData.

    The input stream is ``options.fin``.  When that is None, the data is
    fetched from ``options.upload`` if it is set, otherwise read from
    standard input.  The stream is first parsed as a transfac matrix; if
    that raises ValueError, it is parsed as sequence data using
    ``options.input_parser.read`` instead.

    Raises:
        ValueError: if both ``options.fin`` and ``options.upload`` are
            given; if matrix parsing fails while ``options.input_parser``
            equals "transfac"; if ``options.ignore_lower_case`` is set for
            matrix input; or if complementing is requested for sequences
            whose alphabet fails the ``nucleic_alphabet.alphabetic`` check.
    """
    source = options.fin

    if options.upload is not None:
        if source is not None:
            raise ValueError("error: options --fin and --upload are incompatible")
        from . import _from_URL_fileopen
        source = _from_URL_fileopen(options.upload)
    elif source is None:
        source = StringIO(sys.stdin.read())

    try:
        # Matrix (transfac) input gets the first attempt at the stream.
        from corebio.matrix import Motif
        motif = Motif.read_transfac(source, alphabet=options.alphabet)
    except ValueError as transfac_error:
        # Not readable as a matrix.  When the parser option is the plain
        # string "transfac" (a kludge: it is a str, not a parser object)
        # there is nothing to fall back to, so the error is re-raised.
        if options.input_parser == "transfac":
            raise transfac_error
        seqs = read_seq_data(
            source,
            options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case=options.ignore_lower_case,
        )
        have_matrix = False
    else:
        have_matrix = True

    if have_matrix:
        if options.ignore_lower_case:
            raise ValueError("error: option --ignore-lower-case incompatible with matrix input")
        if options.reverse or options.revcomp:
            motif.reverse()
        if options.complement or options.revcomp:
            motif.complement()

        background = parse_prior(options.composition, motif.alphabet, options.weight)
        return LogoData.from_counts(motif.alphabet, motif, background)

    # Sequence input from here on.
    if options.reverse or options.revcomp:
        flipped = [s.reverse() for s in seqs]
        seqs = SeqList(flipped, seqs.alphabet)

    if options.complement or options.revcomp:
        if not nucleic_alphabet.alphabetic(seqs.alphabet):
            raise ValueError('non-nucleic sequence cannot be complemented')
        # Complement under the nucleic alphabet, then put the caller's
        # alphabet back on the resulting list.
        saved_alphabet = seqs.alphabet
        seqs.alphabet = nucleic_alphabet
        complemented = [Seq(s, seqs.alphabet).complement() for s in seqs]
        seqs = SeqList(complemented, seqs.alphabet)
        seqs.alphabet = saved_alphabet

    background = parse_prior(options.composition, seqs.alphabet, options.weight)
    return LogoData.from_seqs(seqs, background)
    def create_logo(self, seqs=None):
        """Create a sequence logo for the input sequences.

        Args:
            seqs: iterable of ``(header, sequence)`` pairs.  Only the
                sequences are used to build the logo.

        Returns:
            The result of the ``wbl`` formatter selected by
            ``self.output_format`` ('png', 'png_print' or 'jpeg'; any other
            value falls back to the EPS formatter).

        Raises:
            ValueError: if ``seqs`` is None or empty.
        """
        records = list(seqs) if seqs is not None else []
        if not records:
            # Previously this surfaced as an opaque tuple-unpacking error.
            raise ValueError(
                "create_logo requires at least one (header, sequence) pair")

        # Separate headers from sequences; the headers are not needed here.
        _headers, instances = [list(column) for column in zip(*records)]

        # Compare by value: ``is`` tests object identity, which is not
        # guaranteed for equal strings, so the old check could silently
        # fall through to the DNA alphabet.
        sequence_type = self.options.sequence_type
        if sequence_type == 'rna':
            alphabet = Alphabet('ACGU')
        elif sequence_type == 'protein':
            alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY')
        else:
            alphabet = Alphabet('AGCT')
        motif_corebio = SeqList(alist=instances, alphabet=alphabet)
        data = wbl.LogoData().from_seqs(motif_corebio)

        logo_format = wbl.LogoFormat(data, self.options)

        if self.output_format == 'png':
            return wbl.png_formatter(data, logo_format)
        elif self.output_format == 'png_print':
            return wbl.png_print_formatter(data, logo_format)
        elif self.output_format == 'jpeg':
            return wbl.jpeg_formatter(data, logo_format)
        else:
            return wbl.eps_formatter(data, logo_format)
Example #3
0
def _build_logodata(options):
    """Read the input described by ``options`` and return its LogoData.

    The input stream is ``options.fin``.  When that is None, the data is
    fetched from ``options.upload`` if it is set, otherwise read from
    standard input.  The stream is first parsed as a transfac matrix; if
    that raises ValueError, it is parsed as sequence data using
    ``options.input_parser.read`` instead.

    Raises:
        ValueError: if both ``options.fin`` and ``options.upload`` are
            given; if matrix parsing fails while ``options.input_parser``
            equals "transfac"; if ``options.ignore_lower_case`` is set for
            matrix input; or if complementing is requested for sequences
            whose alphabet fails the ``nucleic_alphabet.alphabetic`` check.
    """
    stream = options.fin

    if options.upload is not None:
        if stream is not None:
            raise ValueError(
                "error: options --fin and --upload are incompatible")
        from . import _from_URL_fileopen
        stream = _from_URL_fileopen(options.upload)
    elif stream is None:
        stream = StringIO(sys.stdin.read())

    try:
        # Matrix (transfac) input gets the first attempt at the stream.
        from corebio.matrix import Motif
        motif = Motif.read_transfac(stream, alphabet=options.alphabet)
    except ValueError as transfac_error:
        # Not readable as a matrix.  When the parser option is the plain
        # string "transfac" (a kludge: it is a str, not a parser object)
        # there is nothing to fall back to, so the error is re-raised.
        if options.input_parser == "transfac":
            raise transfac_error
        seqs = read_seq_data(
            stream,
            options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case=options.ignore_lower_case,
        )
        parsed_as_matrix = False
    else:
        parsed_as_matrix = True

    if parsed_as_matrix:
        if options.ignore_lower_case:
            raise ValueError(
                "error: option --ignore-lower-case incompatible with matrix input"
            )
        if options.reverse or options.revcomp:
            motif.reverse()
        if options.complement or options.revcomp:
            motif.complement()

        matrix_prior = parse_prior(
            options.composition, motif.alphabet, options.weight)
        return LogoData.from_counts(motif.alphabet, motif, matrix_prior)

    # Sequence input from here on.
    if options.reverse or options.revcomp:
        backwards = [s.reverse() for s in seqs]
        seqs = SeqList(backwards, seqs.alphabet)

    if options.complement or options.revcomp:
        if not nucleic_alphabet.alphabetic(seqs.alphabet):
            raise ValueError('non-nucleic sequence cannot be complemented')
        # Complement under the nucleic alphabet, then put the caller's
        # alphabet back on the resulting list.
        original_alphabet = seqs.alphabet
        seqs.alphabet = nucleic_alphabet
        complements = [Seq(s, seqs.alphabet).complement() for s in seqs]
        seqs = SeqList(complements, seqs.alphabet)
        seqs.alphabet = original_alphabet

    seq_prior = parse_prior(options.composition, seqs.alphabet, options.weight)
    return LogoData.from_seqs(seqs, seq_prior)
Example #4
0
def read(fin, alphabet=None):
    """Extract sequence data from a nexus file.

    Builds one Seq per entry in the parsed file's ``taxlabels``.  Each Seq
    uses ``alphabet`` when it is given, otherwise the alphabet carried by
    that taxon's matrix row.

    Raises:
        ValueError: if the file yields no sequences.
    """
    nexus = Nexus(fin)

    records = []
    for label in nexus.taxlabels:
        seq_name = safename(label)
        row = nexus.matrix[label]
        # Fall back to the row's own alphabet only when none was supplied.
        seq_alphabet = row.alphabet if alphabet is None else alphabet
        records.append(Seq(row, name=seq_name, alphabet=seq_alphabet))

    if not records:
        # No taxa were found, so the input was not parsed usefully.
        raise ValueError("Cannot parse file")

    return SeqList(records)
Example #5
0
        seqs = read_seq_data(fin, 
            options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case = options.ignore_lower_case)   

    if motif_flag :
        if options.ignore_lower_case:
            raise ValueError("error: option --ignore-lower-case incompatible with matrix input")
        if options.reverse: motif.reverse()
        if options.complement: motif.complement()

        prior = parse_prior( options.composition,motif.alphabet, options.weight)
        data = LogoData.from_counts(motif.alphabet, motif, prior)
    else :
        if options.reverse: 
            seqs = SeqList([s.reverse() for s in seqs], seqs.alphabet)
        
        if options.complement :
            seqs= SeqList( [Seq(s,seqs.alphabet).complement() for s in seqs], seqs.alphabet)

        prior = parse_prior( options.composition,seqs.alphabet, options.weight)
        data = LogoData.from_seqs(seqs, prior)




    return data
     
 
             
def _build_logoformat( logodata, opts) :
Example #6
0
            motif.reverse()
        if options.complement:
            motif.complement()

        if not isCodon:
            prior, compos = parse_prior(fin_compos, motif.alphabet, fin_weight)
            data = LogoData.from_counts(motif.alphabet, motif, options.stats_func, prior, compos, second_data)
        else:
            raise ValueError("option --sequence-type 'codon' incompatible with matrix input")
    else:
        if options.codon_frame < 0 and isCodon:
            options.reverse = True
            options.complement = True

        if options.reverse:
            seqs = SeqList([s.reverse() for s in seqs], seqs.alphabet)

        if options.complement:
            seqs = SeqList([Seq(s, seqs.alphabet).complement() for s in seqs], seqs.alphabet)

        if isCodon:
            if abs(options.codon_frame) > 1:
                beg = abs(options.codon_frame) - 1
                end = beg + int((len(seqs[0]) - beg) / 3) * 3
                seqs = SeqList([s[beg:end] for s in seqs], seqs.alphabet)
            if std_alphabets["dna"] == seqs.alphabet:
                seqs.alphabet = codon_dna_alphabet
            elif std_alphabets["rna"] == seqs.alphabet:
                seqs.alphabet = codon_rna_alphabet

        prior, compos = parse_prior(fin_compos, seqs.alphabet, fin_weight)
Example #7
0
def read(fin, alphabet=None):
    """Null parser: ignore the input and return an empty SeqList.

    ``fin`` must not be None (checked with an assert); ``alphabet`` is
    accepted but has no effect on the result.
    """
    # Both arguments are referenced only to keep pychecker quiet about
    # unused parameters.
    assert fin is not None
    if alphabet is not None:
        pass
    empty = SeqList([])
    return empty