Example #1
0
def main():
    """Build a distance matrix from word frequencies and output it.

    Steps, in order:

    1. Parse and validate the command-line arguments.
    2. Read the sequences from ``args.fasta``.
    3. Create a word pattern of size ``args.word_size`` from the sequences,
       or read one from ``args.word_pattern`` when no word size is given.
    4. Optionally apply ``reduce_alphabet`` / ``merge_revcomp`` to the
       pattern, depending on the corresponding flags.
    5. Build a ``word_vector.Freqs`` vector, wrap it in a
       ``word_distance.Distance`` using ``args.distance`` and create the
       matrix.
    6. Write the matrix to ``args.out`` if given, otherwise display it.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)
    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    if args.reduce_alphabet:
        p = p.reduce_alphabet(seqcontent.get_reduced_alphabet(args.molecule))
    if args.merge_revcomp:
        p = p.merge_revcomp()

    freqs = word_vector.Freqs(seq_records.length_list, p)

    dist = word_distance.Distance(freqs, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Example #2
0
def validate_args(parser):
    """Parse the command line and validate the word-size / word-pattern options.

    Exactly one of two modes must be selected:

    * ``args.word_size`` -- must be at least 3.
    * ``args.word_patterns`` -- the first three entries are read with
      ``word_pattern.read`` and their word lengths (taken from the first
      word of each pattern) must follow ``k, k-1, k-2``.  On success
      ``args.word_patterns`` is replaced by the list of parsed patterns.

    Args:
        parser: Argument parser providing ``parse_args()`` and ``error()``.

    Returns:
        The parsed arguments namespace.

    Every validation failure is reported through ``parser.error``.
    """
    args = parser.parse_args()
    # Compare against None rather than relying on truthiness so that an
    # explicit ``--word_size 0`` is rejected by the range check instead of
    # being treated as "option not given".
    # NOTE(review): assumes the parser leaves word_size as None when the
    # option is omitted (the argparse default) -- confirm in get_parser().
    if args.word_size is not None:
        if args.word_size < 3:
            parser.error('Word size must be >= 3')

    elif args.word_patterns:
        patterns = []
        # Only the first three inputs are used, as before; iterating
        # directly avoids an IndexError when fewer than three are supplied.
        for handle in args.word_patterns[:3]:
            try:
                p = word_pattern.read(handle)
            except Exception:
                parser.error('Invalid format for word pattern: {0}'.format(
                    handle.name))
            else:
                patterns.append(p)

        if len(patterns) == 3:
            # Word lengths must decrease by one: k, k-1, k-2.
            k, k1, k2 = [len(p.pat_list[0]) for p in patterns]
            if not (k == k1 + 1 == k2 + 2):
                parser.error(
                    '''Word pattern lengths do not follow k, k-1, k-2''')
        else:
            parser.error('Three word patterns (k, k-1, k-2) are required.')

        args.word_patterns = patterns
    else:
        parser.error("Specify either: --word_size or --word_pattern.")
    return args
Example #3
0
    def test_input_output_file_pattern(self):
        """Check that a formatted pattern survives a write/read round trip.

        For ``wordpos`` both True and False, a pattern of word size 1 is
        created from ``self.dna_records.seq_list``, written to the test
        data file ``pattern.txt``, read back with ``word_pattern.read`` and
        compared via ``format()``.
        """
        # Resolved once; the same constant name is used for every access.
        path = utils.get_test_data('pattern.txt')
        try:
            for wordpos in [True, False]:
                p1 = word_pattern.create(self.dna_records.seq_list,
                                         word_size=1,
                                         wordpos=wordpos)
                with open(path, 'w') as oh:
                    oh.write(p1.format())
                with open(path) as fh:
                    p2 = word_pattern.read(fh)
                self.assertEqual(p1.format(), p2.format())
        finally:
            # Remove the scratch file even when an assertion or read fails.
            # The existence check stops a missing file from masking the
            # original error.
            if os.path.exists(path):
                os.remove(path)
Example #4
0
def validate_args(parser):
    """Parse the command line and validate the word-size / word-pattern options.

    Exactly one of two modes must be selected:

    * ``args.word_size`` -- must be at least 1.
    * ``args.word_pattern`` -- read with ``word_pattern.read``; the result
      must have a non-empty ``pos_list``.  On success ``args.word_pattern``
      is replaced by the parsed pattern object.

    Args:
        parser: Argument parser providing ``parse_args()`` and ``error()``.

    Returns:
        The parsed arguments namespace.

    Every validation failure is reported through ``parser.error``.
    """
    args = parser.parse_args()
    # Compare against None rather than relying on truthiness: with a plain
    # truth test ``--word_size 0`` skipped the range check below entirely.
    # NOTE(review): assumes the parser leaves word_size as None when the
    # option is omitted (the argparse default) -- confirm in get_parser().
    if args.word_size is not None:
        if args.word_size < 1:
            parser.error('word size must be >= 1')
    elif args.word_pattern:
        p = word_pattern.read(args.word_pattern)
        if not p.pos_list:
            e = "{0} does not contain info on word positions.\n"
            e += "Please use: create_wordpattern.py with"
            e += " --word_position option."
            parser.error(e.format(args.word_pattern.name))
        else:
            args.word_pattern = p
    else:
        parser.error("Specify either: --word_size or --word_pattern.")
    return args
Example #5
0
def main():
    """Build a distance matrix from boolean word vectors and output it.

    Steps, in order:

    1. Parse and validate the command-line arguments.
    2. Read the sequences from ``args.fasta``.
    3. Create a word pattern of size ``args.word_size`` from the sequences,
       or read one from ``args.word_pattern`` when no word size is given.
    4. Build a ``word_vector.Bools`` vector, wrap it in a
       ``word_bool_distance.Distance`` using ``args.distance`` and create
       the matrix.
    5. Write the matrix to ``args.out`` if given, otherwise display it.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)
    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    bools = word_vector.Bools(seq_records.length_list, p)
    dist = word_bool_distance.Distance(bools, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Example #6
0
def main():
    """Build a distance matrix from a selectable word vector and output it.

    Steps, in order:

    1. Parse and validate the command-line arguments.
    2. Read the sequences from ``args.fasta``.
    3. Create a word pattern of size ``args.word_size`` from the sequences,
       or read one from ``args.word_pattern`` when no word size is given.
    4. Build the vector selected by ``args.vector``:

       * ``'counts'`` / ``'freqs'`` -- ``Counts`` / ``Freqs``, or their
         ``*Weight`` variants with a ``WeightModel`` when
         ``args.char_weights`` is provided.
       * anything else -- ``FreqsStd`` with an ``EqualFreqs`` model when
         ``args.alphabet_size`` is set, otherwise an ``EquilibriumFreqs``
         model built from ``args.char_freqs``.

    5. Wrap the vector in a ``word_distance.Distance`` using
       ``args.distance`` and create the matrix.
    6. Write the matrix to ``args.out`` if given, otherwise display it.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    # Dispatch tables: vector name -> plain class / weighted class.
    veccls = {'counts': word_vector.Counts, 'freqs': word_vector.Freqs}
    vecclsw = {
        'counts': word_vector.CountsWeight,
        'freqs': word_vector.FreqsWeight
    }

    if args.vector in veccls:
        if args.char_weights is None:
            vec = veccls[args.vector](seq_records.length_list, p)
        else:
            weightmodel = word_vector.WeightModel(
                char_weights=args.char_weights)
            vec = vecclsw[args.vector](seq_records.length_list, p, weightmodel)
    else:
        if args.alphabet_size:
            freqmodel = word_vector.EqualFreqs(
                alphabet_size=args.alphabet_size)
        else:
            freqmodel = word_vector.EquilibriumFreqs(args.char_freqs)
        vec = word_vector.FreqsStd(seq_records.length_list, p, freqmodel)

    dist = word_distance.Distance(vec, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)