def main():
    """Compute word-frequency distances between FASTA sequences.

    Arguments are obtained from ``get_parser()`` and checked by
    ``validate_args()``. The word pattern is created from the sequences
    when ``args.word_size`` is set, otherwise it is read from
    ``args.word_pattern``; it is optionally reduced to a smaller alphabet
    and merged with reverse complements. The resulting distance matrix is
    written to ``args.out`` if given, otherwise displayed.
    """
    parser = get_parser()
    args = validate_args(parser)
    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    if args.reduce_alphabet:
        p = p.reduce_alphabet(seqcontent.get_reduced_alphabet(args.molecule))
    if args.merge_revcomp:
        p = p.merge_revcomp()

    freqs = word_vector.Freqs(seq_records.length_list, p)
    dist = word_distance.Distance(freqs, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing raises.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
def validate_args(parser):
    """Parse the command line and validate the word options.

    Either ``word_size`` (an integer >= 3) or ``word_patterns`` (three
    word pattern inputs whose word lengths are k, k-1 and k-2) must be
    supplied. On success ``args.word_patterns`` is replaced by the list
    of parsed patterns. Every problem is reported via ``parser.error``.

    Args:
        parser: object providing ``parse_args()`` and ``error(message)``.

    Returns:
        The parsed (and possibly updated) arguments object.
    """
    args = parser.parse_args()

    # Compare against None so that an explicit 0 still reaches the range
    # check instead of being treated as "option not given".
    if args.word_size is not None:
        if args.word_size < 3:
            parser.error('Word size must be >= 3')
    elif args.word_patterns:
        # Only the first three inputs are used.
        handles = args.word_patterns[:3]
        if len(handles) < 3:
            # Report a clean error instead of failing with an IndexError.
            parser.error('Expected three word pattern files (k, k-1, k-2)')
        else:
            patterns = []
            for handle in handles:
                try:
                    patterns.append(word_pattern.read(handle))
                except Exception:
                    parser.error(
                        'Invalid format for word pattern: {0}'.format(
                            handle.name))

            # Guard kept in case parser.error() returns instead of exiting.
            if len(patterns) == 3:
                # Check that the word lengths follow the k, k-1, k-2 rule.
                k, k1, k2 = [len(p.pat_list[0]) for p in patterns]
                if not (k == k1 + 1 == k2 + 2):
                    parser.error(
                        '''Word pattern lengths do not follow k, k-1, k-2''')

            args.word_patterns = patterns
    else:
        parser.error("Specify either: --word_size or --word_pattern.")
    return args
def test_input_output_file_pattern(self):
    """Check that a word pattern survives a write/read round trip.

    For both ``wordpos`` settings a pattern of word size 1 is created
    from ``self.dna_records``, written to ``pattern.txt`` in the test
    data location, read back, and its formatted text compared with the
    original.
    """
    path = utils.get_test_data('pattern.txt')
    try:
        for wordpos in [True, False]:
            p1 = word_pattern.create(self.dna_records.seq_list,
                                     word_size=1, wordpos=wordpos)
            with open(path, 'w') as oh:
                oh.write(p1.format())
            with open(path) as fh:
                p2 = word_pattern.read(fh)
            self.assertEqual(p1.format(), p2.format())
    finally:
        # Remove the temporary file even when an assertion fails.
        if os.path.exists(path):
            os.remove(path)
def validate_args(parser):
    """Parse the command line and validate the word options.

    Either ``word_size`` (an integer >= 1) or ``word_pattern`` must be
    supplied. A supplied word pattern is read with ``word_pattern.read``
    and must carry word positions (a non-empty ``pos_list``); on success
    ``args.word_pattern`` is replaced by the parsed pattern. Every
    problem is reported via ``parser.error``.

    Args:
        parser: object providing ``parse_args()`` and ``error(message)``.

    Returns:
        The parsed (and possibly updated) arguments object.
    """
    args = parser.parse_args()

    # Compare against None so that an explicit 0 still reaches the range
    # check instead of being treated as "option not given".
    if args.word_size is not None:
        if args.word_size < 1:
            parser.error('word size must be >= 1')
    elif args.word_pattern:
        p = word_pattern.read(args.word_pattern)
        if not p.pos_list:
            e = ("{0} does not contain info on word positions.\n"
                 "Please use: create_wordpattern.py with"
                 " --word_position option.")
            parser.error(e.format(args.word_pattern.name))
        else:
            args.word_pattern = p
    else:
        parser.error("Specify either: --word_size or --word_pattern.")
    return args
def main():
    """Compute boolean word-vector distances between FASTA sequences.

    Arguments are obtained from ``get_parser()`` and checked by
    ``validate_args()``. The word pattern is created from the sequences
    when ``args.word_size`` is set, otherwise it is read from
    ``args.word_pattern``. The resulting distance matrix is written to
    ``args.out`` if given, otherwise displayed.
    """
    parser = get_parser()
    args = validate_args(parser)
    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    bools = word_vector.Bools(seq_records.length_list, p)
    dist = word_bool_distance.Distance(bools, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing raises.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
def main():
    """Compute word-vector distances between FASTA sequences.

    Arguments are obtained from ``get_parser()`` and checked by
    ``validate_args()``. The word pattern is created from the sequences
    when ``args.word_size`` is set, otherwise it is read from
    ``args.word_pattern``.

    The vector type is selected by ``args.vector``:

    * ``'counts'`` / ``'freqs'`` use ``Counts`` / ``Freqs``, or their
      weighted variants when ``args.char_weights`` is provided;
    * any other value uses ``FreqsStd`` with an ``EqualFreqs`` model
      (when ``args.alphabet_size`` is set) or an ``EquilibriumFreqs``
      model built from ``args.char_freqs``.

    The resulting distance matrix is written to ``args.out`` if given,
    otherwise displayed.
    """
    parser = get_parser()
    args = validate_args(parser)
    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    veccls = {'counts': word_vector.Counts,
              'freqs': word_vector.Freqs}
    vecclsw = {'counts': word_vector.CountsWeight,
               'freqs': word_vector.FreqsWeight}

    if args.vector in veccls:
        if args.char_weights is None:
            vec = veccls[args.vector](seq_records.length_list, p)
        else:
            weightmodel = word_vector.WeightModel(
                char_weights=args.char_weights)
            vec = vecclsw[args.vector](seq_records.length_list, p,
                                       weightmodel)
    else:
        if args.alphabet_size:
            freqmodel = word_vector.EqualFreqs(
                alphabet_size=args.alphabet_size)
        else:
            freqmodel = word_vector.EquilibriumFreqs(args.char_freqs)
        vec = word_vector.FreqsStd(seq_records.length_list, p, freqmodel)

    dist = word_distance.Distance(vec, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing raises.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)