def main():
    """Compute a word-frequency distance matrix for the FASTA input.

    Word patterns are created from the sequences when ``args.word_size`` is
    set, otherwise they are read from ``args.word_pattern``. Depending on
    ``args.reduce_alphabet`` and ``args.merge_revcomp`` the patterns are then
    passed through ``reduce_alphabet()`` and ``merge_revcomp()``. The
    resulting matrix is written to ``args.out`` when given, otherwise it is
    shown with ``matrix.display``.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    if args.reduce_alphabet:
        p = p.reduce_alphabet(seqcontent.get_reduced_alphabet(args.molecule))
    if args.merge_revcomp:
        p = p.merge_revcomp()

    freqs = word_vector.Freqs(seq_records.length_list, p)
    dist = word_distance.Distance(freqs, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
def main():
    """Compute a distance matrix with ``wmetric.Distance`` for the FASTA input.

    Reads the sequences from ``args.fasta``, builds the distance object from
    the sequence records and ``args.matrix``, and either writes the resulting
    matrix to ``args.out`` or shows it with ``matrix.display``, both using
    the ``args.outfmt`` format.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)
    dist = wmetric.Distance(seq_records, args.matrix)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
def main():
    """Compute a distance matrix from ``bbc`` vectors of the FASTA input.

    Vectors are created by ``bbc.create_vectors`` from the sequence records,
    ``args.k`` and ``args.alphabet``. The resulting matrix is written to
    ``args.out`` when given, otherwise it is shown with ``matrix.display``.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)
    vector = bbc.create_vectors(seq_records, args.k, alphabet=args.alphabet)
    dist = bbc.Distance(vector)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
def main():
    """Compute a distance matrix from ``graphdna`` vectors of the FASTA input.

    ``args.vector`` selects the vector constructor: ``'2DSV'`` uses
    ``create_2DSGraphVectors``, ``'2DNV'`` uses ``create_2DNGraphVectors``,
    and any other value uses ``create_2DMGraphVectors`` with ``args.ndim``.
    The resulting matrix is written to ``args.out`` when given, otherwise it
    is shown with ``matrix.display``.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    if args.vector == '2DSV':
        vector = graphdna.create_2DSGraphVectors(seq_records)
    elif args.vector == '2DNV':
        vector = graphdna.create_2DNGraphVectors(seq_records)
    else:
        vector = graphdna.create_2DMGraphVectors(seq_records, args.ndim)

    dist = graphdna.Distance(vector)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
def main():
    """Compute a distance matrix from ``word_vector.Bools`` vectors.

    Word patterns are created from the sequences when ``args.word_size`` is
    set, otherwise they are read from ``args.word_pattern``. The distance is
    built by ``word_bool_distance.Distance`` with ``args.distance``. The
    resulting matrix is written to ``args.out`` when given, otherwise it is
    shown with ``matrix.display``.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    bools = word_vector.Bools(seq_records.length_list, p)
    dist = word_bool_distance.Distance(bools, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
def main():
    """Compute a word-based distance matrix for the FASTA input.

    Word patterns are created from the sequences when ``args.word_size`` is
    set, otherwise they are read from ``args.word_pattern``.

    The vector type depends on ``args.vector``:

    * ``'counts'`` / ``'freqs'``: ``word_vector.Counts`` / ``Freqs``, or
      their ``*Weight`` variants (with a ``WeightModel``) when
      ``args.char_weights`` is not ``None``.
    * anything else: ``word_vector.FreqsStd`` with an ``EqualFreqs`` model
      when ``args.alphabet_size`` is set, otherwise an ``EquilibriumFreqs``
      model built from ``args.char_freqs``.

    The resulting matrix is written to ``args.out`` when given, otherwise it
    is shown with ``matrix.display``.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    if args.vector in ('counts', 'freqs'):
        if args.char_weights is None:
            veccls = {
                'counts': word_vector.Counts,
                'freqs': word_vector.Freqs,
            }
            vec = veccls[args.vector](seq_records.length_list, p)
        else:
            vecclsw = {
                'counts': word_vector.CountsWeight,
                'freqs': word_vector.FreqsWeight,
            }
            weightmodel = word_vector.WeightModel(
                char_weights=args.char_weights)
            vec = vecclsw[args.vector](seq_records.length_list, p,
                                       weightmodel)
    else:
        if args.alphabet_size:
            freqmodel = word_vector.EqualFreqs(
                alphabet_size=args.alphabet_size)
        else:
            freqmodel = word_vector.EquilibriumFreqs(args.char_freqs)
        vec = word_vector.FreqsStd(seq_records.length_list, p, freqmodel)

    dist = word_distance.Distance(vec, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
def main():
    """Compute a ``word_d2`` distance matrix over a range of word sizes.

    One word pattern is created for every word size from
    ``args.min_word_size`` to ``args.max_word_size`` (inclusive), and one
    vector is built per pattern. ``args.vector`` (``'counts'`` or
    ``'freqs'``) selects the vector class; the ``*Weight`` variants with a
    ``WeightModel`` are used when ``args.char_weights`` is not ``None``.

    The resulting matrix is written to ``args.out`` when given, otherwise it
    is shown with ``matrix.display``.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    # The "+ 1" makes the upper bound inclusive.
    patterns = [
        word_pattern.create(seq_records.seq_list, i)
        for i in range(args.min_word_size, args.max_word_size + 1)
    ]

    if args.char_weights is not None:
        weightmodel = word_vector.WeightModel(char_weights=args.char_weights)
        vecklas = {
            'counts': word_vector.CountsWeight,
            'freqs': word_vector.FreqsWeight
        }[args.vector]
        kwargs = {
            'seq_lengths': seq_records.length_list,
            'weightmodel': weightmodel
        }
    else:
        vecklas = {
            'counts': word_vector.Counts,
            'freqs': word_vector.Freqs
        }[args.vector]
        kwargs = {'seq_lengths': seq_records.length_list}

    vecs = [vecklas(patterns=p, **kwargs) for p in patterns]

    dist = word_d2.Distance(vecs)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
def main():
    """Compute a ``word_rtd`` distance matrix for the FASTA input.

    The word pattern is created from the sequences when ``args.word_size``
    is set, otherwise ``args.word_pattern`` is used directly. The vector is
    built by ``word_rtd.create_vector`` from ``seq_records.count`` and the
    pattern. The resulting matrix is written to ``args.out`` when given,
    otherwise it is shown with ``matrix.display``.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_size:
        # Third positional argument is True here.
        # NOTE(review): presumably this is `wordpos` - confirm against
        # word_pattern.create.
        p = word_pattern.create(seq_records.seq_list, args.word_size, True)
    else:
        # Used as-is, without word_pattern.read().
        # NOTE(review): presumably validate_args has already turned this
        # into a pattern object - confirm.
        p = args.word_pattern

    vector = word_rtd.create_vector(seq_records.count, p)
    dist = word_rtd.Distance(vector, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
def main():
    """Create word patterns from the FASTA input and output them.

    When ``args.teiresias`` is set, the input handle is closed and
    ``word_pattern.run_teiresias`` is called with the input file name.
    Otherwise the sequences are read from the handle and the pattern is
    built with ``word_pattern.create`` using ``args.word_size`` and
    ``args.word_position``.

    The formatted pattern is written to ``args.out`` when given, otherwise
    it is printed.
    """
    parser = get_parser()
    args = validate_args(parser)

    if args.teiresias:
        # run_teiresias is given the file name, so the handle is closed
        # before the call.
        args.fasta.close()
        p = word_pattern.run_teiresias(args.fasta.name,
                                       w=args.word_size,
                                       l=args.l,
                                       k=args.k,
                                       output_filename=args.out)
    else:
        try:
            seq_records = seqrecords.read_fasta(args.fasta)
        finally:
            # Close the input even if reading fails.
            args.fasta.close()
        p = word_pattern.create(seq_records.seq_list,
                                args.word_size,
                                args.word_position)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            oh.write(p.format())
    else:
        print(p.format())
def main():
    """Compute a composition-based distance matrix for the FASTA input.

    Uses ``args.word_patterns`` when provided. Otherwise three word patterns
    are created from the sequences, for word sizes ``args.word_size``,
    ``args.word_size - 1`` and ``args.word_size - 2`` (in that order). The
    patterns feed ``word_vector.Composition``, and the distance is always
    computed with the ``'angle_cos_diss'`` function.

    The resulting matrix is written to ``args.out`` when given, otherwise it
    is shown with ``matrix.display``.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_patterns:
        patterns = args.word_patterns
    else:
        # Descending word sizes: k, k-1, k-2.
        patterns = [
            word_pattern.create(seq_records.seq_list, i)
            for i in range(args.word_size, args.word_size - 3, -1)
        ]

    compos = word_vector.Composition(seq_records.length_list, *patterns)
    dist = word_distance.Distance(compos, 'angle_cos_diss')
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
from alfpy import word_d2

# Command-line usage: <script> <method> <infile> <outfile>
infile = ""
outfile = ""
method = ""

if len(sys.argv) > 4:
    sys.stderr.write("Too many arguments\n")
    sys.exit(1)
elif len(sys.argv) < 4:
    # Report missing arguments instead of failing with an IndexError below.
    sys.stderr.write("Too few arguments\n")
    sys.exit(1)
else:
    method = sys.argv[1]
    infile = sys.argv[2]
    # NOTE(review): outfile is read but not used in the code visible here.
    outfile = sys.argv[3]

# The context manager closes the input file even if parsing fails.
with open(infile, 'r') as input_file:
    seq_records = seqrecords.read_fasta(input_file)

# Choose one method to compute the distance matrix.
# Any other method value produces no output.
if method == "bbc":
    alphabet = seqcontent.get_alphabet('protein')
    vector = bbc.create_vectors(seq_records, 10, alphabet)
    dist = bbc.Distance(vector)
    matrix = distmatrix.create(seq_records.id_list, dist)
    matrix.display()
elif method == "ncd":
    dist = ncd.Distance(seq_records)
    matrix = distmatrix.create(seq_records.id_list, dist)
    matrix.display()
def create_from_fasta(handle, word_size=1, wordpos=False):
    """Build word patterns (a Pattern object) from a FASTA file.

    Args:
        handle: Passed to ``seqrecords.read_fasta`` to load the sequences.
        word_size: Forwarded to ``create`` as ``word_size``.
        wordpos: Forwarded to ``create`` as ``wordpos``.

    Returns:
        Whatever ``create`` returns for the sequences read from ``handle``.
    """
    sequences = seqrecords.read_fasta(handle).seq_list
    return create(sequences, word_size=word_size, wordpos=wordpos)