def test_get_template_alignment_column_mask(self):
    """The literal Lane mask must hash to the MD5 of the real mask file."""
    # Reference digest of the real Lane mask file, computed without its
    # trailing newline.
    expected_digest = 'e3e5f2804e29694e03a01fd9cc157a53'

    # safe_md5 is handed a file-like object, so wrap the literal mask.
    mask_handle = StringIO(get_template_alignment_column_mask())
    observed_digest = safe_md5(mask_handle).hexdigest()

    self.assertEqual(observed_digest, expected_digest)
def test_get_template_alignment_column_mask(self):
    """Check the literal Lane mask against the real file's MD5 digest."""
    # Digest of the real Lane mask file with the trailing newline removed.
    expected_md5 = 'e3e5f2804e29694e03a01fd9cc157a53'

    # Wrap the mask in an in-memory binary stream, since safe_md5 is given
    # a file-like object rather than the raw value.
    mask_stream = BytesIO(get_template_alignment_column_mask())
    digest = safe_md5(mask_stream)

    self.assertEqual(digest.hexdigest(), expected_md5)
def main():
    """Filter the input alignment and write ``<basename>_pfiltered.fasta``.

    Command line options are read via ``parse_command_line_parameters``.
    The input fasta file is passed through
    ``apply_lane_mask_and_gap_filter`` and, when ``opts.remove_outliers``
    is set, additionally through ``remove_outliers``; the result is written
    into ``opts.output_dir``.

    Raises:
        ValueError: if the input fasta file is empty (zero bytes).
        IOError: if the output file cannot be opened for writing.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # build the output filepath and open it so any problems can be caught
    # before starting the work
    try:
        mkdir(opts.output_dir)
    except OSError:
        # Deliberately best-effort: a failure here is ignored, and an
        # unusable directory surfaces when the output file is opened below.
        pass

    input_filename = split(opts.input_fasta_file)[1]
    input_basename = splitext(input_filename)[0]

    if getsize(opts.input_fasta_file) == 0:
        raise ValueError("An empty fasta file was provided. "
                         "Did the alignment complete sucessfully? "
                         "Did PyNAST discard all sequences due to too-stringent minimum length "
                         "or minimum percent ID settings?")

    output_fp = '%s/%s_pfiltered.fasta' % (opts.output_dir, input_basename)

    try:
        outfile = open(output_fp, 'w')
    except IOError:
        # Report the path that was actually attempted (the original code
        # referenced an undefined name here and raised NameError instead).
        raise IOError("Can't open output_filepath for writing: %s"
                      % output_fp)

    # From here on the output file is open; make sure it is closed even if
    # filtering fails part-way through.
    try:
        # The lane mask is only used when it has not been suppressed and no
        # entropy threshold was requested.
        if not opts.suppress_lane_mask_filter and not opts.entropy_threshold:
            if opts.lane_mask_fp is not None:
                with open(opts.lane_mask_fp, 'U') as lane_mask_f:
                    lane_mask = lane_mask_f.read().strip()
            else:
                lane_mask = get_template_alignment_column_mask()
        else:
            lane_mask = None

        with open(opts.input_fasta_file, 'U') as infile:
            if opts.remove_outliers:
                # apply the lanemask/gap removal, then remove outliers
                seq_gen = apply_lane_mask_and_gap_filter(
                    infile, lane_mask, opts.allowed_gap_frac,
                    entropy_threshold=opts.entropy_threshold)
                filtered_aln = remove_outliers(seq_gen, opts.threshold)
                for seq in filtered_aln:
                    outfile.write(seq.to_fasta())
                    outfile.write('\n')
            else:
                # just apply the lanemask/gap removal; each result is
                # written as-is (presumably already fasta-formatted text --
                # confirm against apply_lane_mask_and_gap_filter)
                for result in apply_lane_mask_and_gap_filter(
                        infile, lane_mask, opts.allowed_gap_frac,
                        entropy_threshold=opts.entropy_threshold):
                    outfile.write(result)
    finally:
        outfile.close()
def main():
    """Filter the input alignment and write ``<basename>_pfiltered.fasta``.

    Command line options are read via ``parse_command_line_parameters``.
    The input fasta file is passed through
    ``apply_lane_mask_and_gap_filter`` and, when ``opts.remove_outliers``
    is set, additionally through ``remove_outliers``; the result is written
    into ``opts.output_dir``.

    Raises:
        ValueError: if the input fasta file is empty (zero bytes).
        IOError: if the output file cannot be opened for writing.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # build the output filepath and open it so any problems can be caught
    # before starting the work
    try:
        mkdir(opts.output_dir)
    except OSError:
        # Deliberately best-effort: a failure here is ignored, and an
        # unusable directory surfaces when the output file is opened below.
        pass

    input_filename = split(opts.input_fasta_file)[1]
    input_basename = splitext(input_filename)[0]

    if getsize(opts.input_fasta_file) == 0:
        raise ValueError("An empty fasta file was provided. "
                         "Did the alignment complete sucessfully? "
                         "Did PyNAST discard all sequences due to too-stringent minimum length "
                         "or minimum percent ID settings?")

    output_fp = '%s/%s_pfiltered.fasta' % (opts.output_dir, input_basename)

    try:
        outfile = open(output_fp, 'w')
    except IOError:
        # Report the path that was actually attempted (the original code
        # referenced an undefined name here and raised NameError instead).
        raise IOError("Can't open output_filepath for writing: %s"
                      % output_fp)

    # From here on the output file is open; make sure it is closed even if
    # filtering fails part-way through.
    try:
        # The lane mask is only used when it has not been suppressed and no
        # entropy threshold was requested.
        if not opts.suppress_lane_mask_filter and not opts.entropy_threshold:
            if opts.lane_mask_fp is not None:
                with open(opts.lane_mask_fp, 'U') as lane_mask_f:
                    lane_mask = lane_mask_f.read().strip()
            else:
                lane_mask = get_template_alignment_column_mask()
        else:
            lane_mask = None

        with open(opts.input_fasta_file, 'U') as infile:
            if opts.remove_outliers:
                # apply the lanemask/gap removal, then remove outliers
                seq_gen = apply_lane_mask_and_gap_filter(
                    infile, lane_mask, opts.allowed_gap_frac,
                    verbose=opts.verbose,
                    entropy_threshold=opts.entropy_threshold)
                filtered_aln = remove_outliers(seq_gen, opts.threshold)
                for seq in filtered_aln.Seqs:
                    outfile.write(seq.toFasta())
                    outfile.write('\n')
            else:
                # just apply the lanemask/gap removal; each result is
                # written as-is (presumably already fasta-formatted text --
                # confirm against apply_lane_mask_and_gap_filter)
                for result in apply_lane_mask_and_gap_filter(
                        infile, lane_mask, opts.allowed_gap_frac,
                        verbose=opts.verbose,
                        entropy_threshold=opts.entropy_threshold):
                    outfile.write(result)
    finally:
        outfile.close()