コード例 #1
0
 def test_get_template_alignment_column_mask(self):
     # The lane mask is shipped as a literal; its MD5 is expected to equal
     # that of the real lane mask file (minus the trailing newline).
     expected_md5 = 'e3e5f2804e29694e03a01fd9cc157a53'
     mask_handle = StringIO(get_template_alignment_column_mask())
     observed_md5 = safe_md5(mask_handle).hexdigest()
     self.assertEqual(observed_md5, expected_md5)
コード例 #2
0
 def test_get_template_alignment_column_mask(self):
     # The lane mask is shipped as a literal; its MD5 is expected to equal
     # that of the real lane mask file (minus the trailing newline).
     expected_md5 = 'e3e5f2804e29694e03a01fd9cc157a53'
     mask_handle = BytesIO(get_template_alignment_column_mask())
     observed_md5 = safe_md5(mask_handle).hexdigest()
     self.assertEqual(observed_md5, expected_md5)
コード例 #3
0
def main():
    """Filter an aligned fasta file and write the filtered alignment.

    Command-line options are parsed from ``script_info``. The result is
    written to ``<output_dir>/<input_basename>_pfiltered.fasta``.

    Raises:
        ValueError: if the input fasta file is empty (size 0).
        IOError: if the output file cannot be opened for writing.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Build the output filepath and open it early so that any problems can
    # be caught before starting the work.
    try:
        mkdir(opts.output_dir)
    except OSError:
        # Deliberate best-effort: failure to create the directory is ignored
        # here (presumably because it already exists); a genuinely unusable
        # directory is reported when the output file is opened below.
        pass
    input_dir, input_filename = split(opts.input_fasta_file)
    input_basename, ext = splitext(input_filename)

    if getsize(opts.input_fasta_file) == 0:
        raise ValueError("An empty fasta file was provided. "
                         "Did the alignment complete sucessfully? "
                         "Did PyNAST discard all sequences due to too-stringent minimum length "
                         "or minimum percent ID settings?")

    output_fp = '%s/%s_pfiltered.fasta' % (opts.output_dir, input_basename)

    try:
        outfile = open(output_fp, 'w')
    except IOError:
        # Report the path that was actually attempted (output_fp); the
        # previous code referenced an undefined name and raised NameError.
        raise IOError("Can't open output_filepath for writing: %s"
                      % output_fp)

    # From here on the output file is open: make sure it is closed even if
    # reading the lane mask or filtering fails.
    try:
        # The lane mask is only used when it has not been suppressed and no
        # entropy threshold was given.
        if not opts.suppress_lane_mask_filter and not opts.entropy_threshold:
            if opts.lane_mask_fp is not None:
                # NOTE(review): the 'U' mode is kept to preserve existing
                # behavior; it was removed in Python 3.11.
                with open(opts.lane_mask_fp, 'U') as lane_mask_f:
                    lane_mask = lane_mask_f.read().strip()
            else:
                lane_mask = get_template_alignment_column_mask()
        else:
            lane_mask = None

        with open(opts.input_fasta_file, 'U') as infile:
            if opts.remove_outliers:
                # apply the lanemask/gap removal, then remove outliers
                seq_gen = apply_lane_mask_and_gap_filter(
                    infile, lane_mask, opts.allowed_gap_frac,
                    entropy_threshold=opts.entropy_threshold)

                filtered_aln = remove_outliers(seq_gen, opts.threshold)
                for seq in filtered_aln:
                    outfile.write(seq.to_fasta())
                    outfile.write('\n')
            else:
                # just apply the lanemask/gap removal
                for result in apply_lane_mask_and_gap_filter(
                        infile, lane_mask, opts.allowed_gap_frac,
                        entropy_threshold=opts.entropy_threshold):
                    outfile.write(result)
    finally:
        outfile.close()
コード例 #4
0
ファイル: filter_alignment.py プロジェクト: shiffer1/qiime
def main():
    """Filter an aligned fasta file and write the filtered alignment.

    Command-line options are parsed from ``script_info``. The result is
    written to ``<output_dir>/<input_basename>_pfiltered.fasta``.

    Raises:
        ValueError: if the input fasta file is empty (size 0).
        IOError: if the output file cannot be opened for writing.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Build the output filepath and open it early so that any problems can
    # be caught before starting the work.
    try:
        mkdir(opts.output_dir)
    except OSError:
        # Deliberate best-effort: failure to create the directory is ignored
        # here (presumably because it already exists); a genuinely unusable
        # directory is reported when the output file is opened below.
        pass
    input_dir, input_filename = split(opts.input_fasta_file)
    input_basename, ext = splitext(input_filename)

    if getsize(opts.input_fasta_file) == 0:
        raise ValueError("An empty fasta file was provided. "
                         "Did the alignment complete sucessfully? "
                         "Did PyNAST discard all sequences due to too-stringent minimum length "
                         "or minimum percent ID settings?")

    output_fp = '%s/%s_pfiltered.fasta' % (opts.output_dir, input_basename)

    try:
        outfile = open(output_fp, 'w')
    except IOError:
        # Report the path that was actually attempted (output_fp); the
        # previous code referenced an undefined name and raised NameError.
        raise IOError("Can't open output_filepath for writing: %s"
                      % output_fp)

    # From here on the output file is open: make sure it is closed even if
    # reading the lane mask or filtering fails.
    try:
        # The lane mask is only used when it has not been suppressed and no
        # entropy threshold was given.
        if not opts.suppress_lane_mask_filter and not opts.entropy_threshold:
            if opts.lane_mask_fp is not None:
                # NOTE(review): the 'U' mode is kept to preserve existing
                # behavior; it was removed in Python 3.11.
                with open(opts.lane_mask_fp, 'U') as lane_mask_f:
                    lane_mask = lane_mask_f.read().strip()
            else:
                lane_mask = get_template_alignment_column_mask()
        else:
            lane_mask = None

        with open(opts.input_fasta_file, 'U') as infile:
            if opts.remove_outliers:
                # apply the lanemask/gap removal, then remove outliers
                seq_gen = apply_lane_mask_and_gap_filter(
                    infile, lane_mask, opts.allowed_gap_frac,
                    verbose=opts.verbose,
                    entropy_threshold=opts.entropy_threshold)

                filtered_aln = remove_outliers(seq_gen, opts.threshold)
                for seq in filtered_aln.Seqs:
                    outfile.write(seq.toFasta())
                    outfile.write('\n')
            else:
                # just apply the lanemask/gap removal
                for result in apply_lane_mask_and_gap_filter(
                        infile, lane_mask, opts.allowed_gap_frac,
                        verbose=opts.verbose,
                        entropy_threshold=opts.entropy_threshold):
                    outfile.write(result)
    finally:
        outfile.close()