Esempio n. 1
0
def main():
    """Split a taxonomy, assign OTU IDs to sequences and build CM models.

    Steps, in order:

    1. Parse the command line described by ``script_info``.
    2. Build the taxonomic-rank dictionary from ``opts.input_taxonomy_fps``
       and the OTU dictionary from ``opts.input_fasta_fps``.
    3. Create ``opts.output_dir`` (mode 0755) when it is not already a
       directory.
    4. Split the taxonomy at ``opts.taxonomy_level``; an empty string for
       that option selects level 7.
    5. Hand the dictionaries and the split taxonomy to
       ``assign_otuID_to_seqs`` and finally run ``build_cm_models`` on the
       output directory.
    """
    option_parser, opts, args = \
        parse_command_line_parameters(**script_info)

    taxonomic_rank_dictionary = create_taxonomic_rank_dictionary(
        opts.input_taxonomy_fps)
    otu_dictionary = create_otu_dictionary(opts.input_fasta_fps)

    # An empty --taxonomy_level means "use the default", which is level 7.
    if opts.taxonomy_level == "":
        taxonomy_level = 7
    else:
        taxonomy_level = opts.taxonomy_level

    # The directory is only created when missing; an existing one is reused.
    # 0o755 is the same mode as the old 0755 literal, spelled so that it is
    # valid on both Python 2.6+ and Python 3.
    if not path.isdir(opts.output_dir):
        mkdir(opts.output_dir, 0o755)

    sub_taxonomy_list = [
        split_taxonomy_list(opts.input_taxonomy_fps, taxonomy_level,
                            opts.output_dir)
    ]
    assign_otuID_to_seqs(taxonomic_rank_dictionary, otu_dictionary,
                         sub_taxonomy_list, opts.output_dir)

    build_cm_models(opts.output_dir)
Esempio n. 2
0
def main():
    """Split a taxonomy and assign sequence IDs to the input sequences.

    Parses the command line described by ``script_info``, creates
    ``opts.output_dir`` (mode 0755), splits ``opts.input_taxonomy_fps`` at
    ``opts.taxonomy_level`` (the string ``"7"`` when the option is empty) and
    passes the resulting one-element list to ``assign_seqID_to_seqs`` together
    with ``opts.input_fasta_fps`` and the output directory.

    The directory is created unconditionally.
    NOTE(review): if ``mkdir`` is ``os.mkdir`` this raises when the directory
    already exists -- confirm that refusing to reuse a directory is intended.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # An empty --taxonomy_level falls back to level "7".
    if opts.taxonomy_level == "":
        taxonomy_level = "7"
    else:
        taxonomy_level = opts.taxonomy_level

    # 0o755 equals the old 0755 literal and parses on Python 2.6+ and 3.
    mkdir(opts.output_dir, 0o755)

    sub_taxonomy_list = [
        split_taxonomy_list(opts.input_taxonomy_fps, taxonomy_level,
                            opts.output_dir)
    ]
    # The default-level branch used to pass an undefined name (`taxon_list`)
    # here; both cases now pass the list that was just built.
    assign_seqID_to_seqs(sub_taxonomy_list, opts.input_fasta_fps,
                         opts.output_dir)
def main():
    """Classify query sequences by running hmmscan down a directory tree.

    ``opts.input_HMM_fp`` is walked with ``os.walk``. Each visited directory
    is expected to hold an HMM database named ``db``; hmmscan output is
    written next to it as ``result.out``.

    * The first directory yielded by the walk is scanned with the full query
      file ``opts.input_query_fp``.
    * Any later directory is scanned only when its base name is one of the
      ranks chosen so far, using ``<basename>.fasta`` from a temporary
      directory that ``create_temp_test_seq_file`` is given to write into.

    Ranks chosen per query ID are accumulated and finally passed to
    ``taxonomy_assignment_to_query_seq``. Query IDs themselves are collected
    from the first scan only.

    The process exits with status 1 when hmmscan returns a non-zero code.
    The temporary directory is removed on every exit path.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    Query_collection = []
    rank_collection = []
    Query_dict = defaultdict(list)
    temp_dir_name = tempfile.mkdtemp(prefix='root_')

    # try/finally so the temp dir is also cleaned up when exit(1) fires or a
    # helper raises.
    try:
        for level, (root, dirs, files) in enumerate(
                os.walk(opts.input_HMM_fp)):
            print(root)
            is_first_dir = (level == 0)

            # Pick the sequences to scan in this directory, or skip it.
            if is_first_dir:
                seqs_fp = opts.input_query_fp
            elif os.path.basename(root) in rank_collection:
                seqs_fp = os.path.join(temp_dir_name,
                                       os.path.basename(root) + '.fasta')
            else:
                continue

            path_to_db = os.path.join(root, 'db')
            path_to_result = os.path.join(root, 'result.out')

            # NOTE(review): the command is assembled by string concatenation
            # and relies on '>' redirection, so paths containing spaces or
            # shell metacharacters will break it -- consider quoting.
            stdout, stderr, return_value = qcli_system_call(
                'hmmscan ' + path_to_db + ' ' + seqs_fp +
                ' > ' + path_to_result)
            if return_value != 0:
                print('Stdout:\n%s\nStderr:%s\n' % (stdout, stderr))
                exit(1)

            # Everything that consumes the parsed results stays inside the
            # `with` so the handles are still open if a helper reads lazily.
            with open(path_to_result, 'U') as HMM_result:
                (HMM_choice_list,
                 HMM_Query_list,
                 HMM_choice_list_with_ID) = search_HMM(HMM_result)
                with open(seqs_fp, 'U') as seqs_f:
                    create_temp_test_seq_file(temp_dir_name,
                                              HMM_choice_list_with_ID,
                                              seqs_f)
                rank_collection.extend(HMM_choice_list)
                if is_first_dir:
                    # Query IDs are only gathered from the top-level scan.
                    Query_collection.extend(HMM_Query_list)
                for Query_ID, rank in HMM_choice_list_with_ID:
                    Query_dict[Query_ID].append(rank)
    finally:
        shutil.rmtree(temp_dir_name)

    taxonomy_assignment_to_query_seq(Query_dict, Query_collection)
Esempio n. 4
0
def main():
    """Parse the command line described by ``script_info``.

    The parser, options and positional arguments are unpacked but not used
    any further in this function.
    """
    parsed_cli = parse_command_line_parameters(**script_info)
    option_parser, opts, args = parsed_cli
Esempio n. 5
0
def main():
    """Parse the command line and run ``search_cmfile_to_cmpress``.

    The only option consumed is ``opts.input_dir``, which is forwarded
    unchanged to ``search_cmfile_to_cmpress``.
    """
    parsed_cli = parse_command_line_parameters(**script_info)
    option_parser, opts, args = parsed_cli

    cm_input_dir = opts.input_dir
    search_cmfile_to_cmpress(cm_input_dir)
def main():
    """Compute LEA-seq consensus sequences and write them to ``output_dir``.

    Command-line options are read through ``parse_command_line_parameters``.
    After validation the function calls ``get_LEA_seq_consensus_seqs`` and
    writes three files into ``opts.output_dir``:

    * ``fwd.fna`` -- forward consensus, one record per random barcode,
      labelled ``<sample_id>_<index>``;
    * ``rev.fna`` -- the matching reverse consensus records;
    * ``log.txt`` -- the log text returned by ``get_LEA_seq_consensus_seqs``.

    Validation (each failure is reported via ``option_parser.error``):

    * ``--barcode_type`` must be ``golay_12`` or parse as an integer >= 1;
    * ``--max_barcode_errors`` must be >= 0;
    * exactly two sequence read filepaths (forward, then reverse).

    All options are validated before the output directory or any file is
    created, and every file is closed even when processing fails.
    NOTE(review): assumes ``option_parser.error`` terminates the program, as
    ``optparse.OptionParser.error`` does -- confirm for this parser.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    barcode_type = opts.barcode_type
    max_barcode_errors = opts.max_barcode_errors
    mapping_fp = opts.mapping_fp
    sequence_read_fps = opts.sequence_read_fps
    min_consensus = opts.min_consensus
    max_cluster_ratio = opts.max_cluster_ratio
    output_dir = opts.output_dir
    min_difference_in_bcs = opts.min_difference_in_bcs
    fwd_length = opts.fwd_length
    rev_length = opts.rev_length
    min_reads_per_random_bc = opts.min_reads_per_random_bc
    min_diff_in_clusters = opts.min_difference_in_clusters
    barcode_column = opts.header_barcode_column
    reverse_primer_column = opts.reverse_primer_column

    if barcode_type == 'golay_12':
        barcode_correction_fn = decode_golay_12
        barcode_len = 12
    else:
        # Non-golay barcodes get no error correction; the type string is
        # interpreted as the barcode length.
        barcode_correction_fn = None

        try:
            barcode_len = int(barcode_type)
        except ValueError:
            option_parser.error("Invalid barcode type '%s'. The barcode type "
                                "must be either golay_12 or a positive "
                                "integer indicating the barcode length." %
                                barcode_type)

    if max_barcode_errors < 0:
        option_parser.error("--max_barcode_errors must be greater than or "
                            "equal to zero. You provided %.4f." %
                            max_barcode_errors)

    if barcode_len < 1:
        option_parser.error("Invalid barcode length: %d. Must be greater "
                            "than zero." % barcode_len)

    if len(sequence_read_fps) != 2:
        option_parser.error("You must provide exactly two sequence read "
                            "filepaths, the first for forward reads and "
                            "second for reverse reads. You specified %d "
                            "filepaths." % len(sequence_read_fps))

    # Only touch the filesystem once the arguments are known to be valid.
    create_dir(output_dir)

    # Inputs are opened first so a missing input does not leave empty output
    # files behind. The outputs are still opened before the consensus call,
    # as before.
    with open(sequence_read_fps[0], 'U') as fwd_read_f, \
            open(sequence_read_fps[1], 'U') as rev_read_f, \
            open(mapping_fp, 'U') as map_f, \
            open(path.join(output_dir, "fwd.fna"), "w") \
            as fwd_consensus_outfile, \
            open(path.join(output_dir, "rev.fna"), "w") \
            as rev_consensus_outfile, \
            open(path.join(output_dir, "log.txt"), "w") as log_file:

        (consensus_seq_lookup,
         log_out) = get_LEA_seq_consensus_seqs(fwd_read_f,
                                               rev_read_f,
                                               map_f,
                                               output_dir,
                                               barcode_type,
                                               barcode_len,
                                               barcode_correction_fn,
                                               max_barcode_errors,
                                               min_consensus,
                                               max_cluster_ratio,
                                               min_difference_in_bcs,
                                               fwd_length,
                                               rev_length,
                                               min_reads_per_random_bc,
                                               min_diff_in_clusters,
                                               barcode_column,
                                               reverse_primer_column)

        for sample_id in consensus_seq_lookup:
            per_sample = consensus_seq_lookup[sample_id]
            for bc_index, rand_bc in enumerate(per_sample):
                # Forward and reverse consensus are stored joined by '^'.
                fwd_consensus, rev_consensus = per_sample[rand_bc].split('^')
                fwd_consensus_outfile.write(">{}_{}\n{}\n".format(
                    sample_id, bc_index, fwd_consensus))
                rev_consensus_outfile.write(">{}_{}\n{}\n".format(
                    sample_id, bc_index, rev_consensus))

        log_file.write(log_out)
def main():
    """Compute LEA-seq consensus sequences and write them to ``output_dir``.

    Options come from ``parse_command_line_parameters``. Once they pass
    validation, ``get_LEA_seq_consensus_seqs`` is called and its results are
    written into ``opts.output_dir``:

    * ``fwd.fna`` / ``rev.fna`` -- forward and reverse consensus sequences,
      one record per random barcode, labelled ``<sample_id>_<index>``;
    * ``log.txt`` -- the log text returned by ``get_LEA_seq_consensus_seqs``.

    Validation (each failure is reported via ``option_parser.error``):

    * ``--barcode_type`` must be ``golay_12`` or parse as an integer >= 1;
    * ``--max_barcode_errors`` must be >= 0;
    * ``--min_difference_in_clusters`` and ``--min_difference_in_bcs`` must
      each lie in [0, 1];
    * exactly two sequence read filepaths (forward, then reverse).

    Validation runs before the output directory or any file is created, and
    all files are closed even if processing raises.
    NOTE(review): assumes ``option_parser.error`` terminates the program, as
    ``optparse.OptionParser.error`` does -- confirm for this parser.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    barcode_type = opts.barcode_type
    max_barcode_errors = opts.max_barcode_errors
    mapping_fp = opts.mapping_fp
    sequence_read_fps = opts.sequence_read_fps
    min_consensus = opts.min_consensus
    max_cluster_ratio = opts.max_cluster_ratio
    output_dir = opts.output_dir
    min_difference_in_bcs = opts.min_difference_in_bcs
    fwd_length = opts.fwd_length
    rev_length = opts.rev_length
    min_reads_per_random_bc = opts.min_reads_per_random_bc
    min_diff_in_clusters = opts.min_difference_in_clusters
    barcode_column = opts.header_barcode_column
    reverse_primer_column = opts.reverse_primer_column

    if barcode_type == 'golay_12':
        barcode_correction_fn = decode_golay_12
        barcode_len = 12
    else:
        # Any other barcode type is read as a plain length, with no
        # error-correcting decoder.
        barcode_correction_fn = None

        try:
            barcode_len = int(barcode_type)
        except ValueError:
            option_parser.error("Invalid barcode type '%s'. The barcode type "
                                "must be either golay_12 or a positive "
                                "integer indicating the barcode length." %
                                barcode_type)

    if max_barcode_errors < 0:
        option_parser.error("--max_barcode_errors must be greater than or "
                            "equal to zero. You provided %.4f." %
                            max_barcode_errors)

    if min_diff_in_clusters < 0 or min_diff_in_clusters > 1:
        option_parser.error("--min_difference_in_clusters must be "
                            "between 0 to 1. You provided %.4f." %
                            min_diff_in_clusters)

    if min_difference_in_bcs < 0 or min_difference_in_bcs > 1:
        option_parser.error("--min_difference_in_bcs must be between 0 to 1."
                            " You provided %.4f." % min_difference_in_bcs)

    if barcode_len < 1:
        option_parser.error("Invalid barcode length: %d. Must be greater "
                            "than zero." % barcode_len)

    if len(sequence_read_fps) != 2:
        option_parser.error("You must provide exactly two sequence read "
                            "filepaths, the first for forward reads and "
                            "second for reverse reads. You specified %d "
                            "filepaths." % len(sequence_read_fps))

    # Filesystem side effects start only after every option has been checked.
    create_dir(output_dir)

    # Inputs first (a missing input then leaves no empty outputs behind);
    # outputs are still opened before the consensus call, as before.
    with open(sequence_read_fps[0], 'U') as fwd_read_f, \
            open(sequence_read_fps[1], 'U') as rev_read_f, \
            open(mapping_fp, 'U') as map_f, \
            open(path.join(output_dir, "fwd.fna"), "w") \
            as fwd_consensus_outfile, \
            open(path.join(output_dir, "rev.fna"), "w") \
            as rev_consensus_outfile, \
            open(path.join(output_dir, "log.txt"), "w") as log_file:

        (consensus_seq_lookup, log_out) = get_LEA_seq_consensus_seqs(
            fwd_read_f, rev_read_f, map_f, output_dir, barcode_type,
            barcode_len, barcode_correction_fn, max_barcode_errors,
            min_consensus, max_cluster_ratio, min_difference_in_bcs,
            fwd_length, rev_length, min_reads_per_random_bc,
            min_diff_in_clusters, barcode_column, reverse_primer_column)

        for sample_id in consensus_seq_lookup:
            per_sample = consensus_seq_lookup[sample_id]
            for bc_index, rand_bc in enumerate(per_sample):
                # Forward and reverse consensus are stored joined by '^'.
                fwd_consensus, rev_consensus = per_sample[rand_bc].split('^')
                fwd_consensus_outfile.write(">{}_{}\n{}\n".format(
                    sample_id, bc_index, fwd_consensus))
                rev_consensus_outfile.write(">{}_{}\n{}\n".format(
                    sample_id, bc_index, rev_consensus))

        log_file.write(log_out)