def main():
    """Split the taxonomy, assign OTU IDs to sequences and build CM models.

    Reads its configuration from the command line via
    ``parse_command_line_parameters(**script_info)`` and uses these options:

    * ``opts.input_taxonomy_fps`` -- passed to
      ``create_taxonomic_rank_dictionary`` and ``split_taxonomy_list``.
    * ``opts.input_fasta_fps`` -- passed to ``create_otu_dictionary``.
    * ``opts.taxonomy_level`` -- level handed to ``split_taxonomy_list``;
      an empty string selects the default level ``7``.
    * ``opts.output_dir`` -- created (mode 0o755) when it is not already a
      directory, then passed to every downstream step.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    taxonomic_rank_dictionary = create_taxonomic_rank_dictionary(
        opts.input_taxonomy_fps)
    otu_dictionary = create_otu_dictionary(opts.input_fasta_fps)

    # The only things that used to differ between the four copy-pasted
    # branches were the level argument and whether the directory was created.
    if opts.taxonomy_level == "":
        taxonomy_level = 7
    else:
        taxonomy_level = opts.taxonomy_level

    if not path.isdir(opts.output_dir):
        # 0o755 is the same value as the old 0755 literal, but it also parses
        # on Python 3 (the 0o prefix is accepted from Python 2.6 onwards).
        mkdir(opts.output_dir, 0o755)

    sub_taxonomy = split_taxonomy_list(
        opts.input_taxonomy_fps, taxonomy_level, opts.output_dir)
    sub_taxonomy_list = [sub_taxonomy]
    assign_otuID_to_seqs(taxonomic_rank_dictionary, otu_dictionary,
                         sub_taxonomy_list, opts.output_dir)

    # NOTE(review): the original text had lost its indentation; this call is
    # assumed to run unconditionally as the final step -- confirm.
    build_cm_models(opts.output_dir)
def main():
    """Split the taxonomy at the requested level and assign sequence IDs.

    Reads its configuration from the command line via
    ``parse_command_line_parameters(**script_info)`` and uses these options:

    * ``opts.input_taxonomy_fps`` -- passed to ``split_taxonomy_list``.
    * ``opts.input_fasta_fps`` -- passed to ``assign_seqID_to_seqs``.
    * ``opts.taxonomy_level`` -- level handed to ``split_taxonomy_list``;
      an empty string selects the default level ``"7"``.
    * ``opts.output_dir`` -- created with mode 0o755 and passed to both
      downstream steps.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    if opts.taxonomy_level == "":
        taxonomy_level = "7"
    else:
        taxonomy_level = opts.taxonomy_level

    # The directory is created unconditionally, as before.
    # NOTE(review): if ``mkdir`` is ``os.mkdir`` this raises when the
    # directory already exists -- confirm that is intended.
    mkdir(opts.output_dir, 0o755)

    sub_taxonomy = split_taxonomy_list(
        opts.input_taxonomy_fps, taxonomy_level, opts.output_dir)
    sub_taxonomy_list = [sub_taxonomy]

    # The default-level branch used to pass ``taxon_list``, a name that is
    # never assigned in this function, while the other branch passed
    # ``sub_taxonomy_list``. Both paths now pass the list built above.
    # NOTE(review): if ``taxon_list`` was a deliberate module-level global,
    # revisit this.
    assign_seqID_to_seqs(sub_taxonomy_list, opts.input_fasta_fps,
                         opts.output_dir)
def _hmmscan_and_bin(path_to_db, path_to_seqs, path_to_result, temp_dir_name):
    """Run hmmscan for one directory, parse the output and bin the sequences.

    ``hmmscan`` is run with ``path_to_db`` against ``path_to_seqs`` and its
    standard output is redirected to ``path_to_result``. A non-zero return
    value prints the captured stdout/stderr and terminates via ``exit(1)``.
    The result file is then parsed with ``search_HMM`` and the parsed
    (query ID, rank) pairs are handed to ``create_temp_test_seq_file``
    together with the open sequence file and ``temp_dir_name``.

    Returns the three values produced by ``search_HMM`` in order: the choice
    list, the query list and the choice list with IDs.
    """
    # NOTE(review): the command is a single string built by concatenation and
    # relies on ``>`` redirection, so it is presumably run through a shell;
    # paths containing spaces or shell metacharacters are not quoted.
    command = ('hmmscan ' + path_to_db + ' ' + path_to_seqs +
               ' > ' + path_to_result)
    stdout, stderr, return_value = qcli_system_call(command)
    if return_value != 0:
        print('Stdout:\n%s\nStderr:%s\n' % (stdout, stderr))
        exit(1)

    # Both handles used to be opened and never closed.
    with open(path_to_result, 'U') as hmm_result:
        choice_list, query_list, choice_list_with_id = search_HMM(hmm_result)
    with open(path_to_seqs, 'U') as seq_file:
        create_temp_test_seq_file(temp_dir_name, choice_list_with_id, seq_file)

    return choice_list, query_list, choice_list_with_id


def main():
    """Assign taxonomy to query sequences by walking a tree of HMM databases.

    ``opts.input_HMM_fp`` is traversed with ``os.walk``; every visited
    directory is expected to hold a ``db`` file and receives a ``result.out``
    file. The first directory visited is scanned against
    ``opts.input_query_fp``. Each later directory is scanned only when its
    basename is among the ranks collected so far, using
    ``<temp dir>/<basename>.fasta`` as the query file (presumably written by
    ``create_temp_test_seq_file`` during an earlier scan). The accumulated
    ranks per query ID and the query list from the first scan are finally
    passed to ``taxonomy_assignment_to_query_seq``.

    The temporary directory is removed even when a scan fails.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    Query_collection = []
    rank_collection = []
    Query_dict = defaultdict(list)
    temp_dir_name = tempfile.mkdtemp(prefix='root_')

    try:
        walker = os.walk(opts.input_HMM_fp)
        for level, (root, _dirs, _files) in enumerate(walker):
            print(root)

            if level == 0:
                # Top of the tree: scan the user's query file.
                path_to_seqs = opts.input_query_fp
            elif os.path.basename(root) in rank_collection:
                # A rank chosen by an earlier scan: descend into it with the
                # sequences that were binned under that rank.
                path_to_seqs = os.path.join(
                    temp_dir_name, os.path.basename(root) + '.fasta')
            else:
                continue

            choice_list, query_list, choice_list_with_id = _hmmscan_and_bin(
                os.path.join(root, 'db'),
                path_to_seqs,
                os.path.join(root, 'result.out'),
                temp_dir_name)

            rank_collection.extend(choice_list)
            if level == 0:
                # Only the first scan contributes to the query list.
                Query_collection.extend(query_list)
            for Query_ID, rank in choice_list_with_id:
                Query_dict[Query_ID].append(rank)
    finally:
        # Previously skipped whenever exit(1) fired or an exception escaped,
        # leaving the temporary directory behind.
        shutil.rmtree(temp_dir_name)

    taxonomy_assignment_to_query_seq(Query_dict, Query_collection)
def main():
    """Parse the command line described by ``script_info``.

    The parser, the options and the positional arguments are unpacked but
    not used any further here.
    """
    parsed = parse_command_line_parameters(**script_info)
    option_parser, opts, args = parsed
def main():
    """Parse the command line and hand ``opts.input_dir`` to
    ``search_cmfile_to_cmpress``.
    """
    parsed = parse_command_line_parameters(**script_info)
    option_parser, opts, args = parsed

    input_dir = opts.input_dir
    search_cmfile_to_cmpress(input_dir)
def main():
    """Build LEA-seq consensus sequences and write them to ``output_dir``.

    Options are read via ``parse_command_line_parameters(**script_info)``.
    After validation the forward reads, reverse reads and mapping file are
    passed to ``get_LEA_seq_consensus_seqs``, which returns a lookup indexed
    by sample ID and then random barcode, plus a log string. Each consensus
    value is a string holding the forward and reverse consensus separated by
    ``'^'``.

    Files written inside ``output_dir``:

    * ``fwd.fna`` / ``rev.fna`` -- one ``>{sample_id}_{index}`` record per
      random barcode, where ``index`` is the barcode's position when
      iterating that sample's lookup.
    * ``log.txt`` -- the log string returned by the consensus step.

    Invalid options are reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    barcode_type = opts.barcode_type
    max_barcode_errors = opts.max_barcode_errors
    mapping_fp = opts.mapping_fp
    sequence_read_fps = opts.sequence_read_fps
    min_consensus = opts.min_consensus
    max_cluster_ratio = opts.max_cluster_ratio
    output_dir = opts.output_dir
    min_difference_in_bcs = opts.min_difference_in_bcs
    fwd_length = opts.fwd_length
    rev_length = opts.rev_length
    min_reads_per_random_bc = opts.min_reads_per_random_bc
    min_diff_in_clusters = opts.min_difference_in_clusters
    barcode_column = opts.header_barcode_column
    reverse_primer_column = opts.reverse_primer_column

    create_dir(output_dir)

    if barcode_type == 'golay_12':
        barcode_correction_fn = decode_golay_12
        barcode_len = 12
    else:
        barcode_correction_fn = None
        try:
            barcode_len = int(barcode_type)
        except ValueError:
            option_parser.error("Invalid barcode type '%s'. The barcode type "
                                "must be either golay_12 or a positive "
                                "integer indicating the barcode length."
                                % barcode_type)

    if max_barcode_errors < 0:
        option_parser.error("--max_barcode_errors must be greater than or "
                            "equal to zero. You provided %.4f."
                            % max_barcode_errors)

    if barcode_len < 1:
        option_parser.error("Invalid barcode length: %d. Must be greater "
                            "than zero." % barcode_len)

    if len(sequence_read_fps) != 2:
        option_parser.error("You must provide exactly two sequence read "
                            "filepaths, the first for forward reads and "
                            "second for reverse reads. You specified %d "
                            "filepaths." % len(sequence_read_fps))

    # The output files used to be opened (and truncated) before validation
    # and were never closed on an error path. They are now opened only after
    # all checks pass and the inputs are readable, and every handle is
    # released by the ``with`` blocks even if processing raises.
    with open(sequence_read_fps[0], 'U') as fwd_read_f, \
            open(sequence_read_fps[1], 'U') as rev_read_f, \
            open(mapping_fp, 'U') as map_f:
        with open(path.join(output_dir, "fwd.fna"), "w") \
                as fwd_consensus_outfile, \
                open(path.join(output_dir, "rev.fna"), "w") \
                as rev_consensus_outfile, \
                open(path.join(output_dir, "log.txt"), "w") as log_file:

            consensus_seq_lookup, log_out = get_LEA_seq_consensus_seqs(
                fwd_read_f, rev_read_f, map_f, output_dir, barcode_type,
                barcode_len, barcode_correction_fn, max_barcode_errors,
                min_consensus, max_cluster_ratio, min_difference_in_bcs,
                fwd_length, rev_length, min_reads_per_random_bc,
                min_diff_in_clusters, barcode_column, reverse_primer_column)

            for sample_id in consensus_seq_lookup:
                sample_consensus = consensus_seq_lookup[sample_id]
                for bc_index, rand_bc in enumerate(sample_consensus):
                    consensus_seq = sample_consensus[rand_bc]
                    # Forward and reverse consensus are joined by '^'.
                    fwd_consensus, rev_consensus = consensus_seq.split('^')
                    fwd_consensus_outfile.write(">{}_{}\n{}\n".format(
                        sample_id, bc_index, fwd_consensus))
                    rev_consensus_outfile.write(">{}_{}\n{}\n".format(
                        sample_id, bc_index, rev_consensus))

            log_file.write(log_out)
def main():
    """Build LEA-seq consensus sequences and write them to ``output_dir``.

    Options are read via ``parse_command_line_parameters(**script_info)``.
    After validation the forward reads, reverse reads and mapping file are
    passed to ``get_LEA_seq_consensus_seqs``, which returns a lookup indexed
    by sample ID and then random barcode, plus a log string. Each consensus
    value is a string holding the forward and reverse consensus separated by
    ``'^'``.

    Validation performed before any read or output file is opened:

    * the barcode type is ``golay_12`` or parses as an integer >= 1;
    * ``max_barcode_errors`` is not negative;
    * ``min_difference_in_clusters`` and ``min_difference_in_bcs`` both lie
      in the closed interval [0, 1];
    * exactly two sequence read filepaths were given.

    Files written inside ``output_dir``:

    * ``fwd.fna`` / ``rev.fna`` -- one ``>{sample_id}_{index}`` record per
      random barcode, where ``index`` is the barcode's position when
      iterating that sample's lookup.
    * ``log.txt`` -- the log string returned by the consensus step.

    Invalid options are reported through ``option_parser.error``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    barcode_type = opts.barcode_type
    max_barcode_errors = opts.max_barcode_errors
    mapping_fp = opts.mapping_fp
    sequence_read_fps = opts.sequence_read_fps
    min_consensus = opts.min_consensus
    max_cluster_ratio = opts.max_cluster_ratio
    output_dir = opts.output_dir
    min_difference_in_bcs = opts.min_difference_in_bcs
    fwd_length = opts.fwd_length
    rev_length = opts.rev_length
    min_reads_per_random_bc = opts.min_reads_per_random_bc
    min_diff_in_clusters = opts.min_difference_in_clusters
    barcode_column = opts.header_barcode_column
    reverse_primer_column = opts.reverse_primer_column

    create_dir(output_dir)

    if barcode_type == 'golay_12':
        barcode_correction_fn = decode_golay_12
        barcode_len = 12
    else:
        barcode_correction_fn = None
        try:
            barcode_len = int(barcode_type)
        except ValueError:
            option_parser.error("Invalid barcode type '%s'. The barcode type "
                                "must be either golay_12 or a positive "
                                "integer indicating the barcode length."
                                % barcode_type)

    if max_barcode_errors < 0:
        option_parser.error("--max_barcode_errors must be greater than or "
                            "equal to zero. You provided %.4f."
                            % max_barcode_errors)

    if min_diff_in_clusters < 0 or min_diff_in_clusters > 1:
        option_parser.error("--min_difference_in_clusters must be "
                            "between 0 to 1. You provided %.4f."
                            % min_diff_in_clusters)

    if min_difference_in_bcs < 0 or min_difference_in_bcs > 1:
        option_parser.error("--min_difference_in_bcs must be between 0 to 1."
                            " You provided %.4f." % min_difference_in_bcs)

    if barcode_len < 1:
        option_parser.error("Invalid barcode length: %d. Must be greater "
                            "than zero." % barcode_len)

    if len(sequence_read_fps) != 2:
        option_parser.error("You must provide exactly two sequence read "
                            "filepaths, the first for forward reads and "
                            "second for reverse reads. You specified %d "
                            "filepaths." % len(sequence_read_fps))

    # The output files used to be opened (and truncated) before validation
    # and were never closed on an error path. They are now opened only after
    # all checks pass and the inputs are readable, and every handle is
    # released by the ``with`` blocks even if processing raises.
    with open(sequence_read_fps[0], 'U') as fwd_read_f, \
            open(sequence_read_fps[1], 'U') as rev_read_f, \
            open(mapping_fp, 'U') as map_f:
        with open(path.join(output_dir, "fwd.fna"), "w") \
                as fwd_consensus_outfile, \
                open(path.join(output_dir, "rev.fna"), "w") \
                as rev_consensus_outfile, \
                open(path.join(output_dir, "log.txt"), "w") as log_file:

            consensus_seq_lookup, log_out = get_LEA_seq_consensus_seqs(
                fwd_read_f, rev_read_f, map_f, output_dir, barcode_type,
                barcode_len, barcode_correction_fn, max_barcode_errors,
                min_consensus, max_cluster_ratio, min_difference_in_bcs,
                fwd_length, rev_length, min_reads_per_random_bc,
                min_diff_in_clusters, barcode_column, reverse_primer_column)

            for sample_id in consensus_seq_lookup:
                sample_consensus = consensus_seq_lookup[sample_id]
                for bc_index, rand_bc in enumerate(sample_consensus):
                    consensus_seq = sample_consensus[rand_bc]
                    # Forward and reverse consensus are joined by '^'.
                    fwd_consensus, rev_consensus = consensus_seq.split('^')
                    fwd_consensus_outfile.write(">{}_{}\n{}\n".format(
                        sample_id, bc_index, fwd_consensus))
                    rev_consensus_outfile.write(">{}_{}\n{}\n".format(
                        sample_id, bc_index, rev_consensus))

            log_file.write(log_out)