def execute(self):
    """Generate the Hi-C contact map of one bin/assembly.

    Sets up the temporary and output directories, delegates the actual
    map generation to ``mtc.generate_contact_map`` and cleans up the
    temporary files afterwards.
    """
    # Define the temporary directory (defaults to ./tmp).
    if not self.args["--tmpdir"]:
        self.args["--tmpdir"] = "./tmp"
    tmp_dir = mio.generate_temp_dir(self.args["--tmpdir"])

    # Define the output directory and output file names.
    if not self.args["--outdir"]:
        self.args["--outdir"] = join(".", "contact_map_" + self.args["--name"])
    os.makedirs(self.args["--outdir"], exist_ok=True)

    mtc.generate_contact_map(
        self.args["--assembly"],
        self.args["--contig-data"],
        self.args["--enzyme"],
        self.args["--name"],
        self.args["--pairs"],
        self.args["--outdir"],
        tmp_dir,
        self.args["--filter"],
        self.args["--force"],
        self.args["--mat-fmt"],
        self.args["--object"],
        int(self.args["--min-size"]),
        self.args["--pcr-dup"],
        int(self.args["--threads"]),
    )

    # Delete the temporary folder.
    if not self.args["--no-clean-up"]:
        shutil.rmtree(tmp_dir)

    # Delete the pyfastx index. Guarded so a missing index file does not
    # raise FileNotFoundError (the original removed it unconditionally).
    fxi_file = self.args["--assembly"] + ".fxi"
    if exists(fxi_file):
        os.remove(fxi_file)
def execute(self):
    """Run the recursive decontamination (validation) of the bins.

    Prepares the working directories, validates the numeric and
    algorithm options, checks that CheckM is available, then delegates
    to ``mtv.recursive_decontamination`` and cleans up afterwards.
    """

    def _fresh_dir(path):
        # Create *path*; with --force wipe and recreate an existing one,
        # otherwise abort so existing results are never overwritten.
        if not exists(path):
            os.makedirs(path)
        elif self.args["--force"]:
            shutil.rmtree(path)
            os.makedirs(path)
        else:
            logger.error(
                "%s already existed. Remove directory or use -F argument to overwrite it.",
                path,
            )
            raise ValueError

    # Define the temporary directory.
    if not self.args["--tmpdir"]:
        tmp_dir = mio.generate_temp_dir("./tmp")
    else:
        tmp_dir = self.args["--tmpdir"]
        os.makedirs(tmp_dir, exist_ok=True)

    # Define the output directory and output file names.
    if not self.args["--outdir"]:
        self.args["--outdir"] = "."
    os.makedirs(self.args["--outdir"], exist_ok=True)
    recursive_fasta_dir = join(self.args["--outdir"], "recursive_bin")
    final_fasta_dir = join(self.args["--outdir"], "final_bin")
    _fresh_dir(recursive_fasta_dir)
    _fresh_dir(final_fasta_dir)

    # Enable file logging.
    now = time.strftime("%Y%m%d%H%M%S")
    log_file = join(
        self.args["--outdir"], ("metator_validation_" + now + ".log")
    )
    mtl.set_file_handler(log_file)

    # Transform numeric variables as numeric.
    iterations = int(self.args["--iterations"])
    size = int(self.args["--size"])
    threads = int(self.args["--threads"])
    overlapping_parameter = int(self.args["--overlap"]) / 100
    resolution_parameter = float(self.args["--res-param"])

    # Check CheckM availability.
    if not mio.check_checkm():
        logger.error(
            "CheckM is not in the path. Could not make the iterations"
        )
        raise NameError

    # Check correct algorithm value.
    if self.args["--algorithm"] not in ["louvain", "leiden"]:
        logger.error('algorithm should be either "louvain" or "leiden"')
        raise ValueError

    mtv.recursive_decontamination(
        self.args["--algorithm"],
        self.args["--assembly"],
        self.args["--cluster-matrix"],
        self.args["--contigs"],
        final_fasta_dir,
        self.args["--fasta"],
        iterations,
        self.args["--network"],
        self.args["--outdir"],
        overlapping_parameter,
        recursive_fasta_dir,
        resolution_parameter,
        size,
        tmp_dir,
        threads,
    )

    # Delete the pyfastx index (guarded: it may not exist).
    fxi_file = self.args["--assembly"] + ".fxi"
    if exists(fxi_file):
        os.remove(fxi_file)

    # Delete the temporary folder.
    if not self.args["--no-clean-up"]:
        shutil.rmtree(tmp_dir)
def execute(self):
    """Run the full metaTOR pipeline: alignment, network, partition,
    and (unless skipped) recursive validation of the bins.

    The entry point (``--start``) may be raw reads (fastq), alignments
    (bam), pair files, or an already-built network; earlier stages are
    skipped accordingly.
    """
    # Define the temporary directory.
    if not self.args["--tmpdir"]:
        tmp_dir = mio.generate_temp_dir("./tmp")
    else:
        tmp_dir = self.args["--tmpdir"]
        os.makedirs(tmp_dir, exist_ok=True)

    # Define the output directory and output file names.
    if not self.args["--outdir"]:
        self.args["--outdir"] = "."
    os.makedirs(self.args["--outdir"], exist_ok=True)
    overlapping_fasta_dir = join(self.args["--outdir"], "overlapping_bin")
    if not exists(overlapping_fasta_dir):
        os.makedirs(overlapping_fasta_dir)
    else:
        if self.args["--force"]:
            shutil.rmtree(overlapping_fasta_dir)
            os.makedirs(overlapping_fasta_dir)
        else:
            # NOTE: a leftover debug print of --force was removed here.
            logger.error(
                "%s already existed. Remove directory or use -F argument to overwrite it.",
                overlapping_fasta_dir,
            )
            raise ValueError

    # Enable file logging.
    now = time.strftime("%Y%m%d%H%M%S")
    log_file = join(self.args["--outdir"], ("metator_" + now + ".log"))
    mtl.set_file_handler(log_file)

    # Define numeric variables.
    min_qual = int(self.args["--min-quality"])
    iterations = int(self.args["--iterations"])
    recursive_iterations = int(self.args["--rec-iter"])
    overlapping_parameter = int(self.args["--overlap"]) / 100
    recursive_overlapping_parameter = int(self.args["--rec-overlap"]) / 100
    size = int(self.args["--size"])
    threads = int(self.args["--threads"])
    resolution_parameter = float(self.args["--res-param"])

    # Check correct algorithm value.
    if self.args["--algorithm"] not in ["louvain", "leiden"]:
        logger.error('algorithm should be either "louvain" or "leiden"')
        raise ValueError

    # Check if normalization is in the list of possible normalizations.
    list_normalization = [
        "None",
        "abundance",
        "length",
        "RS",
        "empirical_hit",
        "theoritical_hit",
    ]
    if self.args["--normalization"] not in list_normalization:
        logger.error(
            'Normalization should be among this list: "None", "abundance", "length", "RS", "empirical_hit", "theoritical_hit"'
        )
        raise ValueError
    enzyme_required = ["RS", "theoritical_hit"]
    if (
        self.args["--normalization"] in enzyme_required
        and not self.args["--enzyme"]
    ):
        logger.error(
            'For "RS" and "theoritical_hit" normalization, enzyme is required.'
        )
        raise ValueError
    depth_required = ["abundance", "theoritical_hit"]
    if (
        self.args["--normalization"] in depth_required
        and not self.args["--depth"]
    ):
        logger.error(
            'For "abundance" and "theoritical_hit" normalization, depth is required.'
        )
        raise ValueError

    # Sanity checks needed only when the validation step will run.
    if not self.args["--skip-validation"]:
        recursive_fasta_dir = join(self.args["--outdir"], "recursive_bin")
        if not exists(recursive_fasta_dir):
            os.makedirs(recursive_fasta_dir)
        else:
            if self.args["--force"]:
                shutil.rmtree(recursive_fasta_dir)
                os.makedirs(recursive_fasta_dir)
            else:
                logger.error(
                    "%s already existed. Remove directory or use -F argument to overwrite it",
                    recursive_fasta_dir,
                )
                raise ValueError
        final_fasta_dir = join(self.args["--outdir"], "final_bin")
        if not exists(final_fasta_dir):
            os.makedirs(final_fasta_dir)
        else:
            if self.args["--force"]:
                shutil.rmtree(final_fasta_dir)
                os.makedirs(final_fasta_dir)
            else:
                logger.error(
                    "%s already existed. Remove directory or use -F argument to overwrite it.",
                    final_fasta_dir,
                )
                raise ValueError
        # Check CheckM availability (required for the iterations).
        if not mio.check_checkm():
            logger.error(
                "CheckM is not in the path. Could not make the iterations"
            )
            raise NameError

    # Manage start point.
    if self.args["--start"] == "fastq":
        start = 1
    elif self.args["--start"] == "bam":
        start = 2
    elif self.args["--start"] == "pair":
        start = 3
    elif self.args["--start"] == "network":
        start = 4
    else:
        logger.error(
            "Start argument should be 'fastq', 'bam', 'pair' or 'network'."
        )
        raise ValueError

    # Check if forward and reverse reads are given for fastq and bam start.
    if (
        self.args["--start"] == "fastq"
        or (
            self.args["--start"] == "bam"
            and self.args["--aligner"] == "bowtie2"
        )
    ) and not self.args["--reverse"]:
        logger.error(
            "Forward and reverse arguments are necessary for fastq with %s start and %s aligner.",
            self.args["--start"],
            self.args["--aligner"],
        )
        raise ValueError

    # Print information of the workflow:
    if start == 1:
        logger.info("Minimum mapping quality: %d", min_qual)
    if start <= 2:
        logger.info("Enzyme: %s", self.args["--enzyme"])
        logger.info("Normalization: %s", self.args["--normalization"])
        logger.info("Aligner algorithm: %s", self.args["--aligner"])
    logger.info("Partition algorithm: %s", self.args["--algorithm"])
    logger.info("Partition iterations: %s", iterations)
    logger.info("Overlapping parameter: %s", overlapping_parameter)
    if not self.args["--skip-validation"]:
        logger.info(
            "Recursive partition iterations: %d", recursive_iterations
        )
        logger.info(
            "Recursive overlapping parameter: %s",
            recursive_overlapping_parameter,
        )

    # Extract index and genome file.
    assembly = self.args["--assembly"]
    # Check what is the reference. If a fasta is given build the index. If a
    # bowtie2 index is given, retrieve the fasta.
    index = mio.check_fasta_index(assembly, mode=self.args["--aligner"])
    if index is None:
        if mio.check_is_fasta(assembly):
            fasta = assembly
            # The index is only needed when starting from raw reads.
            if start == 1:
                index = mio.generate_fasta_index(
                    fasta, self.args["--aligner"], tmp_dir
                )
        else:
            logger.error(
                "Please give as assembly argument a bowtie2 index or a fasta."
            )
            raise ValueError
    else:
        fasta = mio.retrieve_fasta(index, self.args["--aligner"], tmp_dir)

    # Run the whole workflow.
    if start <= 3:
        if start <= 2:
            # Align pair-end reads with bowtie2.
            alignment_files, contig_data, hit_data = mta.get_contact_pairs(
                self.args["--forward"],
                self.args["--reverse"],
                index,
                fasta,
                self.args["--aligner"],
                min_qual,
                self.args["--start"],
                self.args["--depth"],
                self.args["--enzyme"],
                self.args["--outdir"],
                tmp_dir,
                self.args["--threads"],
            )
        else:
            # Pair start: the pair files are given directly.
            alignment_files = self.args["--forward"].split(",")
            nb_alignment = len(alignment_files)
            contig_data, hit_data = mtn.create_contig_data(
                fasta,
                nb_alignment,
                self.args["--depth"],
                self.args["--enzyme"],
            )

        # Build the network.
        network_file, contigs_data_file = mtn.alignment_to_contacts(
            alignment_files,
            contig_data,
            hit_data,
            self.args["--outdir"],
            "network.txt",
            "contig_data_network.txt",
            tmp_dir,
            self.args["--threads"],
            self.args["--normalization"],
            False,
        )
    else:
        # Network start: reuse the files given by the user.
        contigs_data_file = self.args["--contigs"]
        network_file = self.args["--network"]

    # Partition the network.
    clustering_matrix_partition_file, contigs_data_file = mtp.partition(
        self.args["--algorithm"],
        fasta,
        self.args["--cluster-matrix"],
        contigs_data_file,
        iterations,
        network_file,
        self.args["--outdir"],
        overlapping_fasta_dir,
        overlapping_parameter,
        resolution_parameter,
        size,
        tmp_dir,
        threads,
    )

    # Remove contig_data_network if it was not an input.
    if start <= 2:
        contig_data_network_file = join(
            self.args["--outdir"], "contig_data_network.txt"
        )
        os.remove(contig_data_network_file)

    # Launch validation if desired.
    if not self.args["--skip-validation"]:
        clustering_matrix_recursive_file = mtv.recursive_decontamination(
            self.args["--algorithm"],
            fasta,
            self.args["--cluster-matrix"],
            contigs_data_file,
            final_fasta_dir,
            overlapping_fasta_dir,
            recursive_iterations,
            network_file,
            self.args["--outdir"],
            recursive_overlapping_parameter,
            recursive_fasta_dir,
            resolution_parameter,
            size,
            tmp_dir,
            threads,
        )

        if self.args["--cluster-matrix"]:
            # Average the partition and recursive clustering matrices
            # and save the result.
            clustering_matrix = load_npz(
                clustering_matrix_partition_file + ".npz"
            )
            clustering_matrix_recursive = load_npz(
                clustering_matrix_recursive_file + ".npz"
            )
            clustering_matrix = (
                (clustering_matrix + clustering_matrix_recursive) / 2
            ).tocoo()
            clustering_matrix_file = join(
                self.args["--outdir"], "clustering_matrix"
            )
            save_npz(clustering_matrix_file, clustering_matrix)

        # Remove contig_data_partition file.
        contig_data_partition_file = join(
            self.args["--outdir"], "contig_data_partition.txt"
        )
        os.remove(contig_data_partition_file)

    # Delete the pyfastx index (guarded: it may not exist).
    fxi_file = fasta + ".fxi"
    if exists(fxi_file):
        os.remove(fxi_file)

    # Delete the temporary folder.
    if not self.args["--no-clean-up"]:
        shutil.rmtree(tmp_dir)
def execute(self):
    """Build the metaHiC contig network from reads, alignments or pairs.

    Validates the command-line options, resolves the assembly/index
    pair, runs the alignment step when needed, then writes the network
    and contig data files via ``mtn.alignment_to_contacts``.
    """
    args = self.args

    # Temporary directory: use the one given, or generate one in ./tmp.
    if args["--tmpdir"]:
        tmp_dir = args["--tmpdir"]
        os.makedirs(tmp_dir, exist_ok=True)
    else:
        tmp_dir = mio.generate_temp_dir("./tmp")

    # Output directory (defaults to the current directory).
    if not args["--outdir"]:
        args["--outdir"] = "."
    os.makedirs(args["--outdir"], exist_ok=True)

    # Enable file logging.
    timestamp = time.strftime("%Y%m%d%H%M%S")
    mtl.set_file_handler(
        join(args["--outdir"], ("metator_network_" + timestamp + ".log"))
    )

    # Numeric and boolean options.
    min_qual = int(args["--min-quality"])
    self_contacts = args["--self-contacts"]

    # fastq start (or bam start aligned with bowtie2) needs reverse reads.
    needs_reverse = args["--start"] == "fastq" or (
        args["--start"] == "bam" and args["--aligner"] == "bowtie2"
    )
    if needs_reverse and not args["--reverse"]:
        logger.error(
            "Forward and reverse arguments are necessary for fastq with %s start and %s aligner.",
            args["--start"],
            args["--aligner"],
        )
        raise ValueError

    # Validate the normalization and its prerequisites.
    if args["--normalization"] not in (
        "None",
        "abundance",
        "length",
        "RS",
        "empirical_hit",
        "theoritical_hit",
    ):
        logger.error(
            'Normalization should be among this list: "None", "abundance", "length", "RS", "empirical_hit", "theoritical_hit"'
        )
        raise ValueError
    if args["--normalization"] in ("RS", "theoritical_hit") and not args["--enzyme"]:
        logger.error(
            'For "RS" and "theoritical_hit" normalization, enzyme is required.'
        )
        raise ValueError
    if args["--normalization"] in ("abundance", "theoritical_hit") and not args["--depth"]:
        logger.error(
            'For "abundance" and "theoritical_hit" normalization, depth is required.'
        )
        raise ValueError

    if args["--start"] not in ("fastq", "bam", "pair", "network"):
        logger.error(
            "Start argument should be 'fastq', 'bam', 'pair' or 'network'."
        )
        raise ValueError

    # Resolve the assembly argument: if an index is given, retrieve the
    # fasta from it; if a fasta is given, build the index (only needed
    # for a fastq start).
    assembly = args["--assembly"]
    index = mio.check_fasta_index(assembly, mode=args["--aligner"])
    if index is not None:
        fasta = mio.retrieve_fasta(index, args["--aligner"], tmp_dir)
    elif mio.check_is_fasta(assembly):
        fasta = assembly
        if args["--start"] == "fastq":
            index = mio.generate_fasta_index(fasta, args["--aligner"], tmp_dir)
    else:
        logger.error(
            "Please give as assembly argument a %s index or a fasta.",
            args["--aligner"],
        )
        raise ValueError

    # Print information about the workflow:
    logger.info("Aligner algorithm: %s", args["--aligner"])
    logger.info("Enzyme: %s", args["--enzyme"])
    logger.info("Normalization: %s", args["--normalization"])
    logger.info("Minimum mapping quality: %s", args["--min-quality"])

    if args["--start"] == "pair":
        # Pair start: no alignment to do, the pair files are given.
        alignment_files = args["--forward"].split(",")
        contig_data, hit_data = mtn.create_contig_data(
            fasta,
            len(alignment_files),
            args["--depth"],
            args["--enzyme"],
        )
    else:
        # Align pair-end reads with bowtie2.
        alignment_files, contig_data, hit_data = mta.get_contact_pairs(
            args["--forward"],
            args["--reverse"],
            index,
            fasta,
            args["--aligner"],
            min_qual,
            args["--start"],
            args["--depth"],
            args["--enzyme"],
            args["--outdir"],
            tmp_dir,
            args["--threads"],
        )

    # Build the network.
    mtn.alignment_to_contacts(
        alignment_files,
        contig_data,
        hit_data,
        args["--outdir"],
        "network.txt",
        "contig_data_network.txt",
        tmp_dir,
        args["--threads"],
        args["--normalization"],
        self_contacts,
    )

    # Delete the temporary folder.
    if not args["--no-clean-up"]:
        shutil.rmtree(tmp_dir)