def count_reads(self):
    """Record the duplicate-expanded read count for this step's first two outputs.

    Cluster sizes produced by the earlier deduplication step are used to
    expand each representative read back to its full cluster size.
    """
    self.should_count_reads = True
    fasta_pair = self.output_files_local()[0:2]
    cluster_sizes = load_duplicate_cluster_sizes(self.input_cluster_sizes_path())
    self.counts_dict[self.name] = reads_in_group(
        file_group=fasta_pair,
        cluster_sizes=cluster_sizes,
        cluster_key=lambda read_id: read_id)
def _count_reads_work(self, cluster_key, counter_name, fasta_files):
    """Store a read count under *counter_name*, expanding cd-hit-dup clusters.

    Duplicates are included: each representative read contributes its whole
    cluster size, looked up via *cluster_key*.
    """
    self.should_count_reads = True
    cluster_sizes = load_cdhit_cluster_sizes(self.input_cluster_sizes_path())
    self.counts_dict[counter_name] = count.reads_in_group(
        file_group=fasta_files,
        cluster_sizes=cluster_sizes,
        cluster_key=cluster_key)
def count_reads(self):
    """Count reads in the first two outputs and flag apparent subsampling.

    If the observed count exactly equals number-of-files * max_fragments,
    we infer that subsampling truncated the input and record
    counts_dict["subsampled"] = 1.
    """
    self.should_count_reads = True
    counted_files = self.output_files_local()[0:2]
    total = count.reads_in_group(counted_files)
    self.counts_dict[self.name] = total
    # An exact hit on the cap is taken as evidence of truncation.
    cap = self.additional_attributes["max_fragments"] * len(counted_files)
    if total == cap:
        self.counts_dict["subsampled"] = 1
def count_input_reads(input_files, result_dir_local, result_dir_s3, target_name, max_fragments=None):
    """Count reads in the first two input files and upload the counts to S3.

    Writes a JSON dict {target_name: read_count} to "<target_name>.count" in
    result_dir_local, then uploads it to result_dir_s3. When the count is
    exactly len(files) * max_fragments, the input is inferred to have been
    truncated and a "truncated" entry is recorded as well.

    Args:
        input_files: relative file names; only the first two are counted.
        result_dir_local: local directory holding the inputs and count file.
        result_dir_s3: S3 prefix the count file is uploaded under.
        target_name: key the count is stored under; also names the count file.
        max_fragments: optional per-file cap used to detect truncation.
    """
    local_input_files = [os.path.join(result_dir_local, f) for f in input_files[0:2]]
    count_file_basename = "%s.count" % target_name
    local_count_file = "%s/%s" % (result_dir_local, count_file_basename)
    s3_count_file = "%s/%s" % (result_dir_s3, count_file_basename)
    read_count = count.reads_in_group(local_input_files, max_fragments=max_fragments)
    counts_dict = {target_name: read_count}
    # BUGFIX: guard against the default max_fragments=None, which previously
    # raised TypeError on `len(...) * None` whenever no cap was supplied.
    if max_fragments is not None and read_count == len(local_input_files) * max_fragments:
        # If the number of reads is exactly equal to the maximum we specified,
        # it means that the input has been truncated.
        counts_dict["truncated"] = read_count
    with open(local_count_file, 'w') as count_file:
        json.dump(counts_dict, count_file)
    idseq_dag.util.s3.upload_with_retries(local_count_file, s3_count_file)
def count_reads(self):
    """Record the read count of this step's first two output files."""
    self.should_count_reads = True
    output_pair = self.output_files_local()[0:2]
    self.counts_dict[self.name] = count.reads_in_group(output_pair)
def count_reads(self):
    """Count unique reads across this step's fasta outputs.

    Unique (non-expanded) counting is intentional here. The final two
    output files are not fastas and are excluded from the count.
    """
    self.should_count_reads = True
    fasta_outputs = self.output_files_local()[:-2]  # drop the two non-fasta outputs
    self.counts_dict[self.name] = count.reads_in_group(fasta_outputs)
def count_reads(self):
    """Count unidentified reads (the second local output file only)."""
    self.should_count_reads = True
    unidentified_fasta = [self.output_files_local()[1]]
    self.counts_dict["unidentified_fasta"] = count.reads_in_group(unidentified_fasta)