def join(args, outs, chunk_defs, chunk_outs):
    if args.skip or args.is_multi_genome:
        return

    chunk_out = chunk_outs[0]
    cr_utils.copy(chunk_out.pca_h5, outs.pca_h5)
    cr_utils.copytree(chunk_out.pca_csv, outs.pca_csv)
def main(args, outs):
    if args.skip:
        return

    if args.random_seed is not None:
        np.random.seed(args.random_seed)

    # Detect barnyard (multi-genome) samples
    genomes = cr_matrix.GeneBCMatrices.load_genomes_from_h5(args.matrix_h5)
    if len(genomes) > 1:
        outs.is_multi_genome = True
        cr_utils.copy(args.matrix_h5, outs.preprocessed_matrix_h5)
        return
    else:
        outs.is_multi_genome = False

    genome = genomes[0]
    matrix = cr_matrix.GeneBCMatrices.load_h5(args.matrix_h5).get_matrix(genome)
    matrix = cr_matrix.GeneBCMatrix.preprocess_matrix(
        matrix,
        num_bcs=args.num_bcs,
        use_bcs=args.use_bcs,
        use_genes=args.use_genes,
        force_cells=args.force_cells)

    gbm = cr_matrix.GeneBCMatrices()
    gbm.matrices[genome] = matrix
    matrix_attrs = cr_matrix.get_matrix_attrs(args.matrix_h5)
    gbm.save_h5(outs.preprocessed_matrix_h5, extra_attrs=matrix_attrs)
def join(args, outs, chunk_defs, chunk_outs):
    if args.skip:
        return

    chunk_out = chunk_outs[0]
    cr_utils.copy(chunk_out.preprocessed_matrix_h5, outs.preprocessed_matrix_h5)
    outs.is_multi_genome = chunk_out.is_multi_genome
def join(args, outs, chunk_defs, chunk_outs):
    if args.skip or not args.is_multi_genome:
        return

    chunk_out = chunk_outs[0]
    cr_utils.copy(chunk_out.multi_genome_summary, outs.multi_genome_summary)
    cr_utils.copytree(chunk_out.multi_genome_csv, outs.multi_genome_csv)
    cr_utils.copytree(chunk_out.multi_genome_json, outs.multi_genome_json)
def main(args, outs):
    parsed = parse_parameters(args.params_csv)
    for param in ANALYSIS_PARAMS:
        # Default to None for any parameter absent from the CSV
        setattr(outs, param, parsed.get(param))

    if args.params_csv is not None:
        cr_utils.copy(args.params_csv, outs.params_csv)
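# Illustrative sketch (not the shipped implementation): parse_parameters is
# assumed here to read a two-column "name,value" CSV into a dict, coercing
# numeric values. The real helper's coercion and validation rules may differ.
import csv

def parse_parameters_sketch(params_csv):
    params = {}
    if params_csv is None:
        return params
    with open(params_csv) as f:
        for row in csv.reader(f):
            if len(row) != 2:
                continue
            name, value = row[0].strip(), row[1].strip()
            try:
                params[name] = int(value)
            except ValueError:
                try:
                    params[name] = float(value)
                except ValueError:
                    params[name] = value
    return params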
def join(args, outs, chunk_defs, chunk_outs):
    cr_utils.copy(args.extract_reads_summary, outs.summary)
    cr_utils.copy(args.barcode_counts, outs.barcode_counts)

    outs.gem_groups = args.gem_groups
    outs.read_groups = args.read_groups
    outs.align = args.align
    outs.bam_comments = args.bam_comments

    outs.read1s = [co.read1s for co in chunk_outs]
    outs.read2s = [co.read2s for co in chunk_outs]
def get_gem_group_index_json(args, outs):
    if args.gem_group_index_json:
        cr_utils.copy(args.gem_group_index_json, outs.gem_group_index_json)
    else:
        generated_index = cr_matrix.get_gem_group_index(args.filtered_gene_bc_matrices_h5)
        if generated_index:
            with open(outs.gem_group_index_json, 'w') as outfile:
                tk_json.dump_numpy({"gem_group_index": generated_index}, outfile)
    return outs.gem_group_index_json
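# Illustrative consumer of the file written above: the JSON holds a single
# "gem_group_index" key. Plain json.load is used here for simplicity; the
# pipeline itself may deserialize through its own JSON helpers.
import json

def load_gem_group_index_sketch(json_path):
    with open(json_path) as f:
        return json.load(f)["gem_group_index"]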
def write_genome_fasta(self, out_fasta_fn):
    if len(self.genomes) > 1:
        # Multi-genome reference: concatenate the input FASTAs, prefixing each
        # sequence name with its genome so contigs stay distinguishable.
        with open(out_fasta_fn, 'w') as f:
            for genome_prefix, in_fasta_fn in itertools.izip(self.genome_prefixes, self.in_fasta_fns):
                with open(in_fasta_fn, 'r') as g:
                    for line in g:
                        line = line.strip()
                        if line.startswith('>'):
                            line = '>' + genome_prefix + '_' + line[1:]
                        f.write(line + '\n')
    else:
        cr_utils.copy(self.in_fasta_fns[0], out_fasta_fn)
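# Quick illustration of the header rewrite above: for a barnyard reference
# built from hg19 and mm10, '>chr1 1' in the mm10 FASTA becomes '>mm10_chr1 1'.
# This mirrors only the string manipulation; file handling is as in the method.
def prefix_fasta_header(line, genome_prefix):
    if line.startswith('>'):
        return '>' + genome_prefix + '_' + line[1:]
    return line

assert prefix_fasta_header('>chr1 1', 'mm10') == '>mm10_chr1 1'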
def main(args, outs):
    if args.read1 is not None:
        # Ensure the output keeps the same extension as the input
        out_path, _ = cr_utils.splitexts(outs.read1s)
        _, in_ext = cr_utils.splitexts(args.read1)
        outs.read1s = out_path + in_ext
        cr_utils.copy(args.read1, outs.read1s)

    if args.read2 is not None:
        out_path, _ = cr_utils.splitexts(outs.read2s)
        _, in_ext = cr_utils.splitexts(args.read2)
        outs.read2s = out_path + in_ext
        cr_utils.copy(args.read2, outs.read2s)
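# Illustrative sketch of what a splitexts-style helper needs to handle: unlike
# os.path.splitext, it should treat compound extensions such as '.fastq.gz' as
# one unit. This is an assumption about cr_utils.splitexts, not its source.
import os

def splitexts_sketch(path):
    root, ext = os.path.splitext(path)
    while True:
        root2, ext2 = os.path.splitext(root)
        if not ext2:
            return root, ext
        root, ext = root2, ext2 + ext

assert splitexts_sketch('reads.fastq.gz') == ('reads', '.fastq.gz')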
def join(args, outs, chunk_defs, chunk_outs):
    summary_files = [
        args.reads_summary,
        args.filter_umis_summary,
        args.filter_barcodes_summary,
        args.trim_reads_summary,
        args.filter_reads_summary,
        args.filter_contigs_summary,
        args.report_contigs_summary,
        args.report_contig_alignments_summary,
        args.raw_consensus_summary,
        args.group_clonotypes_summary,
    ]
    summary_files = [sum_file for sum_file in summary_files if sum_file is not None]
    cr_report.merge_jsons(summary_files, outs.metrics_summary_json)

    # Copy optional inputs through if the upstream stages produced them
    if args.barcode_summary:
        cr_utils.copy(args.barcode_summary, outs.barcode_summary)
    if args.cell_barcodes:
        cr_utils.copy(args.cell_barcodes, outs.cell_barcodes)
    if args.barcode_support:
        cr_utils.copy(args.barcode_support, outs.barcode_support)
    if args.barcode_umi_summary:
        cr_utils.copy(args.barcode_umi_summary, outs.barcode_umi_summary)
    if args.umi_info:
        cr_utils.copy(args.umi_info, outs.umi_info)

    sample_data_paths = cr_webshim_data.SampleDataPaths(
        summary_path=outs.metrics_summary_json,
        barcode_summary_path=args.barcode_summary,
        vdj_clonotype_summary_path=args.clonotype_summary,
        vdj_barcode_support_path=args.barcode_support,
    )
    sample_properties = cr_webshim.get_sample_properties(
        args.sample_id, args.sample_desc, [],
        version=martian.get_pipelines_version())
    sample_data = cr_webshim.load_sample_data(sample_properties, sample_data_paths)

    if args.barcode_whitelist is not None:
        cr_webshim.build_web_summary_html(
            outs.web_summary, sample_properties, sample_data, PIPELINE_VDJ,
            alerts_output_filename=outs.alerts)
        cr_webshim.build_metrics_summary_csv(
            outs.metrics_summary_csv, sample_properties, sample_data, PIPELINE_VDJ)
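# Illustrative sketch of merging flat metric JSONs into one file. The real
# cr_report.merge_jsons may resolve key collisions differently; "later files
# win" is an assumption here.
import json

def merge_jsons_sketch(in_paths, out_path):
    merged = {}
    for path in in_paths:
        with open(path) as f:
            merged.update(json.load(f))
    with open(out_path, 'w') as f:
        json.dump(merged, f, indent=4, sort_keys=True)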
def join(args, outs, chunk_defs, chunk_outs):
    # Copy files from the single chunk to the join
    for out_name in ['summary',
                     'contig_annotations',
                     'filtered_contig_fasta',
                     'filtered_contig_fastq',
                     ]:
        src = getattr(chunk_outs[0], out_name)
        dest = getattr(outs, out_name)
        if os.path.isfile(src):
            cr_utils.copy(src, dest)
        else:
            setattr(outs, out_name, None)
def join(args, outs, chunk_defs, chunk_outs):
    chunk_out = chunk_outs[0]
    cr_utils.copy(chunk_out.web_summary, outs.web_summary)
    cr_utils.copy(chunk_out.alerts, outs.alerts)
    cr_utils.copy(chunk_out.metrics_summary_json, outs.metrics_summary_json)
    cr_utils.copy(chunk_out.metrics_summary_csv, outs.metrics_summary_csv)
def join(args, outs, chunk_defs, chunk_outs):
    if args.skip:
        outs.analysis = None
        outs.analysis_csv = None
        outs.summary = None
        return

    chunk_out = chunk_outs[0]
    cr_utils.copytree(chunk_out.analysis, outs.analysis)
    cr_utils.copytree(chunk_out.analysis_csv, outs.analysis_csv)

    if args.is_multi_genome:
        cr_utils.copy(args.multi_genome_summary, outs.summary)
    else:
        outs.summary = None
def main(args, outs):
    # No-op if no VDJ reference path was specified
    if args.vdj_reference_path is None:
        outs.recombinome = None
        outs.recombinome_index = None
        return

    fasta_filename = vdj_reference.get_vdj_reference_fasta(args.vdj_reference_path)
    cr_utils.copy(fasta_filename, outs.recombinome)

    os.makedirs(outs.recombinome_index)

    # Build a bowtie2 index
    subprocess.check_call([
        'bowtie2-build',
        outs.recombinome,
        os.path.join(outs.recombinome_index, 'recombinome'),
    ])
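# For orientation, a sketch of how a downstream aligner would use this index:
# bowtie2 takes the index *prefix* ('recombinome' here, matching the basename
# chosen above), not a single file, since bowtie2-build writes several .bt2
# files sharing that prefix. The fastq/sam paths are placeholders; the
# pipeline's actual alignment stage is not shown in this excerpt.
import os
import subprocess

def align_to_recombinome_sketch(index_dir, fastq, out_sam):
    subprocess.check_call([
        'bowtie2',
        '-x', os.path.join(index_dir, 'recombinome'),
        '-U', fastq,
        '-S', out_sam,
    ])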
def join(args, outs, chunk_defs, chunk_outs):
    downsample = chunk_defs[0].downsample
    downsample_map = chunk_defs[0].downsample_map

    if downsample and len(downsample_map) > 1:
        input_h5_filenames = [chunk_out.out_molecules for chunk_out in chunk_outs]
        cr_mol_counter.MoleculeCounter.concatenate(outs.out_molecules, input_h5_filenames)
    else:
        # No downsampling: just copy the input molecules through
        cr_utils.copy(args.molecules, outs.out_molecules)

    # Merge the chunk summaries
    summary = merge_summaries(chunk_outs)
    summary['downsample_info'] = downsample_map
    with open(outs.summary, 'w') as f:
        json.dump(summary, f, indent=4, sort_keys=True)
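# Illustrative sketch of merge_summaries, whose definition is not part of this
# excerpt. Unlike the last-one-wins merge shown earlier, summaries from
# parallel chunks typically need numeric aggregation; summing numeric metrics
# key-wise is the behavior assumed here.
import json

def merge_summaries_sketch(chunk_outs):
    merged = {}
    for chunk_out in chunk_outs:
        with open(chunk_out.summary) as f:
            for key, value in json.load(f).items():
                if isinstance(value, (int, float)):
                    merged[key] = merged.get(key, 0) + value
                else:
                    merged[key] = value
    return merged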
def join(args, outs, chunk_defs, chunk_outs):
    chunk_out = chunk_outs[0]
    cr_utils.copy(chunk_out.summary, outs.summary)
    cr_utils.copy(chunk_out.filtered_matrices_h5, outs.filtered_matrices_h5)
    cr_utils.copy(chunk_out.filtered_barcodes, outs.filtered_barcodes)
    cr_utils.copytree(chunk_out.filtered_matrices_mex, outs.filtered_matrices_mex)
def join(args, outs, chunk_defs, chunk_outs):
    summary_files = [
        args.extract_reads_summary,
        args.correct_barcodes_summary,
    ]
    summary_files = [sum_file for sum_file in summary_files if sum_file is not None]
    cr_report.merge_jsons(summary_files, outs.summary)

    cr_utils.copy(args.raw_barcode_counts, outs.raw_barcode_counts)
    cr_utils.copy(args.corrected_barcode_counts, outs.corrected_barcode_counts)
    cr_utils.copy(args.barcode_summary, outs.barcode_summary)

    outs.gem_groups = args.gem_groups
    outs.read_groups = args.read_groups
    outs.align = args.align
    outs.bam_comments = args.bam_comments

    outs.bc_corrected_read1s = [out.bc_corrected_read1s for out in chunk_outs]
    outs.bc_corrected_read2s = [out.bc_corrected_read2s for out in chunk_outs]
def main(args, outs):
    if args.read1s is not None:
        cr_utils.copy(args.read1s, outs.bc_corrected_read1s)
    if args.read2s is not None:
        cr_utils.copy(args.read2s, outs.bc_corrected_read2s)
def join(args, outs, chunk_defs, chunk_outs):
    outs.chain_type = chunk_outs[0].chain_type
    cr_utils.copy(chunk_outs[0].summary, outs.summary)
def main(args, outs):
    reporter = vdj_report.VdjReporter(vdj_reference_path=args.vdj_reference_path)

    gene_umi_counts_per_bc = {}

    strand = cr_chem.get_strandedness(args.chemistry_def)

    # read2 must be present iff the chemistry is paired-end
    paired_end = cr_chem.is_paired_end(args.chemistry_def)
    assert paired_end != (args.read2_chunk is None)

    # For the entire chunk, match reads against the V(D)J reference
    ref_fasta = vdj_reference.get_vdj_reference_fasta(args.vdj_reference_path)

    # The filtering code will write this BAM. Then we'll read it, correct the
    # UMIs, and write outs.barcode_chunked_bams.
    filter_bam = martian.make_path('tmp.bam')

    vdj_filt.run_read_match(args.read1_chunk, args.read2_chunk,
                            ref_fasta, filter_bam, strand, args.sw_params)

    # Make two passes over the BAM file, processing one barcode at a time
    bam1 = pysam.AlignmentFile(filter_bam, check_sq=False)
    bam2 = pysam.AlignmentFile(filter_bam, check_sq=False)

    bc_iter1 = get_bc_grouped_pair_iter(bam1, paired_end)
    bc_iter2 = get_bc_grouped_pair_iter(bam2, paired_end)

    reads_per_bc = open(outs.reads_per_bc, 'w')
    out_bam, _ = tk_bam.create_bam_outfile(outs.barcode_chunked_bams, None, None,
                                           template=bam1)

    for (bc, pair_iter1), (_, pair_iter2) in itertools.izip(bc_iter1, bc_iter2):
        nreads = 0

        # Pass 1: count reads per raw UMI, then correct the UMIs
        umi_counts = defaultdict(int)
        for header, (read1, read2) in pair_iter1:
            nreads += 2
            umi_counts[header.get_tag(cr_constants.RAW_UMI_TAG)] += 1
        corrected_umis = correct_umis(umi_counts)

        # Pass 2: write the UMI-corrected records
        process_bam_barcode(bam1, pair_iter2, bc, corrected_umis, reporter,
                            gene_umi_counts_per_bc, strand, out_bam, paired_end)

        reads_per_bc.write('{}\t{}\n'.format(bc, nreads))

    reads_per_bc.close()
    bam1.close()
    bam2.close()
    out_bam.close()

    # Write the per-barcode gene/UMI counts
    with open(outs.chunked_gene_umi_counts, 'w') as f:
        cPickle.dump(gene_umi_counts_per_bc, f)

    # Copy the input barcodes through
    if args.barcodes_chunk is not None:
        cr_utils.copy(args.barcodes_chunk, outs.barcodes_in_chunks)
    else:
        outs.barcodes_in_chunks = None

    reporter.save(outs.chunked_reporter)
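# Illustrative sketch of UMI correction (correct_umis is defined elsewhere).
# A common scheme, assumed here, maps each UMI to a higher-count UMI within
# Hamming distance 1, absorbing likely sequencing errors into the true UMI.
# The real implementation may apply additional thresholds.
def correct_umis_sketch(umi_counts):
    corrected = {}
    for umi, count in umi_counts.items():
        best_umi, best_count = umi, count
        # Try all single-base substitutions of this UMI
        for i in range(len(umi)):
            for base in 'ACGT':
                if base == umi[i]:
                    continue
                neighbor = umi[:i] + base + umi[i + 1:]
                if umi_counts.get(neighbor, 0) > best_count:
                    best_umi, best_count = neighbor, umi_counts[neighbor]
        corrected[umi] = best_umi
    return corrected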
def join(args, outs, chunk_defs, chunk_outs):
    contigs = []
    contig_fastqs = []
    contig_bams = []

    if len(chunk_outs) == 0:
        # No input reads; create empty outputs
        with open(outs.contig_bam, 'w'):
            pass
        outs.contig_bam_bai = None

        with open(outs.contig_fasta, 'w'):
            pass
        outs.contig_fasta_fai = None

        with open(outs.contig_fastq, 'w'):
            pass

        outs.metrics_summary_json = None
        outs.summary_tsv = None
        outs.umi_summary_tsv = None
        return

    summary_tsvs = []
    umi_summary_tsvs = []

    for chunk_out in chunk_outs:
        if not os.path.isfile(chunk_out.contig_fasta):
            continue
        contigs.append(chunk_out.contig_fasta)
        contig_fastqs.append(chunk_out.contig_fastq)
        contig_bams.append(chunk_out.contig_bam)
        summary_tsvs.append(chunk_out.summary_tsv)
        umi_summary_tsvs.append(chunk_out.umi_summary_tsv)

    cr_utils.concatenate_files(outs.contig_fasta, contigs)

    if os.path.getsize(outs.contig_fasta) > 0:
        tk_subproc.check_call('samtools faidx %s' % outs.contig_fasta, shell=True)
        outs.contig_fasta_fai = outs.contig_fasta + '.fai'

    cr_utils.concatenate_files(outs.contig_fastq, contig_fastqs)

    if len(summary_tsvs) > 0:
        cr_utils.concatenate_headered_files(outs.summary_tsv, summary_tsvs)
    if len(umi_summary_tsvs) > 0:
        cr_utils.concatenate_headered_files(outs.umi_summary_tsv, umi_summary_tsvs)

    if contig_bams:
        # Merge the BAMs in batches of MERGE_BAMS_N; trying to merge them all
        # at once risks hitting the open-filehandle limit.
        n_merged = 0
        while len(contig_bams) > 1:
            to_merge = contig_bams[0:MERGE_BAMS_N]
            tmp_bam = martian.make_path('merged-%04d.bam' % n_merged)
            n_merged += 1
            print "Merging %d BAMs into %s ..." % (len(to_merge), tmp_bam)
            tk_bam.merge(tmp_bam, to_merge, threads=args.__threads)

            # Delete any temporary BAMs that have been merged
            for in_bam in to_merge:
                if os.path.basename(in_bam).startswith('merged-'):
                    cr_utils.remove(in_bam)

            # Pop the merged inputs and push the merged result
            contig_bams = contig_bams[len(to_merge):] + [tmp_bam]

        if os.path.basename(contig_bams[0]).startswith('merged-'):
            # We merged at least two chunks; rename the result to the output BAM
            cr_utils.move(contig_bams[0], outs.contig_bam)
        else:
            # There was only a single chunk, so copy it from the input
            cr_utils.copy(contig_bams[0], outs.contig_bam)

        tk_bam.index(outs.contig_bam)
        # Make sure the Martian out matches the actual index filename
        outs.contig_bam_bai = outs.contig_bam + '.bai'

    # Merge the assembler summary JSONs
    merged_summary = cr_utils.merge_jsons_single_level(
        [out.metrics_summary_json for out in chunk_outs])
    with open(outs.metrics_summary_json, 'w') as f:
        json.dump(tk_safe_json.json_sanitize(merged_summary), f,
                  indent=4, sort_keys=True)
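# Illustration of the batched-merge loop above, using plain lists: each pass
# merges up to batch_size inputs into one item appended at the tail, so no
# step ever opens more than batch_size + 1 files at once.
def batched_merge_sketch(items, batch_size, merge):
    while len(items) > 1:
        to_merge = items[:batch_size]
        items = items[batch_size:] + [merge(to_merge)]
    return items[0]

# Example: merging 5 "files" 2 at a time; string concatenation stands in for
# tk_bam.merge.
assert batched_merge_sketch(list('abcde'), 2, ''.join) == 'cdeab'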
def join(args, outs, chunk_defs, chunk_outs):
    cr_utils.copy(chunk_outs[0].summary, outs.summary)
def build_reference_fasta_from_ensembl(gtf_paths, transcripts_to_remove_path,
                                       genome_fasta_path, reference_path,
                                       reference_name, ref_version, mkref_version):
    """Create cellranger-compatible VDJ reference files from a list of ENSEMBL-like GTF files.

    Input files are concatenated. No attempt is made to merge/reconcile information
    across them. Providing the files in a different order might change the output in
    cases where there are multiple entries with the same transcript id and the same
    feature type (e.g. V-region).
    """
    transcripts = collections.defaultdict(list)

    if transcripts_to_remove_path:
        with open(transcripts_to_remove_path) as f:
            rm_transcripts = set(line.strip() for line in f)
    else:
        rm_transcripts = set()

    # Note: We cannot symlink here because some filesystems in the wild
    # do not support symlinks.
    print 'Copying genome reference sequence...'
    os.makedirs(os.path.dirname(get_vdj_reference_fasta(reference_path)))
    tmp_genome_fa_path = os.path.join(reference_path, 'genome.fasta')
    cr_utils.copy(genome_fasta_path, tmp_genome_fa_path)
    print '...done.\n'

    print 'Indexing genome reference sequence...'
    tk_subproc.check_call(['samtools', 'faidx', tmp_genome_fa_path])
    print '...done.\n'

    print 'Loading genome reference sequence...'
    genome_fasta = pysam.FastaFile(tmp_genome_fa_path)
    print '...done.\n'

    print 'Computing hash of genome FASTA file...'
    fasta_hash = cr_utils.compute_hash_of_file(tmp_genome_fa_path)
    print '...done.\n'

    for gtf in gtf_paths:
        print 'Reading GTF {}'.format(gtf)

        for line_no, entry in enumerate(get_gtf_iter(open(gtf))):
            if entry.feature not in [ENSEMBL_FIVE_PRIME_UTR_FEATURE, ENSEMBL_CDS_FEATURE]:
                continue
            entry = parse_attributes(entry)
            transcript_id = entry.attributes.get('transcript_id')
            transcript_biotype = entry.attributes.get('transcript_biotype')
            gene_biotype = entry.attributes.get('gene_biotype')
            gene_name = entry.attributes.get('gene_name')

            # Skip irrelevant biotypes
            if transcript_biotype not in ENSEMBL_VDJ_BIOTYPES and \
               gene_biotype not in ENSEMBL_VDJ_BIOTYPES:
                continue

            # Skip blacklisted transcripts
            if transcript_id in rm_transcripts:
                continue

            # Warn and skip if transcript_id is missing
            if transcript_id is None:
                print 'Warning: Entry on row %d has no transcript_id' % line_no
                continue

            # Warn and skip if gene_name is missing
            if gene_name is None:
                print 'Warning: Transcript %s on row %d has biotype %s but no gene_name. Skipping.' % \
                    (transcript_id, line_no, transcript_biotype)
                continue

            # Infer the region type from the biotype
            if transcript_biotype in ENSEMBL_VDJ_BIOTYPES:
                vdj_feature = infer_ensembl_vdj_feature_type(entry.feature, transcript_biotype)
            else:
                vdj_feature = infer_ensembl_vdj_feature_type(entry.feature, gene_biotype)

            # Warn and skip if the region type could not be inferred
            if vdj_feature is None:
                print 'Warning: Transcript %s has biotype %s. Could not infer VDJ gene type. Skipping.' % \
                    (transcript_id, transcript_biotype)
                continue

            # Features that share a transcript_id and feature type are
            # presumably exons, so keep them together.
            transcripts[(transcript_id, vdj_feature)].append(entry)

        print '...done.\n'

    print 'Computing hash of genes GTF files...'
    digest = hashlib.sha1()
    # Concatenate all the file hashes into a string, then hash that string
    digest.update(reduce(lambda x, y: x + y,
                         [cr_utils.compute_hash_of_file(gtf) for gtf in gtf_paths]))
    gtf_hash = digest.hexdigest()
    print '...done.\n'

    print 'Fetching sequences...'
    out_fasta = open(get_vdj_reference_fasta(reference_path), 'w')

    feature_id = 1
    seen_features = set()

    for (transcript_id, region_type), regions in transcripts.iteritems():
        if not all(r.chrom == regions[0].chrom for r in regions):
            chroms = sorted(set(r.chrom for r in regions))
            print 'Warning: Transcript %s spans multiple contigs: %s. Skipping.' % \
                (transcript_id, str(chroms))
            continue

        if not all(r.strand == regions[0].strand for r in regions):
            print 'Warning: Transcript %s spans multiple strands. Skipping.' % transcript_id
            continue

        chrom = regions[0].chrom
        strand = regions[0].strand
        ens_gene_name = standardize_ensembl_gene_name(regions[0].attributes['gene_name'])
        transcript_id = regions[0].attributes['transcript_id']

        if chrom not in genome_fasta:
            print 'Warning: Transcript %s is on contig "%s" which is not in the provided reference fasta. Skipping.' % \
                (transcript_id, chrom)
            continue

        # Build the feature sequence
        regions.sort(key=lambda r: r.start)
        seq = ''
        for region in regions:
            # GTF coordinates are 1-based
            start, end = int(region.start) - 1, int(region.end)
            seq += genome_fasta.fetch(chrom, start, end)

        # Revcomp if the transcript is on the reverse strand
        if strand == '-':
            seq = tk_seq.get_rev_comp(seq)

        # Strip Ns from the termini
        if 'N' in seq:
            print 'Warning: Feature %s contains Ns. Stripping from the ends.' % \
                str((ens_gene_name, transcript_id, region_type))
            seq = seq.strip('N')

        if len(seq) == 0:
            print 'Warning: Feature %s is all Ns. Skipping.' % \
                str((ens_gene_name, transcript_id, region_type))
            continue

        # Infer various attributes from the Ensembl gene name
        record_id = transcript_id
        gene_name = ens_gene_name
        display_name = make_display_name(gene_name=gene_name, allele_name=None)
        chain = infer_ensembl_vdj_chain(gene_name)
        chain_type = infer_ensembl_vdj_chain_type(gene_name)
        # Ensembl doesn't encode alleles
        allele_name = '00'

        # Disallow spaces in these fields
        if ' ' in region_type:
            raise ValueError('Spaces not allowed in region type: "%s"' % region_type)
        if ' ' in gene_name:
            raise ValueError('Spaces not allowed in gene name: "%s"' % gene_name)
        if ' ' in record_id:
            raise ValueError('Spaces not allowed in record ID: "%s"' % record_id)

        # Warn on features we couldn't classify properly
        if chain_type not in vdj_constants.VDJ_CHAIN_TYPES:
            print ('Warning: Could not infer chain type for: %s. ' +
                   'Expected the first two characters of the gene name to be in %s. Feature skipped.') % \
                (str((gene_name, record_id, region_type)),
                 str(tuple(vdj_constants.VDJ_CHAIN_TYPES)))
            continue

        if region_type in vdj_constants.VDJ_C_FEATURE_TYPES and \
           chain in vdj_constants.CHAINS_WITH_ISOTYPES:
            isotype = infer_ensembl_isotype(ens_gene_name)
        else:
            isotype = None

        feature = VdjAnnotationFeature(
            feature_id=feature_id,
            record_id=record_id,
            display_name=display_name,
            gene_name=gene_name,
            region_type=region_type,
            chain_type=chain_type,
            chain=chain,
            isotype=isotype,
            allele_name=allele_name,
            sequence=seq,
        )

        # Don't add duplicate entries
        feat_key = get_duplicate_feature_key(feature)
        if feat_key in seen_features:
            print 'Warning: Skipping duplicate entry for %s (%s, %s).' % \
                (display_name, region_type, record_id)
            continue
        seen_features.add(feat_key)

        feature_id += 1

        out_fasta.write(convert_vdj_feature_to_fasta_entry(feature) + '\n')

    out_fasta.close()
    print '...done.\n'

    print 'Deleting copy of genome fasta...'
    os.remove(tmp_genome_fa_path)
    os.remove(tmp_genome_fa_path + '.fai')
    print '...done.\n'

    print 'Writing metadata JSON file into reference folder...'
    metadata = {
        cr_constants.REFERENCE_GENOMES_KEY: reference_name,
        cr_constants.REFERENCE_FASTA_HASH_KEY: fasta_hash,
        cr_constants.REFERENCE_GTF_HASH_KEY: gtf_hash,
        cr_constants.REFERENCE_INPUT_FASTA_KEY: os.path.basename(genome_fasta_path),
        cr_constants.REFERENCE_INPUT_GTF_KEY: ','.join(os.path.basename(gtf_path) for gtf_path in gtf_paths),
        cr_constants.REFERENCE_VERSION_KEY: ref_version,
        cr_constants.REFERENCE_MKREF_VERSION_KEY: mkref_version,
        cr_constants.REFERENCE_TYPE_KEY: vdj_constants.REFERENCE_TYPE,
    }
    with open(os.path.join(reference_path, cr_constants.REFERENCE_METADATA_FILE), 'w') as json_file:
        json.dump(tk_safe_json.json_sanitize(metadata), json_file, sort_keys=True, indent=4)
    print '...done.\n'
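# Illustrative sketch of GTF attribute parsing (get_gtf_iter and
# parse_attributes are defined elsewhere in the package). GTF column 9 holds
# 'key "value";' pairs; a minimal parser, assumed here, looks like this.
import re

def parse_gtf_attributes_sketch(attr_field):
    # e.g. 'gene_name "TRAV1-1"; transcript_biotype "TR_V_gene";'
    return dict(re.findall(r'(\S+) "([^"]*)"', attr_field))

assert parse_gtf_attributes_sketch('gene_name "TRAV1-1"; transcript_biotype "TR_V_gene";') == \
    {'gene_name': 'TRAV1-1', 'transcript_biotype': 'TR_V_gene'}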
def main(args, outs):
    if args.summary is not None:
        cr_utils.copy(args.summary, outs.summary)
    if args.barcodes_detected is not None:
        cr_utils.copy(args.barcodes_detected, outs.barcodes_detected)
def join(args, outs, chunk_defs, chunk_outs):
    if chunk_outs[0].output_for_cloupe is None:
        # Set the output to null if noloupe is set, or if we ran on a barnyard
        outs.output_for_cloupe = None
    else:
        cr_utils.copy(chunk_outs[0].output_for_cloupe, outs.output_for_cloupe)
def join(args, outs, chunk_defs, chunk_outs):
    cr_utils.copy(chunk_outs[0].summary, outs.summary)
    if chunk_outs[0].report is not None:
        cr_utils.copy(chunk_outs[0].report, outs.report)
    outs.chemistry_type = chunk_outs[0].chemistry_type
def main(args, outs):
    cr_utils.copy(args.trim_reads_summary, outs.summary)
def join(args, outs, chunk_defs, chunk_outs):
    chunk_out = chunk_outs[0]
    cr_utils.copy(chunk_out.web_summary, outs.web_summary)
    cr_utils.copy(chunk_out.summary, outs.summary)
def join(args, outs, chunk_defs, chunk_outs):
    cr_utils.copy(chunk_outs[0].cell_barcodes, outs.cell_barcodes)
    cr_utils.copy(chunk_outs[0].barcode_support, outs.barcode_support)
    cr_utils.copy(chunk_outs[0].summary, outs.summary)
    cr_utils.copy(chunk_outs[0].barcode_umi_summary, outs.barcode_umi_summary)
def join(args, outs, chunk_defs, chunk_outs):
    outs.min_readpairs_per_umi = chunk_outs[0].min_readpairs_per_umi
    cr_utils.copy(chunk_outs[0].cell_barcodes, outs.cell_barcodes)
    cr_utils.copy(chunk_outs[0].barcode_support, outs.barcode_support)
    cr_utils.copy(chunk_outs[0].summary, outs.summary)
    cr_utils.copy(chunk_outs[0].barcode_umi_summary, outs.barcode_umi_summary)