def get_alignment(fqs, sample, reference, reference_info, outdir, temp, threads, partial=False):
    '''Runs pseudoalignment, processes the counts and pickles the outcome.

    The run is treated as paired-end exactly when ``fqs`` holds two entries.
    Paths are built by plain string concatenation, so ``temp`` and ``outdir``
    must already carry any separator they need.

    Args:
        fqs: input read files; only its length is inspected here, the value
            itself is handed to ``pseudoalign``.
        sample: sample name, used as the stem of the output file name.
        reference: passed through to ``pseudoalign``.
        reference_info: ``(commithash, tables)``. ``tables`` must have six
            items when ``partial`` is true and four items otherwise.
        outdir: prefix for the pickle that is written.
        temp: prefix of the ``pseudoalignments.tsv`` / ``.ec`` files
            (presumably produced by ``pseudoalign``).
        threads: passed through to ``pseudoalign``.
        partial: choose the partial-genotyping processing path.

    Returns:
        The list that was pickled:
        ``[commithash, eq_idx, allele_eq, paired, align_stats, gene_stats]``.
        In partial mode the ``allele_eq`` and ``gene_stats`` slots are
        empty lists.
    '''
    paired = len(fqs) == 2
    count_file = temp + 'pseudoalignments.tsv'
    eq_file = temp + 'pseudoalignments.ec'

    total, avg, std = pseudoalign(fqs, sample, paired, reference,
                                  outdir, temp, threads)
    read_stats = (total, avg, std)

    commithash, tables = reference_info

    if partial:
        # Only the allele index, exon index and lengths are consumed here.
        (_gene_set, allele_idx, exon_idx, lengths,
         _partial_exons, _partial_alleles) = tables
        exon_combos = get_exon_combinations()

        eq_idx, align_stats = process_partial_counts(
            count_file, eq_file, allele_idx, lengths, exon_idx, exon_combos)
        align_stats.extend(read_stats)
        alignment_summary(align_stats, True)

        alignment_info = [commithash, eq_idx, [], paired, align_stats, []]
        with open(outdir + sample + '.partial_alignment.p', 'wb') as handle:
            pickle.dump(alignment_info, handle)
        return alignment_info

    gene_set, allele_idx, lengths, gene_length = tables

    eq_idx, allele_eq, align_stats = process_counts(
        count_file, eq_file, gene_set, allele_idx, lengths)
    align_stats.extend(read_stats)
    alignment_summary(align_stats)

    gene_stats = get_count_stats(eq_idx, gene_length)
    gene_summary(gene_stats)

    alignment_info = [commithash, eq_idx, allele_eq,
                      paired, align_stats, gene_stats]
    with open(outdir + sample + '.alignment.p', 'wb') as handle:
        pickle.dump(alignment_info, handle)
    return alignment_info
def get_alignment(fqs, sample, reference, reference_info, outdir, temp, threads, single, partial=False, avg=200, std=20):
    '''Runs pseudoalignment and processes output.

    Paths are built by plain string concatenation, so ``temp`` and ``outdir``
    must already carry any separator they need.

    Args:
        fqs: input read files, handed to ``pseudoalign`` untouched.
        sample: sample name, used as the stem of the output file names.
        reference: passed through to ``pseudoalign``.
        reference_info: ``(commithash, tables)``. ``tables`` must have six
            items when ``partial`` is true and four items otherwise.
        outdir: prefix for the files that are written.
        temp: prefix of the ``pseudoalignments.tsv`` / ``.ec`` files
            (presumably produced by ``pseudoalign``).
        threads: passed through to ``pseudoalign``.
        single: truthy for single-end input; ``paired`` is its negation.
        partial: choose the partial-genotyping processing path.
        avg, std: forwarded to ``pseudoalign`` and appended to the
            alignment statistics.

    Returns:
        The list that was pickled:
        ``[commithash, eq_idx, allele_eq, paired, align_stats, gene_stats]``.
        In partial mode the ``allele_eq`` and ``gene_stats`` slots are
        empty lists.
    '''
    paired = not single
    count_file = temp + 'pseudoalignments.tsv'
    eq_file = temp + 'pseudoalignments.ec'

    # The return value of pseudoalign is not used in this version.
    pseudoalign(fqs, sample, paired, reference, outdir, temp, threads,
                avg, std)
    fragment_stats = (avg, std)

    commithash, tables = reference_info

    # Process partial genotyping pseudoalignment
    if partial:
        # Only the allele index, exon index and lengths are consumed here.
        (_gene_set, allele_idx, exon_idx, lengths,
         _partial_exons, _partial_alleles) = tables
        exon_combos = get_exon_combinations()

        eq_idx, align_stats = process_partial_counts(
            count_file, eq_file, allele_idx, lengths, exon_idx, exon_combos)
        align_stats.extend(fragment_stats)
        alignment_summary(align_stats, True)

        alignment_info = [commithash, eq_idx, [], paired, align_stats, []]
        with open(outdir + sample + '.partial_alignment.p', 'wb') as handle:
            pickle.dump(alignment_info, handle)
        return alignment_info

    # Process regular pseudoalignment
    gene_set, allele_idx, lengths, gene_length = tables

    eq_idx, allele_eq, align_stats = process_counts(
        count_file, eq_file, gene_set, allele_idx, lengths)
    align_stats.extend(fragment_stats)
    alignment_summary(align_stats)

    gene_stats = get_count_stats(eq_idx, gene_length)
    gene_summary(gene_stats)

    alignment_info = [commithash, eq_idx, allele_eq,
                      paired, align_stats, gene_stats]
    with open(outdir + sample + '.alignment.p', 'wb') as handle:
        pickle.dump(alignment_info, handle)

    # Gene-level statistics are additionally written as JSON.
    with open(outdir + sample + '.genes.json', 'w') as handle:
        json.dump(gene_stats, handle)

    return alignment_info
def get_alignment(fqs, sample, reference, reference_info, outdir, temp, threads, single, partial=False, avg=200, std=20):
    '''Runs pseudoalignment and processes output.

    The reference tables arrive partly JSON-encoded and are decoded here
    before use. Paths are built by plain string concatenation, so ``temp``
    and ``outdir`` must already carry any separator they need.

    Args:
        fqs: input read files, handed to ``pseudoalign`` untouched.
        sample: sample name, used as the stem of the output file names.
        reference: passed through to ``pseudoalign``.
        reference_info: ``(commithash, tables)``. ``tables`` must have six
            items when ``partial`` is true and four items otherwise.
        outdir: prefix for the files that are written.
        temp: prefix of the ``pseudoalignments.tsv`` / ``.ec`` files
            (presumably produced by ``pseudoalign``).
        threads: passed through to ``pseudoalign``.
        single: truthy for single-end input; ``paired`` is its negation.
        partial: choose the partial-genotyping processing path.
        avg, std: forwarded to ``pseudoalign`` and appended to the
            alignment statistics.

    Returns:
        The list that was pickled:
        ``[commithash, eq_idx, allele_eq, paired, align_stats, gene_stats]``.
        In partial mode the ``allele_eq`` and ``gene_stats`` slots are
        empty lists.
    '''
    paired = not single
    count_file = temp + 'pseudoalignments.tsv'
    eq_file = temp + 'pseudoalignments.ec'

    total = pseudoalign(fqs, sample, paired, reference, outdir, temp,
                        threads, avg, std)
    read_stats = (total, avg, std)

    commithash, tables = reference_info

    # Process partial genotyping pseudoalignment
    if partial:
        (gene_set, allele_idx, exon_idx, lengths,
         partial_exons, partial_alleles) = tables

        # Every table is decoded, although only allele_idx, exon_idx and
        # lengths are used further down in this branch.
        gene_set = set(gene_set)
        allele_idx = json.loads(allele_idx)
        exon_idx = json.loads(exon_idx)
        # Keys and values of the decoded mapping are both cast to int.
        lengths = {int(key): int(value)
                   for key, value in json.loads(lengths).items()}
        partial_exons = json.loads(partial_exons)
        partial_alleles = set(partial_alleles)

        exon_combos = get_exon_combinations()

        eq_idx, align_stats = process_partial_counts(
            count_file, eq_file, allele_idx, lengths, exon_idx, exon_combos)
        align_stats.extend(read_stats)
        alignment_summary(align_stats, True)

        alignment_info = [commithash, eq_idx, [], paired, align_stats, []]
        with open(outdir + sample + '.partial_alignment.p', 'wb') as handle:
            pickle.dump(alignment_info, handle)
        return alignment_info

    # Process regular pseudoalignment
    gene_set, allele_idx, lengths, gene_length = tables

    gene_set = set(gene_set)
    allele_idx = json.loads(allele_idx)
    # gene_length keeps its keys as decoded; only the values are cast to int.
    gene_length = {key: int(value)
                   for key, value in json.loads(gene_length).items()}
    # lengths has both keys and values cast to int.
    lengths = {int(key): int(value)
               for key, value in json.loads(lengths).items()}

    eq_idx, allele_eq, align_stats = process_counts(
        count_file, eq_file, gene_set, allele_idx, lengths)
    align_stats.extend(read_stats)
    alignment_summary(align_stats)

    gene_stats = get_count_stats(eq_idx, gene_length)
    gene_summary(gene_stats)

    # TODO: switch to json?
    alignment_info = [commithash, eq_idx, allele_eq,
                      paired, align_stats, gene_stats]
    with open(outdir + sample + '.alignment.p', 'wb') as handle:
        pickle.dump(alignment_info, handle)

    # Gene-level statistics are additionally written as JSON.
    with open(outdir + sample + '.genes.json', 'w') as handle:
        json.dump(gene_stats, handle)

    return alignment_info
prior = pd.read_csv(hla_freq, delimiter='\t') prior = prior.set_index('allele').to_dict('index') # checks if HLA reference exists check_ref() # loads reference information with open(partial_p, 'rb') as file: (commithash, (gene_set, allele_idx, exon_idx, lengths, partial_exons, partial_alleles)) = pickle.load(file) log.info(f'[log] Reference: %s', commithash) hline() exon_combos = get_exon_combinations() # runs transcript assembly if intermediate json not provided reference = partial_idx if not args.file[0].endswith('.partial_alignment.p'): paired = True if len(args.file) == 2 else False count_file, eq_file, num, avg, std = pseudoalign( args.file, sample, paired, reference, outdir, temp, args.threads, args.keep_files) eq_idx, count = process_partial_counts(count_file, eq_file, allele_idx, lengths, exon_idx, exon_combos, args.keep_files) with open(''.join([outdir, sample, '.partial_alignment.p']),