Example #1
0
def get_alignment(fqs,
                  sample,
                  reference,
                  reference_info,
                  outdir,
                  temp,
                  threads,
                  partial=False):
    '''Runs pseudoalignment and processes output.

    Args:
        fqs: input read files; exactly two entries are treated as
            paired-end, any other count as unpaired.
        sample: sample name, used in the output file name.
        reference: forwarded unchanged to pseudoalign.
        reference_info: pair of (commithash, index_data). index_data must
            hold six items when partial is true and four otherwise (see
            the unpacking in each branch).
        outdir: prefix of the pickled output. It is concatenated directly
            with the sample name, so no path separator is inserted.
        temp: prefix of the pseudoalignment count/equivalence-class files;
            also concatenated without a separator.
        threads: forwarded unchanged to pseudoalign.
        partial: if true, process the output as partial genotyping.

    Returns:
        The list written to the pickle file:
        [commithash, eq_idx, allele_eq, paired, align_stats, gene_stats].
        For partial runs allele_eq and gene_stats are empty lists.
    '''
    paired = len(fqs) == 2

    # NOTE(review): presumably pseudoalign writes these two files under
    # `temp`; this function only passes the paths on to the processors.
    count_file = ''.join([temp, 'pseudoalignments.tsv'])
    eq_file = ''.join([temp, 'pseudoalignments.ec'])

    total, avg, std = pseudoalign(fqs, sample, paired, reference, outdir, temp,
                                  threads)

    # Process partial genotyping pseudoalignment
    if partial:
        # Underscore-prefixed names are unpacked only to enforce the
        # expected six-item layout; they are not used here.
        (commithash, (_gene_set, allele_idx, exon_idx, lengths,
                      _partial_exons, _partial_alleles)) = reference_info

        exon_combos = get_exon_combinations()

        eq_idx, align_stats = process_partial_counts(count_file, eq_file,
                                                     allele_idx, lengths,
                                                     exon_idx, exon_combos)
        align_stats.extend([total, avg, std])

        alignment_summary(align_stats, True)

        # Empty lists fill the allele_eq and gene_stats slots so the saved
        # record has the same six positions as the regular branch.
        alignment_info = [commithash, eq_idx, [], paired, align_stats, []]
        with open(''.join([outdir, sample, '.partial_alignment.p']),
                  'wb') as handle:
            pickle.dump(alignment_info, handle)

    # Process regular pseudoalignment
    else:
        (commithash, (gene_set, allele_idx, lengths,
                      gene_length)) = reference_info

        eq_idx, allele_eq, align_stats = process_counts(
            count_file, eq_file, gene_set, allele_idx, lengths)

        align_stats.extend([total, avg, std])

        alignment_summary(align_stats)

        gene_stats = get_count_stats(eq_idx, gene_length)
        gene_summary(gene_stats)

        alignment_info = [
            commithash, eq_idx, allele_eq, paired, align_stats, gene_stats
        ]
        with open(''.join([outdir, sample, '.alignment.p']), 'wb') as handle:
            pickle.dump(alignment_info, handle)

    return alignment_info
Example #2
0
def get_alignment(fqs,
                  sample,
                  reference,
                  reference_info,
                  outdir,
                  temp,
                  threads,
                  single,
                  partial=False,
                  avg=200,
                  std=20):
    '''Pseudoaligns the reads, then summarizes and saves the results.

    Args:
        fqs: input read files, forwarded to pseudoalign.
        sample: sample name, used in the output file names.
        reference: forwarded unchanged to pseudoalign.
        reference_info: pair of (commithash, index_data); index_data holds
            six items for partial runs and four otherwise.
        outdir: prefix of the output files (concatenated as-is).
        temp: prefix of the pseudoalignment files (concatenated as-is).
        threads: forwarded unchanged to pseudoalign.
        single: true for unpaired reads; paired is its negation.
        partial: if true, process the output as partial genotyping.
        avg: forwarded to pseudoalign and appended to the alignment stats.
        std: forwarded to pseudoalign and appended to the alignment stats.

    Returns:
        The pickled list
        [commithash, eq_idx, allele_eq, paired, align_stats, gene_stats];
        allele_eq and gene_stats are empty lists for partial runs.
    '''
    paired = not single

    counts_path = temp + 'pseudoalignments.tsv'
    ec_path = temp + 'pseudoalignments.ec'

    pseudoalign(fqs, sample, paired, reference, outdir, temp, threads, avg,
                std)

    commithash, index_data = reference_info

    if not partial:
        # Regular pseudoalignment: also produces per-gene statistics.
        gene_set, allele_idx, lengths, gene_length = index_data

        eq_idx, allele_eq, align_stats = process_counts(
            counts_path, ec_path, gene_set, allele_idx, lengths)
        align_stats += [avg, std]
        alignment_summary(align_stats)

        gene_stats = get_count_stats(eq_idx, gene_length)
        gene_summary(gene_stats)

        result = [
            commithash, eq_idx, allele_eq, paired, align_stats, gene_stats
        ]
        with open(outdir + sample + '.alignment.p', 'wb') as out:
            pickle.dump(result, out)

        with open(outdir + sample + '.genes.json', 'w') as out:
            json.dump(gene_stats, out)

        return result

    # Partial genotyping pseudoalignment.
    (gene_set, allele_idx, exon_idx, lengths, partial_exons,
     partial_alleles) = index_data

    exon_combos = get_exon_combinations()

    eq_idx, align_stats = process_partial_counts(counts_path, ec_path,
                                                 allele_idx, lengths,
                                                 exon_idx, exon_combos)
    align_stats += [avg, std]
    alignment_summary(align_stats, True)

    # Empty lists occupy the allele_eq and gene_stats positions.
    result = [commithash, eq_idx, [], paired, align_stats, []]
    with open(outdir + sample + '.partial_alignment.p', 'wb') as out:
        pickle.dump(result, out)

    return result
Example #3
0
def get_alignment(fqs,
                  sample,
                  reference,
                  reference_info,
                  outdir,
                  temp,
                  threads,
                  single,
                  partial=False,
                  avg=200,
                  std=20):
    '''Runs pseudoalignment and processes output.

    Args:
        fqs: input read files, forwarded to pseudoalign.
        sample: sample name, used in the output file names.
        reference: forwarded unchanged to pseudoalign.
        reference_info: pair of (commithash, index_data). index_data holds
            six items for partial runs and four otherwise; the index and
            length entries are JSON-encoded and are decoded here.
        outdir: prefix of the output files. It is concatenated directly
            with the sample name, so no path separator is inserted.
        temp: prefix of the pseudoalignment count/equivalence-class files;
            also concatenated without a separator.
        threads: forwarded unchanged to pseudoalign.
        single: true for unpaired reads; paired is its negation.
        partial: if true, process the output as partial genotyping.
        avg: forwarded to pseudoalign and appended to the alignment stats.
        std: forwarded to pseudoalign and appended to the alignment stats.

    Returns:
        The list written to the pickle file:
        [commithash, eq_idx, allele_eq, paired, align_stats, gene_stats].
        For partial runs allele_eq and gene_stats are empty lists.
    '''
    paired = not single

    count_file = ''.join([temp, 'pseudoalignments.tsv'])
    eq_file = ''.join([temp, 'pseudoalignments.ec'])

    total = pseudoalign(fqs, sample, paired, reference, outdir, temp, threads,
                        avg, std)

    # Process partial genotyping pseudoalignment
    if partial:
        # Underscore-prefixed names are unpacked only to enforce the
        # expected six-item layout; this branch never reads them, so they
        # are not decoded.
        (commithash, (_gene_set, allele_idx, exon_idx, lengths,
                      _partial_exons, _partial_alleles)) = reference_info

        allele_idx = json.loads(allele_idx)
        exon_idx = json.loads(exon_idx)
        # JSON object keys are always strings; restore integer keys/values.
        lengths = {
            int(a): int(x) for a, x in json.loads(lengths).items()
        }

        exon_combos = get_exon_combinations()

        eq_idx, align_stats = process_partial_counts(count_file, eq_file,
                                                     allele_idx, lengths,
                                                     exon_idx, exon_combos)
        align_stats.extend([total, avg, std])

        alignment_summary(align_stats, True)

        # Empty lists fill the allele_eq and gene_stats slots so the saved
        # record has the same six positions as the regular branch.
        alignment_info = [commithash, eq_idx, [], paired, align_stats, []]
        with open(''.join([outdir, sample, '.partial_alignment.p']),
                  'wb') as handle:
            pickle.dump(alignment_info, handle)

    # Process regular pseudoalignment
    else:
        (commithash, (gene_set, allele_idx, lengths,
                      gene_length)) = reference_info

        gene_set = set(gene_set)
        allele_idx = json.loads(allele_idx)
        # Gene-length keys stay as decoded; only the values become ints.
        gene_length = {
            a: int(x) for a, x in json.loads(gene_length).items()
        }
        # JSON object keys are always strings; restore integer keys/values.
        lengths = {
            int(a): int(x) for a, x in json.loads(lengths).items()
        }

        eq_idx, allele_eq, align_stats = process_counts(
            count_file, eq_file, gene_set, allele_idx, lengths)

        align_stats.extend([total, avg, std])

        alignment_summary(align_stats)

        gene_stats = get_count_stats(eq_idx, gene_length)
        gene_summary(gene_stats)

        # TODO: switch to json?
        alignment_info = [
            commithash, eq_idx, allele_eq, paired, align_stats, gene_stats
        ]
        with open(''.join([outdir, sample, '.alignment.p']), 'wb') as handle:
            pickle.dump(alignment_info, handle)

        with open(''.join([outdir, sample, '.genes.json']), 'w') as handle:
            json.dump(gene_stats, handle)

    return alignment_info
Example #4
0
    prior = pd.read_csv(hla_freq, delimiter='\t')
    prior = prior.set_index('allele').to_dict('index')

    # checks if HLA reference exists
    check_ref()

    # loads reference information
    with open(partial_p, 'rb') as file:
        (commithash, (gene_set, allele_idx, exon_idx, lengths, partial_exons,
                      partial_alleles)) = pickle.load(file)

    log.info(f'[log] Reference: %s', commithash)
    hline()

    exon_combos = get_exon_combinations()

    # runs transcript assembly if intermediate json not provided
    reference = partial_idx
    if not args.file[0].endswith('.partial_alignment.p'):
        paired = True if len(args.file) == 2 else False

        count_file, eq_file, num, avg, std = pseudoalign(
            args.file, sample, paired, reference, outdir, temp, args.threads,
            args.keep_files)

        eq_idx, count = process_partial_counts(count_file, eq_file, allele_idx,
                                               lengths, exon_idx, exon_combos,
                                               args.keep_files)

        with open(''.join([outdir, sample, '.partial_alignment.p']),