def singleprocess_permutation(info):
    """Run the configured permutation test on every mutated gene of a BED list.

    For each gene in ``bed_list`` that has at least one row in ``mut_df``,
    the mutations are mapped onto the gene's reference transcript and the
    test selected by ``opts['kind']`` is evaluated through ``mypval``.

    Parameters
    ----------
    info : tuple
        ``(bed_list, mut_df, opts, fs_cts_df, p_inactivating)``.

        * ``bed_list`` -- sequence of gene objects exposing ``chrom``,
          ``gene_name``, ``strand`` and ``query_position``. The chromosome
          reported in the log is taken from the first element.
        * ``mut_df`` -- DataFrame with a ``Gene`` column plus
          ``Chromosome``, ``Start_Position``, ``Reference_Allele``,
          ``Tumor_Allele`` and ``Variant_Classification``;
          ``Protein_Change`` is used when present.
        * ``opts`` -- option mapping. Always read: ``input``, ``context``,
          ``seed``, ``kind``, ``num_iterations``. The remaining keys are
          only read by the branch that needs them.
        * ``fs_cts_df``, ``p_inactivating`` -- unpacked but not used here.

    Returns
    -------
    list
        For ``kind == 'hotmaps1d'`` the rows returned by
        ``mypval.calc_hotmaps_p_value`` are concatenated. For every other
        kind, one entry per analyzed gene: the list returned by the
        ``mypval`` routine extended with two mutation counts. An empty
        ``bed_list`` yields an empty list.
    """
    # initialize input
    bed_list, mut_df, opts, fs_cts_df, p_inactivating = info
    if not bed_list:
        # nothing to analyze; also avoids an IndexError on bed_list[0]
        return []
    current_chrom = bed_list[0].chrom
    logger.info('Working on chromosome: {0} . . .'.format(current_chrom))

    gene_fa = pysam.Fastafile(opts['input'])
    try:
        gs = GeneSequence(gene_fa, nuc_context=opts['context'])

        # list of columns that are needed
        cols = [
            'Chromosome',
            'Start_Position',
            'Reference_Allele',
            'Tumor_Allele',
            'Variant_Classification',
        ]
        # conditionally add protein_change column if exists
        if 'Protein_Change' in mut_df.columns:
            cols += ['Protein_Change']

        # figure out which genes actually have a mutation
        genes_with_mut = set(mut_df['Gene'].unique())

        # iterate through each gene
        result = []
        for bed in bed_list:
            if bed.gene_name not in genes_with_mut:
                # skip genes with no mutations
                continue

            # prepare info for running permutation test; take an explicit
            # copy so the column assignments below never target a slice of
            # mut_df (avoids pandas' SettingWithCopyWarning)
            gene_mask = mut_df['Gene'] == bed.gene_name
            mut_info = mut_df.loc[gene_mask, cols].copy()
            gs.set_gene(bed)
            sc = SequenceContext(gs, seed=opts['seed'])

            # count total mutations in gene
            total_mut = len(mut_info)

            # fix nucleotide letter if gene is on - strand
            if bed.strand == '-':
                rc = mut_info['Tumor_Allele'].map(utils.rev_comp)
                mut_info.loc[:, 'Tumor_Allele'] = rc

            # get coding positions, mutations unmapped to the reference tx
            # will have NA for a coding position
            pos_list = [
                bed.query_position(bed.strand, chrom, start)
                for chrom, start in zip(mut_info['Chromosome'],
                                        mut_info['Start_Position'])
            ]
            mut_info.loc[:, 'Coding Position'] = pos_list

            # recover mutations that could not be mapped to the reference
            # transcript for a gene before being dropped (next step)
            unmapped_mut_info = mc.recover_unmapped_mut_info(
                mut_info, bed, sc, opts)

            # drop mutations which do not map to reference tx; copy so the
            # dtype conversion below works on an independent frame
            mut_info = mut_info.dropna(subset=['Coding Position']).copy()
            mut_info['Coding Position'] = \
                mut_info['Coding Position'].astype(int)
            num_mapped_muts = len(mut_info)
            unmapped_muts = total_mut - num_mapped_muts

            # calculate results of permutation test
            kind = opts['kind']
            if kind == 'oncogene':
                # calculate position based permutation results
                tmp_result = mypval.calc_position_p_value(
                    mut_info,
                    unmapped_mut_info,
                    sc,
                    gs,
                    bed,
                    opts['score_dir'],
                    opts['num_iterations'],
                    opts['stop_criteria'],
                    0,  # no recurrent mutation pseudo count
                    opts['recurrent'],
                    opts['fraction'])
                result.append(tmp_result + [total_mut, unmapped_muts])
            elif kind == 'tsg':
                # calculate results for deleterious mutation permutation
                # test; frameshift counts (fs_cts_df) are intentionally no
                # longer fed into the simulation
                tmp_result = mypval.calc_deleterious_p_value(
                    mut_info,
                    unmapped_mut_info,
                    sc,
                    gs,
                    bed,
                    opts['num_iterations'],
                    opts['stop_criteria'],
                    opts['deleterious'],
                    0,  # no deleterious mutation pseudo count
                    opts['seed'])
                # NOTE(review): this branch reports the number of *mapped*
                # mutations where the other branches report total_mut --
                # kept as-is, confirm this is intended.
                result.append(tmp_result + [num_mapped_muts, unmapped_muts])
            elif kind == 'hotmaps1d':
                # save null distribution if user option specified
                null_dir = opts['null_distr_dir']
                if null_dir:
                    if not os.path.exists(null_dir):
                        try:
                            os.mkdir(null_dir)
                        except OSError:
                            # another worker may have created the directory
                            # between the check and the mkdir call
                            if not os.path.isdir(null_dir):
                                raise
                    # '{0}' is left in the file name as a placeholder for
                    # the callee to fill in
                    save_path = os.path.join(null_dir,
                                             bed.gene_name + '.{0}.txt')
                else:
                    save_path = None
                # calculate position based permutation results
                mywindow = list(map(int, opts['window'].split(',')))
                tmp_result = mypval.calc_hotmaps_p_value(
                    mut_info,
                    unmapped_mut_info,
                    sc,
                    gs,
                    bed,
                    mywindow,
                    opts['num_iterations'],
                    opts['stop_criteria'],
                    opts['report_index'],
                    null_save_path=save_path)
                result.extend(tmp_result)
            elif kind == 'protein':
                tmp_result = mypval.calc_protein_p_value(
                    mut_info, unmapped_mut_info, sc, gs, bed,
                    opts['neighbor_graph_dir'], opts['num_iterations'],
                    opts['stop_criteria'], opts['recurrent'],
                    opts['fraction'])
                result.append(tmp_result + [total_mut, unmapped_muts])
            else:
                # calc results for entropy-on-effect permutation test
                tmp_result = mypval.calc_effect_p_value(
                    mut_info,
                    unmapped_mut_info,
                    sc,
                    gs,
                    bed,
                    opts['num_iterations'],
                    0,  # no recurrent mutation pseudo count
                    opts['recurrent'],
                    opts['fraction'])
                result.append(tmp_result + [total_mut, unmapped_muts])
    finally:
        # always release the FASTA handle, even if a gene fails
        gene_fa.close()

    logger.info('Finished working on chromosome: {0}.'.format(current_chrom))
    return result
def singleprocess_permutation(info):
    """Run the configured permutation test on every mutated gene of a BED list.

    For each gene in ``bed_list`` that has at least one row in ``mut_df``,
    the mutations are mapped onto the gene's reference transcript and the
    test selected by ``opts['kind']`` is evaluated through ``mypval``.

    Parameters
    ----------
    info : tuple
        ``(bed_list, mut_df, opts, fs_cts_df, p_inactivating)``.

        * ``bed_list`` -- sequence of gene objects exposing ``chrom``,
          ``gene_name``, ``strand`` and ``query_position``. The chromosome
          reported in the log is taken from the first element.
        * ``mut_df`` -- DataFrame with a ``Gene`` column plus
          ``Chromosome``, ``Start_Position``, ``Reference_Allele``,
          ``Tumor_Allele`` and ``Variant_Classification``;
          ``Protein_Change`` is used when present.
        * ``opts`` -- option mapping. Always read: ``input``, ``context``,
          ``seed``, ``kind``, ``num_iterations``. The remaining keys are
          only read by the branch that needs them.
        * ``fs_cts_df``, ``p_inactivating`` -- unpacked but not used here.

    Returns
    -------
    list
        For ``kind == 'hotmaps1d'`` the rows returned by
        ``mypval.calc_hotmaps_p_value`` are concatenated. For every other
        kind, one entry per analyzed gene: the list returned by the
        ``mypval`` routine extended with two mutation counts. An empty
        ``bed_list`` yields an empty list.
    """
    # initialize input
    bed_list, mut_df, opts, fs_cts_df, p_inactivating = info
    if not bed_list:
        # nothing to analyze; also avoids an IndexError on bed_list[0]
        return []
    current_chrom = bed_list[0].chrom
    logger.info('Working on chromosome: {0} . . .'.format(current_chrom))

    gene_fa = pysam.Fastafile(opts['input'])
    try:
        gs = GeneSequence(gene_fa, nuc_context=opts['context'])

        # list of columns that are needed
        cols = ['Chromosome', 'Start_Position', 'Reference_Allele',
                'Tumor_Allele', 'Variant_Classification',]
        # conditionally add protein_change column if exists
        if 'Protein_Change' in mut_df.columns:
            cols += ['Protein_Change']

        # figure out which genes actually have a mutation
        genes_with_mut = set(mut_df['Gene'].unique())

        # iterate through each gene
        result = []
        for bed in bed_list:
            if bed.gene_name not in genes_with_mut:
                # skip genes with no mutations
                continue

            # prepare info for running permutation test; take an explicit
            # copy so the column assignments below never target a slice of
            # mut_df (avoids pandas' SettingWithCopyWarning)
            gene_mask = mut_df['Gene'] == bed.gene_name
            mut_info = mut_df.loc[gene_mask, cols].copy()
            gs.set_gene(bed)
            sc = SequenceContext(gs, seed=opts['seed'])

            # count total mutations in gene
            total_mut = len(mut_info)

            # fix nucleotide letter if gene is on - strand
            if bed.strand == '-':
                rc = mut_info['Tumor_Allele'].map(utils.rev_comp)
                mut_info.loc[:, 'Tumor_Allele'] = rc

            # get coding positions, mutations unmapped to the reference tx
            # will have NA for a coding position
            pos_list = [bed.query_position(bed.strand, chrom, start)
                        for chrom, start in zip(mut_info['Chromosome'],
                                                mut_info['Start_Position'])]
            mut_info.loc[:, 'Coding Position'] = pos_list

            # recover mutations that could not be mapped to the reference
            # transcript for a gene before being dropped (next step)
            unmapped_mut_info = mc.recover_unmapped_mut_info(mut_info, bed,
                                                             sc, opts)

            # drop mutations which do not map to reference tx; copy so the
            # dtype conversion below works on an independent frame
            mut_info = mut_info.dropna(subset=['Coding Position']).copy()
            mut_info['Coding Position'] = \
                mut_info['Coding Position'].astype(int)
            num_mapped_muts = len(mut_info)
            unmapped_muts = total_mut - num_mapped_muts

            # calculate results of permutation test
            kind = opts['kind']
            if kind == 'oncogene':
                # calculate position based permutation results
                tmp_result = mypval.calc_position_p_value(
                    mut_info, unmapped_mut_info, sc,
                    gs, bed, opts['score_dir'],
                    opts['num_iterations'],
                    opts['stop_criteria'],
                    0,  # no recurrent mutation pseudo count
                    opts['recurrent'],
                    opts['fraction'])
                result.append(tmp_result + [total_mut, unmapped_muts])
            elif kind == 'tsg':
                # calculate results for deleterious mutation permutation
                # test; frameshift counts (fs_cts_df) are intentionally no
                # longer fed into the simulation
                tmp_result = mypval.calc_deleterious_p_value(
                    mut_info, unmapped_mut_info,
                    sc, gs, bed,
                    opts['num_iterations'],
                    opts['stop_criteria'],
                    opts['deleterious'],
                    0,  # no deleterious mutation pseudo count
                    opts['seed'])
                # NOTE(review): this branch reports the number of *mapped*
                # mutations where the other branches report total_mut --
                # kept as-is, confirm this is intended.
                result.append(tmp_result + [num_mapped_muts, unmapped_muts])
            elif kind == 'hotmaps1d':
                # save null distribution if user option specified
                null_dir = opts['null_distr_dir']
                if null_dir:
                    if not os.path.exists(null_dir):
                        try:
                            os.mkdir(null_dir)
                        except OSError:
                            # another worker may have created the directory
                            # between the check and the mkdir call
                            if not os.path.isdir(null_dir):
                                raise
                    # '{0}' is left in the file name as a placeholder for
                    # the callee to fill in
                    save_path = os.path.join(null_dir,
                                             bed.gene_name + '.{0}.txt')
                else:
                    save_path = None
                # calculate position based permutation results
                mywindow = list(map(int, opts['window'].split(',')))
                tmp_result = mypval.calc_hotmaps_p_value(
                    mut_info, unmapped_mut_info, sc,
                    gs, bed,
                    mywindow,
                    opts['num_iterations'],
                    opts['stop_criteria'],
                    opts['report_index'],
                    null_save_path=save_path)
                result.extend(tmp_result)
            elif kind == 'protein':
                tmp_result = mypval.calc_protein_p_value(
                    mut_info, unmapped_mut_info,
                    sc, gs, bed,
                    opts['neighbor_graph_dir'],
                    opts['num_iterations'],
                    opts['stop_criteria'],
                    opts['recurrent'],
                    opts['fraction'])
                result.append(tmp_result + [total_mut, unmapped_muts])
            else:
                # calc results for entropy-on-effect permutation test
                tmp_result = mypval.calc_effect_p_value(
                    mut_info, unmapped_mut_info,
                    sc, gs, bed,
                    opts['num_iterations'],
                    0,  # no recurrent mutation pseudo count
                    opts['recurrent'],
                    opts['fraction'])
                result.append(tmp_result + [total_mut, unmapped_muts])
    finally:
        # always release the FASTA handle, even if a gene fails
        gene_fa.close()

    logger.info('Finished working on chromosome: {0}.'.format(current_chrom))
    return result