コード例 #1
0
ファイル: jacks_io.py プロジェクト: pleprohon/JACKS
def load_data_and_run(sample_spec, gene_spec, ctrl_spec, sgrna_reference_file, x_ref,
                      outprefix, apply_w_hp=APPLY_W_HP_DEFAULT, norm_type=NORM_TYPE_DEFAULT,
                      ctrl_genes=None, fdr=None, fdr_thresh_type='REGULAR', n_pseudo=0, count_prior=32):
    """Load and pre-process the data, run JACKS inference and write all outputs.

    Steps, in order:
      1. Read the optional negative-control gene set (``ctrl_genes``).
      2. Create the directory part of ``outprefix`` if it does not exist yet.
      3. Load/pre-process the data and split it into test and control samples.
      4. If ``sgrna_reference_file`` is truthy, build a fixed X reference from
         ``x_ref`` (ordered like the first column of ``meta``); otherwise write
         the log-fold-change mean and std files.
      5. Run ``inferJACKS`` on the real genes and, when ``n_pseudo > 0`` and
         control genes are available, on ``n_pseudo`` pseudo genes as well.
      6. Write the W results, the X results and the pickle of the full results.

    All output paths are derived from ``outprefix``. Nothing is returned.
    """
    # Negative control genes are optional; use an empty set when none are given.
    if ctrl_genes is None:
        ctrl_geneset = set()
    else:
        ctrl_geneset = readControlGeneset(ctrl_genes, gene_spec)

    # Make sure the directory component of the output prefix exists.
    if '/' in outprefix:
        out_dir = os.path.dirname(outprefix)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    outfile_x = outprefix + '_grna_JACKS_results.txt'
    outfile_lfc = outprefix + '_logfoldchange_means.txt'
    outfile_lfc_std = outprefix + '_logfoldchange_std.txt'
    outfile_pickle = outprefix + PICKLE_FILENAME

    # Load the data and preprocess
    LOG.info('Loading data and pre-processing')
    data, meta, sample_ids, _genes, gene_index = loadDataAndPreprocess(
        sample_spec, gene_spec, ctrl_spec=ctrl_spec, normtype=norm_type,
        ctrl_geneset=ctrl_geneset, prior=count_prior)

    # Map every gene to the list of its gRNA identifiers (first column of meta).
    gene_grnas = {}
    for gene in gene_index:
        gene_grnas[gene] = list(meta[gene_index[gene], 0])

    testdata, ctrldata, test_sample_idxs = collateTestControlSamples(data, sample_ids, ctrl_spec)
    sample_ids_without_ctrl = [sample_ids[i] for i in test_sample_idxs]

    if sgrna_reference_file:
        # Create the X reference, in the same order as the gRNAs in meta.
        grna_ids = meta[:, 0]
        x_reference = {
            key: np.array([eval(x_ref[grna][key]) for grna in grna_ids])
            for key in ('X1', 'X2')
        }
    else:
        x_reference = None
        writeFoldChanges(outfile_lfc, testdata, ctrldata, meta, sample_ids_without_ctrl)
        writeFoldChanges(outfile_lfc_std, testdata, ctrldata, meta, sample_ids_without_ctrl,
                         write_std=True)

    # Run all samples against their controls
    LOG.info('Running JACKS inference')
    jacks_results = inferJACKS(gene_index, testdata, ctrldata,
                               apply_w_hp=apply_w_hp, fixed_x=x_reference)

    # Pseudo genes are created by randomly sampling from guides targeting genes
    # in the control set, so they need both a positive count and control genes.
    run_pseudo = n_pseudo > 0 and len(ctrl_geneset) > 0
    if run_pseudo:
        LOG.info('Running JACKS inference on %d pseudogenes' % n_pseudo)
        pseudo_gene_index = createPseudoNonessGenes(gene_index, ctrl_geneset, n_pseudo)
        jacks_pseudo_results = inferJACKS(pseudo_gene_index, testdata, ctrldata,
                                          apply_w_hp=apply_w_hp)
        writeJacksWResults(outprefix + '_pseudo_noness', jacks_pseudo_results,
                           sample_ids_without_ctrl, write_types=['', '_std'])
        # Merge the real-gene results into the pseudo-gene results.
        jacks_pseudo_results.update(jacks_results)

    # Write out the results
    LOG.info('Writing JACKS results')
    if run_pseudo:
        pseudo_genes = {name for name in jacks_pseudo_results if 'JACKS_PSEUDO_GENE' in name}
        writeJacksWResults(outprefix, jacks_pseudo_results, sample_ids_without_ctrl,
                           ctrl_geneset=pseudo_genes, write_types=['', '_std', '_pval'],
                           fdr=fdr, pseudo=True, fdr_thresh_type=fdr_thresh_type)
    else:
        writeJacksWResults(outprefix, jacks_results, sample_ids_without_ctrl,
                           ctrl_geneset=ctrl_geneset, write_types=['', '_std'])
    writeJacksXResults(outfile_x, jacks_results, gene_grnas)
    pickleJacksFullResults(outfile_pickle, jacks_results, sample_ids_without_ctrl, gene_grnas)
コード例 #2
0
ファイル: jacks_io.py プロジェクト: singjc/crisprtools
def runJACKS(countfile,
             replicatefile,
             guidemappingfile,
             rep_hdr=REP_HDR_DEFAULT,
             sample_hdr=SAMPLE_HDR_DEFAULT,
             common_ctrl_sample=COMMON_CTRL_SAMPLE_DEFAULT,
             ctrl_sample_hdr=None,
             sgrna_hdr=SGRNA_HDR_DEFAULT,
             gene_hdr=GENE_HDR_DEFAULT,
             outprefix=OUTPREFIX_DEFAULT,
             reffile=None,
             apply_w_hp=APPLY_W_HP_DEFAULT):
    """Run the JACKS pipeline from input files through to the written results.

    Steps, in order:
      1. Create the directory part of ``outprefix`` if it does not exist yet.
      2. Build the sample specification and the guide-to-gene specification.
      3. If ``reffile`` is given, load the sgRNA reference and verify that
         every guide in the gene specification has a reference entry.
      4. Load and pre-process the data.
      5. With a reference: build the fixed X reference (ordered like the first
         column of ``meta``). Without one: write the log-fold-change mean and
         std files.
      6. Run ``inferJACKS`` on the test samples against their controls, using
         the fixed X reference when one was built.
      7. Write the gene (W) results, the gRNA (X) results and the pickle of
         the full results.

    All output paths are derived from ``outprefix``. Nothing is returned.

    Raises:
        Exception: if ``reffile`` is given and a guide in the gene
            specification has no entry in the loaded reference.
    """
    if '/' in outprefix and not os.path.exists(os.path.dirname(outprefix)):
        os.makedirs(os.path.dirname(outprefix))
    outfile_w = outprefix + '_gene_JACKS_results.txt'
    outfile_w2 = outprefix + '_genestd_JACKS_results.txt'
    outfile_x = outprefix + '_grna_JACKS_results.txt'
    outfile_lfc = outprefix + '_logfoldchange_means.txt'
    outfile_lfc_std = outprefix + '_logfoldchange_std.txt'
    outfile_pickle = outprefix + PICKLE_FILENAME

    # Load the specification of samples to include
    LOG.info('Loading sample specification')
    sample_spec, ctrl_spec, sample_num_reps = createSampleSpec(
        countfile, replicatefile, rep_hdr, sample_hdr, common_ctrl_sample,
        ctrl_sample_hdr)
    # Load the mappings from guides to genes
    LOG.info('Loading gene mappings')
    gene_spec = createGeneSpec(guidemappingfile, sgrna_hdr, gene_hdr)

    if reffile:
        # Load the sgrna reference (precomputed X's)
        LOG.info('Loading sgrna reference values')
        x_ref = loadSgrnaReference(reffile)
        # Fail early, before the expensive data loading, if any guide lacks a
        # reference value.
        LOG.info('Checking sgrna reference identifiers against gene mappings')
        for guide in gene_spec:
            if guide not in x_ref:
                raise Exception('%s has no sgrna reference in %s' %
                                (guide, reffile))

    # Load the data and preprocess
    LOG.info('Loading data and pre-processing')
    data, meta, sample_ids, genes, gene_index = loadDataAndPreprocess(
        sample_spec, gene_spec)
    gene_grnas = {
        gene: [x for x in meta[gene_index[gene], 0]]
        for gene in gene_index
    }
    x_reference = None
    if reffile:
        # Create the X reference (in the correct order).
        # NOTE(review): eval() executes whatever text the reference file holds
        # for X1/X2; only use reference files from a trusted source. If the
        # values are always plain numbers, float() or ast.literal_eval() would
        # be a safer replacement - confirm the file format before switching.
        x_reference = {
            'X1': np.array([eval(x_ref[x]['X1']) for x in meta[:, 0]]),
            'X2': np.array([eval(x_ref[x]['X2']) for x in meta[:, 0]])
        }
    else:
        writeFoldChanges(outfile_lfc, data, meta, sample_ids)
        writeFoldChanges(outfile_lfc_std,
                         data,
                         meta,
                         sample_ids,
                         write_std=True)

    # Run all samples against their controls
    LOG.info('Running JACKS inference')
    testdata, ctrldata, test_sample_idxs = collateTestControlSamples(
        data, sample_ids, ctrl_spec)
    # Pass the reference on to the inference; previously x_reference was built
    # but never used, so supplying reffile had no effect on the results.
    jacks_results = inferJACKS(gene_index,
                               testdata,
                               ctrldata,
                               fixed_x=x_reference,
                               apply_w_hp=apply_w_hp)

    # Write out the results
    LOG.info('Writing JACKS results')
    sample_ids_without_ctrl = [sample_ids[idx] for idx in test_sample_idxs]
    writeJacksWResults(outfile_w, jacks_results, sample_ids_without_ctrl)
    writeJacksWResults(outfile_w2,
                       jacks_results,
                       sample_ids_without_ctrl,
                       write_w2=True)
    writeJacksXResults(outfile_x, jacks_results, gene_grnas)
    pickleJacksFullResults(outfile_pickle, jacks_results,
                           sample_ids_without_ctrl, gene_grnas)
コード例 #3
0
    # Load negative control guides (if any)
    ctrl_geneset = readControlGeneset(args.ctrl_genes) if args.ctrl_genes is not None else set()

    # Load the data and preprocess
    LOG.info('Loading data and pre-processing')
    data, meta, sample_ids, genes, gene_index = loadDataAndPreprocess(sample_spec, gene_spec,ctrl_spec=ctrl_spec, normtype=args.norm_type, ctrl_geneset=ctrl_geneset)
    gene_grnas = {gene: [x for x in meta[gene_index[gene], 0]] for gene in gene_index}
    testdata, ctrldata, test_sample_idxs = collateTestControlSamples(data, sample_ids, ctrl_spec)
    sample_ids_without_ctrl = [sample_ids[idx] for idx in test_sample_idxs]

    #Run all samples against their controls
    LOG.info('Running Single JACKS inference')
    single_jacks_results = []
    for ts in range(testdata.shape[1]):
        single_jacks_results.append(inferJACKS(gene_index, testdata[:,[ts],:], ctrldata[:,[ts],:], w_only=True))
    jacks_results = combineSingleResults(single_jacks_results)


    #Add a set of pseudo genes, created by randomly sampling from guides targeting genes in the control set
    if args.n_pseudo > 0 and len(ctrl_geneset) > 0:
        LOG.info('Running Single JACKS inference on %d pseudogenes' % args.n_pseudo)
        pseudo_gene_index = createPseudoNonessGenes(gene_index, ctrl_geneset, args.n_pseudo)
        pseudo_single_results = []
        for ts in range(testdata.shape[1]):
            pseudo_single_results.append(inferJACKS(pseudo_gene_index, testdata[:,[ts],:], ctrldata[:,[ts],:], w_only=True))
        jacks_pseudo_results = combineSingleResults(pseudo_single_results)

        writeJacksWResults(outprefix + '_pseudo_noness', jacks_pseudo_results, sample_ids_without_ctrl, write_types=['', '_std'] )

    # Write out the results
コード例 #4
0
# Load negative control guides (if any); an empty set is used when none are given
ctrl_geneset = readControlGeneset(args.ctrl_genes) if args.ctrl_genes is not None else set()

## REFERENCE (to collect X's)

# Load and preprocess the reference samples
LOG.info('Reference: Loading data and pre-processing')
data, meta, sample_ids, genes, gene_index = loadDataAndPreprocess(ref_sample_spec, gene_spec,ctrl_spec=ctrl_spec, normtype=args.norm_type, ctrl_geneset=ctrl_geneset)
# Map every gene to the list of its gRNA identifiers (first column of meta)
gene_grnas = {gene: [x for x in meta[gene_index[gene], 0]] for gene in gene_index}
testdata, ctrldata, test_sample_idxs = collateTestControlSamples(data, sample_ids, ctrl_spec)
sample_ids_without_ctrl = [sample_ids[idx] for idx in test_sample_idxs]

# Run all reference samples against their controls and store the X results in
# ref_outfile, which is loaded again below as the sgRNA reference
LOG.info('Reference: Running JACKS inference')
jacks_results = inferJACKS(gene_index, testdata, ctrldata)
writeJacksXResults(ref_outfile, jacks_results, gene_grnas)

## TEST (using reference)
# From here on data, meta, sample_ids, genes, gene_index, gene_grnas, testdata,
# ctrldata, test_sample_idxs and sample_ids_without_ctrl are rebound to the
# test-sample values.
LOG.info('Test: Loading data and pre-processing')
data, meta, sample_ids, genes, gene_index = loadDataAndPreprocess(single_sample_spec, gene_spec,ctrl_spec=single_ctrl_spec, normtype=args.norm_type, ctrl_geneset=ctrl_geneset)
gene_grnas = {gene: [x for x in meta[gene_index[gene], 0]] for gene in gene_index}
# NOTE(review): the test data above was loaded with single_ctrl_spec, but the
# collation below uses ctrl_spec - confirm this is intended.
testdata, ctrldata, test_sample_idxs = collateTestControlSamples(data, sample_ids, ctrl_spec)
sample_ids_without_ctrl = [sample_ids[idx] for idx in test_sample_idxs]

x_ref = loadSgrnaReference(ref_outfile)
# Create the X reference (in the same order as the gRNAs in meta).
# NOTE(review): eval() executes the X1/X2 text loaded from ref_outfile; that
# file is written earlier in this script, but make sure it cannot be replaced
# by untrusted content.
x_reference = {'X1': np.array([eval(x_ref[x]['X1']) for x in meta[:, 0]]),
                'X2': np.array([eval(x_ref[x]['X2']) for x in meta[:, 0]])}

#Run all samples against their controls