Beispiel #1
0
LOG.setLevel(logging.WARNING)

# Seven positional arguments are required; an eighth (job_idx) is optional.
if len(sys.argv) not in (8, 9):
    print('Usage: sample_jacks_screen.py condensed_input test_line num_replicates(-1 for all)  num_celllines(-1 for all) outfile num_samples num_guides(-1 for all) job_idx\n')
    print('where, condensed_input = countfile#replicatefile:rep_hdr:sample_hdr:ctrl_sample_or_hdr#guidemappingfile:sgrna_hdr:gene_hdr#ctrl_genes(can be blank)')
else:

    # Minimal checks on this, as this is for a script that is intended for use internally only.
    # condensed_input packs four '#'-separated fields; the 2nd and 3rd are
    # themselves ':'-separated. A malformed value raises ValueError on unpacking.
    condensed_input = sys.argv[1]
    countfile, replicatestuff, grnastuff, ctrl_genes = condensed_input.split('#')
    replicatefile, rep_hdr, sample_hdr, ctrl_sample_or_hdr = replicatestuff.split(':')
    guidemappingfile, sgrna_hdr, gene_hdr = grnastuff.split(':')

    # Only the literal value 'Control' is passed on as a control-sample header.
    ctrl_sample_hdr = ctrl_sample_or_hdr if ctrl_sample_or_hdr == 'Control' else None
    sample_spec, ctrl_spec, sample_num_reps = createSampleSpec(countfile, replicatefile, rep_hdr, sample_hdr, ctrl_sample_or_hdr, ctrl_sample_hdr)
    gene_spec = createGeneSpec(guidemappingfile, sgrna_hdr, gene_hdr)

    # Test cell lines are the samples whose control is something other than themselves.
    test_celllines = [sample_id for sample_id in ctrl_spec if ctrl_spec[sample_id] != sample_id]

    # Compare by value: `is not ''` tests object identity, which only happens
    # to work through string interning and triggers a SyntaxWarning on 3.8+.
    ctrl_geneset = readControlGeneset(ctrl_genes) if ctrl_genes != '' else set()
    normtype = 'median'

    test_line = sys.argv[2]
    # NOTE(review): these were previously parsed with eval(), which executes
    # arbitrary expressions from the command line. The usage text documents
    # them as integer counts (-1 meaning "all"), so int() is used instead.
    num_replicates = int(sys.argv[3])
    num_celllines = int(sys.argv[4])
    outfile = sys.argv[5]
    num_samples = int(sys.argv[6])
    num_guides = int(sys.argv[7])

    # job_idx comes from the command line when given, otherwise from the
    # LSB_JOBINDEX environment variable (KeyError if that is unset).
    if len(sys.argv) == 9:
        job_idx = sys.argv[8]
    else:
        job_idx = os.environ['LSB_JOBINDEX']
    
    for bs in range(num_samples):
Beispiel #2
0
parser.add_argument(
    "--v10",
    type=str,
    default='',
    help="Data set label",
)
args = parser.parse_args()

# Directory for generated input files; created if it is not already there.
inputs_dir = 'input_files'
if not os.path.isdir(inputs_dir):
    os.makedirs(inputs_dir)

# Load the specification of samples to include
LOG.info('Loading sample specification')
sample_spec, ctrl_spec, sample_num_reps = createSampleSpec(
    args.countfile,
    args.replicatefile,
    args.rep_hdr,
    args.sample_hdr,
    args.common_ctrl_sample,
    args.ctrl_sample_hdr,
)

# Load the mappings from guides to genes
LOG.info('Loading gene mappings')
gene_spec = createGeneSpec(args.guidemappingfile, args.sgrna_hdr, args.gene_hdr)

# Sample not specified: re-call self for all samples
if args.sample_id is None:
    for sample_id in ctrl_spec:
        # Samples that act as their own control are not re-run.
        if ctrl_spec[sample_id] != sample_id:
            # Re-issue the current command line with an explicit --sample_id appended.
            cmd = '%s %s --sample_id="%s"' % (py_cmd, ' '.join(sys.argv), sample_id)
            os.system(cmd)

#Sample specified - run MAGeCK
else:
    sample_id = args.sample_id

    # Output path has the form mageck_single_screens_<v10>_<sample>/<v10>_<sample>_1
    run_tag = '%s_%s' % (args.v10, sample_id)
    out_dir = 'mageck_single_screens_' + run_tag + '/' + run_tag + '_1'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
Beispiel #3
0
                                                                args.sample_hdr, args.common_ctrl_sample, args.ctrl_sample_hdr)
    # A single cell line was requested: restrict both specs to it and tag the
    # output prefix with its name.
    if args.cell_line is not None:
        sample_spec = filterSampleSpec(sample_spec, args.cell_line, ctrl_spec)
        ctrl_spec = filterCtrlSpec(ctrl_spec, args.cell_line)
        outprefix += ('_' + args.cell_line)

    # No cell line given but separate runs requested: re-invoke this command
    # once per non-control cell line, then stop.
    elif args.separate:
        for cell_line in ctrl_spec:
            # Entries that are their own control are skipped.
            if ctrl_spec[cell_line] == cell_line: continue
            cmd = '%s --cell_line=%s' % (' '.join(sys.argv), cell_line)
            # The tuple previously lacked its comma (`(py_cmd cmd)`), which is
            # a syntax error.
            os.system('%s %s' % (py_cmd, cmd))
        # sys.exit() rather than the interactive-only `exit()` helper, which
        # the site module may not install.
        sys.exit()

    # Load the mappings from guides to genes
    LOG.info('Loading gene mappings')
    gene_spec = createGeneSpec(
        args.guidemappingfile,
        args.sgrna_hdr,
        args.gene_hdr,
        ignore_blank_genes=args.ignore_blank_genes,
    )

    # Load negative control guides (if any); an empty set when none were given
    if args.ctrl_genes is None:
        ctrl_geneset = set()
    else:
        ctrl_geneset = readControlGeneset(args.ctrl_genes)

    # Load the data and preprocess
    LOG.info('Loading data and pre-processing')
    data, meta, sample_ids, genes, gene_index = loadDataAndPreprocess(
        sample_spec,
        gene_spec,
        ctrl_spec=ctrl_spec,
        normtype=args.norm_type,
        ctrl_geneset=ctrl_geneset,
    )
    # For each gene, collect column 0 of its rows in meta
    # (presumably the guide identifiers -- confirm against loadDataAndPreprocess).
    gene_grnas = {gene: list(meta[gene_index[gene], 0]) for gene in gene_index}
    testdata, ctrldata, test_sample_idxs = collateTestControlSamples(
        data, sample_ids, ctrl_spec
    )
    # Sample ids at the test-sample indices returned above.
    sample_ids_without_ctrl = [sample_ids[pos] for pos in test_sample_idxs]

    #Run all samples against their controls
    LOG.info('Running Single JACKS inference')
    single_jacks_results = []
    for ts in range(testdata.shape[1]):