LOG.setLevel(logging.WARNING) if len(sys.argv) != 8 and len(sys.argv) != 9: print('Usage: sample_jacks_screen.py condensed_input test_line num_replicates(-1 for all) num_celllines(-1 for all) outfile num_samples num_guides(-1 for all) job_idx\n') print('where, condensed_input = countfile#replicatefile:rep_hdr:sample_hdr:ctrl_sample_or_hdr#guidemappingfile:sgrna_hdr:gene_hdr#ctrl_genes(can be blank)') else: #Minimial checks on this, as this is for a script that is intended for use internally only condensed_input = sys.argv[1] countfile, replicatestuff, grnastuff, ctrl_genes = condensed_input.split('#') replicatefile, rep_hdr, sample_hdr, ctrl_sample_or_hdr = replicatestuff.split(':') guidemappingfile, sgrna_hdr, gene_hdr = grnastuff.split(':') ctrl_sample_hdr = ctrl_sample_or_hdr if ctrl_sample_or_hdr == 'Control' else None sample_spec, ctrl_spec, sample_num_reps = createSampleSpec(countfile, replicatefile, rep_hdr, sample_hdr, ctrl_sample_or_hdr, ctrl_sample_hdr) gene_spec = createGeneSpec(guidemappingfile, sgrna_hdr, gene_hdr) test_celllines = [sample_id for sample_id in ctrl_spec if ctrl_spec[sample_id] != sample_id] ctrl_geneset = readControlGeneset(ctrl_genes) if ctrl_genes is not '' else set() normtype = 'median' test_line = sys.argv[2] num_replicates = eval(sys.argv[3]) num_celllines = eval(sys.argv[4]) outfile = sys.argv[5] num_samples = eval(sys.argv[6]) num_guides = eval(sys.argv[7]) if len(sys.argv) == 9: job_idx = sys.argv[8] else: job_idx = os.environ['LSB_JOBINDEX'] for bs in range(num_samples):
# Optional free-text label; it is embedded in the output directory names below.
parser.add_argument("--v10", type=str, default='', help="Data set label")
args = parser.parse_args()

# Make sure the 'input_files' directory exists in the current working directory.
inputs_dir = 'input_files'
if not os.path.isdir(inputs_dir): os.makedirs(inputs_dir)

# Load the specification of samples to include
LOG.info('Loading sample specification')
sample_spec, ctrl_spec, sample_num_reps = createSampleSpec(args.countfile, args.replicatefile, args.rep_hdr, args.sample_hdr, args.common_ctrl_sample, args.ctrl_sample_hdr)

# Load the mappings from guides to genes
LOG.info('Loading gene mappings')
gene_spec = createGeneSpec(args.guidemappingfile, args.sgrna_hdr, args.gene_hdr)

# Sample not specified: re-call self for all samples
if args.sample_id is None:
    for sample_id in ctrl_spec:
        # Skip entries that map to themselves in ctrl_spec
        # (presumably the control samples -- confirm against createSampleSpec).
        if ctrl_spec[sample_id] == sample_id: continue
        # Re-run this same command line with an explicit --sample_id appended.
        # NOTE(review): sys.argv is re-joined with plain spaces and run through
        # the shell, so arguments containing spaces or shell metacharacters
        # would not survive -- confirm inputs never contain them.
        cmd = py_cmd + ' ' + ' '.join(sys.argv) + ' --sample_id="%s"' % sample_id
        os.system(cmd)
#Sample specified - run MAGeCK
else:
    sample_id = args.sample_id
    # Per-sample output directory, two levels deep; both levels are named from
    # the data set label and the sample id.
    out_dir = 'mageck_single_screens_%s_%s/%s_%s_1' % (args.v10, sample_id, args.v10, sample_id)
    if not os.path.isdir(out_dir): os.makedirs(out_dir)
args.sample_hdr, args.common_ctrl_sample, args.ctrl_sample_hdr) if args.cell_line != None: sample_spec = filterSampleSpec(sample_spec, args.cell_line, ctrl_spec) ctrl_spec = filterCtrlSpec(ctrl_spec, args.cell_line) outprefix += ('_' + args.cell_line) elif args.separate: for cell_line in ctrl_spec: if ctrl_spec[cell_line] == cell_line: continue cmd = '%s --cell_line=%s' % (' '.join(sys.argv), cell_line) os.system('%s %s' % (py_cmd cmd)) exit() # Load the mappings from guides to genes LOG.info('Loading gene mappings') gene_spec = createGeneSpec(args.guidemappingfile, args.sgrna_hdr, args.gene_hdr, ignore_blank_genes=args.ignore_blank_genes) # Load negative control guides (if any) ctrl_geneset = readControlGeneset(args.ctrl_genes) if args.ctrl_genes is not None else set() # Load the data and preprocess LOG.info('Loading data and pre-processing') data, meta, sample_ids, genes, gene_index = loadDataAndPreprocess(sample_spec, gene_spec,ctrl_spec=ctrl_spec, normtype=args.norm_type, ctrl_geneset=ctrl_geneset) gene_grnas = {gene: [x for x in meta[gene_index[gene], 0]] for gene in gene_index} testdata, ctrldata, test_sample_idxs = collateTestControlSamples(data, sample_ids, ctrl_spec) sample_ids_without_ctrl = [sample_ids[idx] for idx in test_sample_idxs] #Run all samples against their controls LOG.info('Running Single JACKS inference') single_jacks_results = [] for ts in range(testdata.shape[1]):