def compute_psi(sample_filenames, output_dir, event_type,
                read_len, overhang_len,
                use_cluster=False,
                chunk_jobs=False,
                filter_events=True,
                events_info_filename=None,
                settings_filename=None):
    """
    Compute Psi values for all events of one type, one sample at a time.

    For every sample the event counts are loaded, optionally filtered,
    written out with ``events.output_file`` and then ``run_miso.py``
    (looked up under ``miso_path``) is launched on that file in a
    child process.

      - sample_filenames: mapping (dict) from sample label to sample
        filename, i.e. {sample_label1: sample_filename1, ...}
      - output_dir: output directory; per-sample results are placed in
        <output_dir>/<event_type>/<sample_label>
      - event_type: 'SE', 'RI', etc.
      - read_len, overhang_len: numbers, passed on (formatted with %d)
        as --read-len / --overhang-len
      - use_cluster: if true, --use-cluster is passed to run_miso.py
      - chunk_jobs: if true and use_cluster is set, passed on
        (formatted with %d) as --chunk-jobs
      - filter_events: if true, events.filter_events is called with
        Settings.get() before the events are written out
      - events_info_filename: forwarded to as_events.load_event_counts
      - settings_filename: accepted for interface compatibility; it is
        not read by this function

    Returns None.  A non-zero exit status from run_miso.py is reported
    on stdout and processing continues with the next sample.
    """
    # NOTE(review): a second definition of compute_psi with the same
    # signature follows this one in the file and rebinds the name, so
    # this copy is not the one callers end up with -- confirm and
    # remove one of the two.

    # Local import so the block does not rely on a file-level import.
    import subprocess

    misc_utils.make_dir(output_dir)
    output_dir = os.path.abspath(os.path.join(output_dir, event_type))
    misc_utils.make_dir(output_dir)

    print("Computing Psi for events of type %s" % (event_type,))
    print(" - samples used:  %s" % (list(sample_filenames.keys()),))

    # The script location does not depend on the sample.
    run_miso_script = os.path.join(miso_path, 'run_miso.py')

    for sample_label, sample_filename in sample_filenames.items():
        print("Processing sample: label=%s, filename=%s"
              % (sample_label, sample_filename))
        results_output_dir = os.path.join(output_dir, sample_label)
        misc_utils.make_dir(results_output_dir)

        # Load the set of counts and serialize them into JSON
        events = as_events.load_event_counts(
            sample_filename, event_type,
            events_info_filename=events_info_filename)

        # Filter events
        if filter_events:
            print("Filtering events...")
            events.filter_events(settings=Settings.get())

        print("Running on a total of %d events." % (len(events.events),))

        events_filename = events.output_file(results_output_dir,
                                             sample_label)

        # Run MISO on them.  The command is kept as an argument list
        # and executed without a shell, so paths containing spaces or
        # shell metacharacters reach run_miso.py unchanged.
        miso_args = ["python", run_miso_script,
                     "--compute-two-iso-psi",
                     events_filename, results_output_dir,
                     "--event-type", event_type,
                     "--read-len", "%d" % (read_len,),
                     "--overhang-len", "%d" % (overhang_len,)]
        if use_cluster:
            miso_args.append("--use-cluster")
            if chunk_jobs:
                miso_args.extend(["--chunk-jobs", "%d" % (chunk_jobs,)])

        # The joined string is for display only; it is never parsed.
        print("Executing: %s" % (" ".join(miso_args),))
        if use_cluster:
            print(" - Using cluster")

        exit_status = subprocess.call(miso_args)
        if exit_status != 0:
            # Keep going (the remaining samples are independent), but
            # do not hide the failure.
            print("WARNING: run_miso.py exited with status %d for "
                  "sample %s" % (exit_status, sample_label))
def compute_psi(sample_filenames, output_dir, event_type,
                read_len, overhang_len,
                use_cluster=False,
                chunk_jobs=False,
                filter_events=True,
                events_info_filename=None,
                settings_filename=None):
    """
    Compute Psi values for all events of one type, one sample at a time.

    For every sample the event counts are loaded, optionally filtered,
    written out with ``events.output_file`` and then ``run_miso.py``
    (looked up under ``miso_path``) is launched on that file in a
    child process.

      - sample_filenames: mapping (dict) from sample label to sample
        filename, i.e. {sample_label1: sample_filename1, ...}
      - output_dir: output directory; per-sample results are placed in
        <output_dir>/<event_type>/<sample_label>
      - event_type: 'SE', 'RI', etc.
      - read_len, overhang_len: numbers, passed on (formatted with %d)
        as --read-len / --overhang-len
      - use_cluster: if true, --use-cluster is passed to run_miso.py
      - chunk_jobs: if true and use_cluster is set, passed on
        (formatted with %d) as --chunk-jobs
      - filter_events: if true, events.filter_events is called with
        Settings.get() before the events are written out
      - events_info_filename: forwarded to as_events.load_event_counts
      - settings_filename: accepted for interface compatibility; it is
        not read by this function

    Returns None.  A non-zero exit status from run_miso.py is reported
    on stdout and processing continues with the next sample.
    """
    # NOTE(review): an earlier definition of compute_psi with the same
    # signature precedes this one in the file; this later definition
    # is the one left bound to the name -- confirm and remove the
    # duplicate.

    # Local import so the block does not rely on a file-level import.
    import subprocess

    misc_utils.make_dir(output_dir)
    output_dir = os.path.abspath(os.path.join(output_dir, event_type))
    misc_utils.make_dir(output_dir)

    print("Computing Psi for events of type %s" % (event_type,))
    print(" - samples used:  %s" % (list(sample_filenames.keys()),))

    # The script location does not depend on the sample.
    run_miso_script = os.path.join(miso_path, 'run_miso.py')

    for sample_label, sample_filename in sample_filenames.items():
        print("Processing sample: label=%s, filename=%s"
              % (sample_label, sample_filename))
        results_output_dir = os.path.join(output_dir, sample_label)
        misc_utils.make_dir(results_output_dir)

        # Load the set of counts and serialize them into JSON
        events = as_events.load_event_counts(
            sample_filename, event_type,
            events_info_filename=events_info_filename)

        # Filter events
        if filter_events:
            print("Filtering events...")
            events.filter_events(settings=Settings.get())

        print("Running on a total of %d events." % (len(events.events),))

        events_filename = events.output_file(results_output_dir,
                                             sample_label)

        # Run MISO on them.  The command is kept as an argument list
        # and executed without a shell, so paths containing spaces or
        # shell metacharacters reach run_miso.py unchanged.
        miso_args = ["python", run_miso_script,
                     "--compute-two-iso-psi",
                     events_filename, results_output_dir,
                     "--event-type", event_type,
                     "--read-len", "%d" % (read_len,),
                     "--overhang-len", "%d" % (overhang_len,)]
        if use_cluster:
            miso_args.append("--use-cluster")
            if chunk_jobs:
                miso_args.extend(["--chunk-jobs", "%d" % (chunk_jobs,)])

        # The joined string is for display only; it is never parsed.
        print("Executing: %s" % (" ".join(miso_args),))
        if use_cluster:
            print(" - Using cluster")

        exit_status = subprocess.call(miso_args)
        if exit_status != 0:
            # Keep going (the remaining samples are independent), but
            # do not hide the failure.
            print("WARNING: run_miso.py exited with status %d for "
                  "sample %s" % (exit_status, sample_label))