def accession_qc_object(obj_type, obj, keypair, server, dryrun, force):
    """Create or update a QC object of ``obj_type`` on the ENCODE server.

    Searches the server for non-deprecated objects of the same type attached
    to the same step_run.  One exact match (if any) is kept and PATCHed with
    ``obj``; any additional exact-match duplicates are marked deleted; if no
    match exists, ``obj`` is POSTed as a new object.

    Args:
        obj_type: ENCODE object type name, used in the /search/ and POST URLs.
        obj: dict payload for the QC object; expected to contain 'step_run'.
        keypair: (authid, authpw) tuple for the ENCODE API.
        server: base URL of the ENCODE server.
        dryrun: accepted for interface compatibility; not used in this function.
        force: accepted for interface compatibility; not used in this function.

    Returns:
        The created/updated object from the server's '@graph', or None if the
        PATCH/POST failed.
    """
    logger.debug('in accession_qc_object with obj_type %s obj.keys() %s' % (obj_type, obj.keys()))
    logger.debug('obj[step_run] %s' % (obj.get('step_run')))
    url = urlparse.urljoin(server, '/search/?type=%s&step_run=%s' % (obj_type, obj.get('step_run')))
    logger.debug('url %s' % (url))
    r = common.encoded_get(url, keypair)
    objects = [o for o in r['@graph'] if o['status'] not in DEPRECATED]
    logger.debug('found %d qc objects of type %s' % (len(objects), obj_type))
    # The search may return fuzzy matches; keep only exact step_run matches.
    existing_objects = [o for o in objects if o.get('step_run') == obj['step_run']]
    if existing_objects:
        existing_object = existing_objects.pop()
    else:
        existing_object = None
    # Any exact-match duplicates beyond the one we kept are marked deleted.
    for object_to_delete in existing_objects:
        url = urlparse.urljoin(server, object_to_delete['@id'])
        common.encoded_patch(url, keypair, {'status': 'deleted'})
    payload = json.dumps(obj)  # serialized only for the debug log lines below
    if existing_object:
        url = urlparse.urljoin(server, existing_object['@id'])
        logger.debug('patching %s with %s' % (url, payload))
        r = common.encoded_patch(url, keypair, obj, return_response=True)
    else:
        url = urlparse.urljoin(server, '/%s/' % (obj_type))
        logger.debug('posting to %s with %s' % (url, payload))
        r = common.encoded_post(url, keypair, obj, return_response=True)
    try:
        r.raise_for_status()
    # Narrowed from a bare "except:": a bare clause would also swallow
    # SystemExit/KeyboardInterrupt raised while handling the response.
    except Exception:
        logger.error('PATCH or POST failed: %s %s' % (r.status_code, r.reason))
        logger.error('url was %s' % (url))
        logger.error(r.text)
        new_qc_object = None
    else:
        new_qc_object = r.json()['@graph'][0]
    return new_qc_object
def patch_file(payload, keypair, server, dryrun):
    """PATCH a file object on the ENCODE server.

    The 'accession' key of ``payload`` selects the file; the remaining keys
    form the PATCH body.  In dry-run mode nothing is changed and the current
    file object is fetched and returned instead.

    Args:
        payload: dict with an 'accession' key plus the properties to PATCH.
            The caller's dict is not modified (a shallow copy is taken).
        keypair: (authid, authpw) tuple for the ENCODE API.
        server: base URL of the ENCODE server.
        dryrun: if truthy, log what would be done and return the unmodified
            file object from the server.

    Returns:
        The patched (or, on dry run, current) file object, or None if the
        PATCH failed.
    """
    logger.debug('in patch_file with %s' % (pprint.pformat(payload)))
    # Copy before pop() so the caller's dict is not mutated as a side effect.
    payload = dict(payload)
    accession = payload.pop('accession')
    url = urlparse.urljoin(server, 'files/%s' % (accession))
    if dryrun:
        logger.info("Dry run. Would PATCH: %s with %s" % (accession, pprint.pformat(payload)))
        logger.info("Dry run. Returning unchanged file object")
        new_file_object = common.encoded_get(urlparse.urljoin(server, '/files/%s' % (accession)), keypair)
    else:
        r = common.encoded_patch(url, keypair, payload, return_response=True)
        try:
            r.raise_for_status()
        # Narrowed from a bare "except:" so real interrupts still propagate.
        except Exception:
            logger.error('PATCH file object failed: %s %s' % (r.status_code, r.reason))
            logger.error(r.text)
            new_file_object = None
        else:
            new_file_object = r.json()['@graph'][0]
            logger.info("Patched: %s" % (new_file_object.get('accession')))
    return new_file_object
def main():
    """Drive peak-calling workflows for a list of ENCODE experiments.

    For each experiment (from --experiments and/or an input file), resolves
    its target and tagAligns, builds a histone_workflow.py / tf_workflow.py
    command line, runs it via the shell, and on success PATCHes the
    experiment's internal_status to 'processing'.  Python 2 script.
    """
    args = get_args()
    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid, authpw)
    # Accession list comes from the command line and/or an input file;
    # lines starting with '#' are treated as comments and skipped below.
    experiments = []
    if args.experiments:
        experiments.extend(args.experiments)
    if args.infile:
        with open(args.infile, 'r') as fh:
            experiments.extend([e for e in fh])
    for exp_id in experiments:
        if exp_id.startswith('#'):
            continue
        exp_id = exp_id.rstrip()
        print "Experiment %s" % (exp_id)
        experiment_url = server + '/experiments/%s/' % (exp_id)
        experiment = common.encoded_get(experiment_url, keypair)
        if experiment.get('target'):
            target_url = server + experiment.get('target')
            target = common.encoded_get(target_url, keypair)
        else:
            logging.error('Experiment has no target ... skipping')
            continue
        print "%s %s %s" % (experiment['accession'], target.get('investigated_as'), experiment.get('description'))
        # ctl_id = get_control_id(experiment)
        # if ctl_id:
        #     print "Control %s" %(ctl_id)
        # else:
        #     print "Found no control ... skipping %s" %(exp_id)
        #     continue
        # (rep1_ta,rep1_pe), (rep2_ta,rep2_pe) = get_exp_tas(experiment, server, keypair, args.project, args.inf)
        # (ctl1_ta,ctl1_pe), (ctl2_ta,ctl2_pe) = get_ctl_tas(experiment, server, keypair, args.project, args.inf)
        tas = get_tas(experiment, server, keypair, args.project, args.inf)
        if not tas:
            logging.error('Failed to resolve all tagaligns for %s' % (experiment['accession']))
            continue
        pprint.pprint(tas)
        # sys.exit()
        #continue
        # Skip the experiment if any required tagAlign slot is unresolved.
        skip_flag = False
        for key, value in tas.iteritems():
            if not value:
                logging.error('Missing %s ... skipping' % (key))
                skip_flag = True
        if skip_flag:
            continue
        workflow_title = '%s Peaks' % (exp_id)
        if args.tag:
            workflow_title += ' %s' % (args.tag)
        # Normalize the output folder to the form /<outf>/<exp_id>/peaks/
        outf = args.outf
        if not outf.startswith('/') and outf != '/':
            outf = '/' + outf
        if not outf.endswith('/') and outf != '/':
            outf += '/'
        outf += '%s/peaks/' % (exp_id)
        try:
            investigated_as = target['investigated_as']
        except:
            print "%s: Failed to determine target type ... skipping" % (exp_id)
            continue
        else:
            print investigated_as
        rep1_pe = tas['rep1_ta']['paired_end']
        rep2_pe = tas['rep2_ta']['paired_end']
        # Both replicates must have a known, matching endedness.
        if None in [rep1_pe, rep2_pe]:
            print "%s: Cannot determine paired end: rep1 PE = %s, rep2 PE = %s ... skipping" % (
                exp_id, rep1_pe, rep2_pe)
            continue
        if rep1_pe != rep2_pe:
            print "%s: rep1 PE %s differs from rep2 PE %s ... skipping" % (
                exp_id, rep1_pe, rep2_pe)
            continue
        # Histone targets: histone workflow, no IDR by default, no blacklist.
        # Everything else is treated as a TF target.
        if any('histone' in target_type for target_type in investigated_as):
            print "Found to be histone. No blacklist will be used."
            IDR_default = False
            workflow_spinner = '~/chip-seq-pipeline/dnanexus/histone_workflow.py'
            blacklist = None
        else:
            print "Assumed to be tf"
            IDR_default = True
            workflow_spinner = '~/chip-seq-pipeline/dnanexus/tf_workflow.py'
            if args.assembly == "hg19":
                blacklist = "ENCODE Reference Files:/hg19/blacklists/wgEncodeDacMapabilityConsensusExcludable.bed.gz"
            else:
                print "WARNING: No blacklist known for assembly %s, proceeding with no blacklist" % (
                    args.assembly)
                blacklist = None
        # Assemble the workflow-builder command line.
        run_command = \
            '%s --title "%s" --outf "%s" --nomap --yes ' % (workflow_spinner, workflow_title, outf) + \
            '--rep1pe %s --rep2pe %s ' % (str(rep1_pe).lower(), str(rep2_pe).lower()) + \
            '--rep1 %s --rep2 %s ' % (tas['rep1_ta'].get('file_id'), tas['rep2_ta'].get('file_id')) + \
            '--ctl1 %s --ctl2 %s ' % (tas['rep1_ta'].get('control_id'), tas['rep2_ta'].get('control_id')) + \
            '--genomesize %s --chrom_sizes "%s"' %(args.gsize, args.csizes)
        if blacklist:
            run_command += ' --blacklist "%s"' % (blacklist)
        if args.debug:
            run_command += ' --debug'
        if args.idr or IDR_default:
            run_command += ' --idr --idrversion %s' % (args.idrversion)
        print run_command
        if args.dryrun:
            logging.info('Dryrun')
        else:
            try:
                subprocess.check_call(run_command, shell=True)
            except subprocess.CalledProcessError as e:
                logging.error("%s exited with non-zero code %d" % (workflow_spinner, e.returncode))
            else:
                print "%s workflow created" % (experiment['accession'])
                # Mark the experiment as in-flight on the portal; failure to
                # do so is logged but does not abort the run.
                logging.debug("patching internal_status to url %s" % (experiment_url))
                r = common.encoded_patch(experiment_url, keypair, {'internal_status': 'processing'}, return_response=True)
                try:
                    r.raise_for_status()
                except:
                    logging.error(
                        "Tried but failed to update experiment internal_status to processing"
                    )
                    logging.error(r.text)
def main():
    """Map fastq files to tagAligns for a list of ENCODE experiments.

    Reads experiment rows (accession, optional biorep numbers) from
    --experiments or an input file, groups each biorep's files into
    paired-end and single-end sets, launches map_only jobs for each set,
    and PATCHes the experiment's internal_status to 'processing' when any
    job was started.  Emits a tab-separated summary row per experiment.
    Python 2 script.
    """
    global args
    args = get_args()
    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid,authpw)
    # Input rows are CSV: first field is the experiment id, remaining
    # fields (if any) are biological replicate numbers to restrict to.
    if args.experiments:
        exp_ids = csv.reader(StringIO.StringIO('\n'.join([s.rstrip() for s in args.experiments])))
    else:
        exp_ids = csv.reader(args.infile)
    for instring in exp_ids:
        exp_id = instring[0].strip()
        if len(instring) > 1:
            repns = []
            for s in instring[1:]:
                repns.extend(s.split(','))
            biorep_ns = list(set([int(s) for s in repns]))
        else:
            biorep_ns = []
        outstrings = []
        encode_url = urlparse.urljoin(server,exp_id)
        experiment = common.encoded_get(encode_url, keypair)
        outstrings.append(exp_id)
        files = files_to_map(experiment, server, keypair, args.no_sfn_dupes)
        outstrings.append(str(len(files)))
        outstrings.append(str([f.get('accession') for f in files]))
        replicates = replicates_to_map(files, server, keypair, biorep_ns)
        in_process = False
        if files:
            for biorep_n in set([rep.get('biological_replicate_number') for rep in replicates]):
                outstrings.append('rep%s' %(biorep_n))
                biorep_files = [f for f in files if biorep_n in common.biorep_ns(f,server,keypair)]
                paired_files = []
                unpaired_files = []
                # Consume biorep_files, pairing each PE file with its mate;
                # anything left unpaired is mapped single-ended.
                while biorep_files:
                    file_object = biorep_files.pop()
                    if file_object.get('paired_end') == None: # group all the unpaired reads for this biorep together
                        unpaired_files.append(file_object)
                    elif file_object.get('paired_end') in ['1','2']:
                        if file_object.get('paired_with'):
                            mate = next((f for f in biorep_files if f.get('@id') == file_object.get('paired_with')), None)
                        else: #have to find the file that is paired with this one
                            mate = next((f for f in biorep_files if f.get('paired_with') == file_object.get('@id')), None)
                        if mate:
                            biorep_files.remove(mate)
                        else:
                            logging.warning('%s:%s could not find mate' %(experiment.get('accession'), file_object.get('accession')))
                            mate = {}
                        # if mapping as SE, ignore the mate and just map the
                        # rep1 as SE with all the other SE for this rep, if any
                        if args.force_se:
                            unpaired_files.append(next(
                                f for f in [file_object, mate]
                                if f.get('paired_end') == '1'))
                        else:
                            paired_files.append((file_object, mate))
                if biorep_files:
                    logging.warning('%s: leftover file(s) %s' %(experiment.get('accession'), biorep_files))
                if paired_files:
                    pe_jobs = map_only(experiment, biorep_n, paired_files, args.key, server, keypair, args.sex_specific)
                    in_process = True
                if unpaired_files:
                    se_jobs = map_only(experiment, biorep_n, unpaired_files, args.key, server, keypair, args.sex_specific)
                    in_process = True
                # NOTE(review): pe_jobs/se_jobs are only assigned when the
                # corresponding file list is non-empty; the short-circuit
                # 'and' below is what prevents a NameError — confirm intended.
                if paired_files and pe_jobs:
                    outstrings.append('paired:%s' %([(a.get('accession'), b.get('accession')) for (a,b) in paired_files]))
                    outstrings.append('paired jobs:%s' %([j.get_id() for j in pe_jobs]))
                else:
                    outstrings.append('paired:%s' %(None))
                if unpaired_files and se_jobs:
                    outstrings.append('unpaired:%s' %([f.get('accession') for f in unpaired_files]))
                    outstrings.append('unpaired jobs:%s' %([j.get_id() for j in se_jobs]))
                else:
                    outstrings.append('unpaired:%s' %(None))
            if in_process:
                # Best-effort status update; failures are logged, not fatal.
                r = common.encoded_patch(encode_url, keypair, {"internal_status": "processing"}, return_response=True)
                try:
                    r.raise_for_status()
                except:
                    logging.error("Tried and failed to set internal_status")
                    logging.error(r.text)
            print '\t'.join(outstrings)
        else: # no files
            if not replicates:
                logging.warning('%s: No files and no replicates' %experiment.get('accession'))
            else:
                logging.warning('%s: No files to map' %experiment.get('accession'))
        if files and not replicates:
            logging.warning('%s: Files but no replicates' %experiment.get('accession'))
def main():
    """Drive peak-calling workflows for a list of ENCODE experiments.

    Near-duplicate of the other print-statement main() in this file: for
    each experiment it resolves target and tagAligns, builds a
    histone_workflow.py / tf_workflow.py command line, runs it via the
    shell, and on success PATCHes internal_status to 'processing'.
    Python 2 script.
    """
    args = get_args()
    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid,authpw)
    # Accession list from the command line and/or an input file; '#' lines
    # are comments.
    experiments = []
    if args.experiments:
        experiments.extend(args.experiments)
    if args.infile:
        with open(args.infile,'r') as fh:
            experiments.extend([e for e in fh])
    for exp_id in experiments:
        if exp_id.startswith('#'):
            continue
        exp_id = exp_id.rstrip()
        print "Experiment %s" %(exp_id)
        experiment_url = server + '/experiments/%s/' %(exp_id)
        experiment = common.encoded_get(experiment_url, keypair)
        if experiment.get('target'):
            target_url = server + experiment.get('target')
            target = common.encoded_get(target_url, keypair)
        else:
            logging.error('Experiment has no target ... skipping')
            continue
        print "%s %s %s" %(experiment['accession'], target.get('investigated_as'), experiment.get('description'))
        # ctl_id = get_control_id(experiment)
        # if ctl_id:
        #     print "Control %s" %(ctl_id)
        # else:
        #     print "Found no control ... skipping %s" %(exp_id)
        #     continue
        # (rep1_ta,rep1_pe), (rep2_ta,rep2_pe) = get_exp_tas(experiment, server, keypair, args.project, args.inf)
        # (ctl1_ta,ctl1_pe), (ctl2_ta,ctl2_pe) = get_ctl_tas(experiment, server, keypair, args.project, args.inf)
        tas = get_tas(experiment, server, keypair, args.project, args.inf)
        if not tas:
            logging.error('Failed to resolve all tagaligns for %s' %(experiment['accession']))
            continue
        pprint.pprint(tas)
        # sys.exit()
        #continue
        # Skip the experiment if any required tagAlign slot is unresolved.
        skip_flag = False
        for key,value in tas.iteritems():
            if not value:
                logging.error('Missing %s ... skipping' %(key))
                skip_flag = True
        if skip_flag:
            continue
        workflow_title = '%s Peaks' %(exp_id)
        if args.tag:
            workflow_title += ' %s' %(args.tag)
        # Normalize the output folder to the form /<outf>/<exp_id>/peaks/
        outf = args.outf
        if not outf.startswith('/') and outf != '/':
            outf = '/'+outf
        if not outf.endswith('/') and outf != '/':
            outf += '/'
        outf += '%s/peaks/' %(exp_id)
        try:
            investigated_as = target['investigated_as']
        except:
            print "%s: Failed to determine target type ... skipping" %(exp_id)
            continue
        else:
            print investigated_as
        rep1_pe = tas['rep1_ta']['paired_end']
        rep2_pe = tas['rep2_ta']['paired_end']
        # Both replicates must have a known, matching endedness.
        if None in [rep1_pe, rep2_pe]:
            print "%s: Cannot determine paired end: rep1 PE = %s, rep2 PE = %s ... skipping" % (
                exp_id, rep1_pe, rep2_pe)
            continue
        if rep1_pe != rep2_pe:
            print "%s: rep1 PE %s differs from rep2 PE %s ... skipping" % (
                exp_id, rep1_pe, rep2_pe)
            continue
        # Histone targets: histone workflow, no IDR by default, no blacklist.
        # Everything else is treated as a TF target.
        if any('histone' in target_type for target_type in investigated_as):
            print "Found to be histone. No blacklist will be used."
            IDR_default = False
            workflow_spinner = '~/chip-seq-pipeline/dnanexus/histone_workflow.py'
            blacklist = None
        else:
            print "Assumed to be tf"
            IDR_default = True
            workflow_spinner = '~/chip-seq-pipeline/dnanexus/tf_workflow.py'
            if args.assembly == "hg19":
                blacklist = "ENCODE Reference Files:/hg19/blacklists/wgEncodeDacMapabilityConsensusExcludable.bed.gz"
            else:
                print "WARNING: No blacklist known for assembly %s, proceeding with no blacklist" %(args.assembly)
                blacklist = None
        # Assemble the workflow-builder command line.
        run_command = \
            '%s --title "%s" --outf "%s" --nomap --yes ' % (workflow_spinner, workflow_title, outf) + \
            '--rep1pe %s --rep2pe %s ' % (str(rep1_pe).lower(), str(rep2_pe).lower()) + \
            '--rep1 %s --rep2 %s ' % (tas['rep1_ta'].get('file_id'), tas['rep2_ta'].get('file_id')) + \
            '--ctl1 %s --ctl2 %s ' % (tas['rep1_ta'].get('control_id'), tas['rep2_ta'].get('control_id')) + \
            '--genomesize %s --chrom_sizes "%s"' %(args.gsize, args.csizes)
        if blacklist:
            run_command += ' --blacklist "%s"' %(blacklist)
        if args.debug:
            run_command += ' --debug'
        if args.idr or IDR_default:
            run_command += ' --idr --idrversion %s' %(args.idrversion)
        print run_command
        if args.dryrun:
            logging.info('Dryrun')
        else:
            try:
                subprocess.check_call(run_command, shell=True)
            except subprocess.CalledProcessError as e:
                logging.error("%s exited with non-zero code %d" %(workflow_spinner, e.returncode))
            else:
                print "%s workflow created" %(experiment['accession'])
                # Mark the experiment as in-flight on the portal; failure to
                # do so is logged but does not abort the run.
                logging.debug("patching internal_status to url %s" %(experiment_url))
                r = common.encoded_patch(experiment_url, keypair, {'internal_status':'processing'}, return_response=True)
                try:
                    r.raise_for_status()
                except:
                    logging.error("Tried but failed to update experiment internal_status to processing")
                    logging.error(r.text)
def main():
    """Drive chip_workflow.py for a list of ENCODE experiments.

    Newer variant of the peak-calling driver: supports simplicate (single
    replicate) experiments, an explicit --control DX file, and per-assembly
    metadata (blacklist/gsize/csizes) via ASSEMBLY_METADATA.  Builds a
    chip_workflow.py command line, runs it via the shell, and on success
    PATCHes internal_status to 'processing'.  Python 2 script (iteritems).
    """
    args = get_args()
    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid, authpw)
    # Accession list from the command line and/or an input file; '#' lines
    # are comments.
    experiments = []
    if args.experiments:
        experiments.extend(args.experiments)
    if args.infile:
        with open(args.infile, 'r') as fh:
            experiments.extend([e for e in fh])
    # Optional shared control tagAlign, resolved once for all experiments.
    if args.control:
        control_dxhandler = resolve_dx_file(args.control)
    else:
        control_dxhandler = None
    for exp_id in experiments:
        if exp_id.startswith('#'):
            continue
        exp_id = exp_id.rstrip()
        print("Experiment %s" % (exp_id))
        experiment_url = server + '/experiments/%s/' % (exp_id)
        experiment = common.encoded_get(experiment_url, keypair)
        if experiment.get('target'):
            target_url = server + experiment.get('target')
            target = common.encoded_get(target_url, keypair)
        else:
            logging.error('Experiment has no target ... skipping')
            continue
        print("%s %s %s" % (experiment['accession'], target.get('investigated_as'),
                            experiment.get('description')))
        tas = get_tas(experiment, server, keypair, args.project, args.inf,
                      control_dxhandler)
        if not tas:
            logging.error('Failed to resolve all tagaligns for %s' % (
                experiment['accession']))
            continue
        # No rep2 tagAlign means a simplicate (single-replicate) experiment.
        if not tas.get('rep2_ta'):
            simplicate_experiment = True
            print("Simplicate experiment ta's:")
        else:
            simplicate_experiment = False
            print("Replicated experiment ta's:")
        pprint(tas)
        # sys.exit()
        # continue
        # NOTE(review): this 'continue' only skips to the next (key, value)
        # pair of the inner loop; unlike the older skip_flag variant in this
        # file, the experiment itself is NOT skipped when a TA is missing —
        # confirm that is intended.
        for key, value in tas.iteritems():
            if not value:
                logging.error('Missing %s ... skipping' % (key))
                continue
        workflow_title = '%s Peaks' % (exp_id)
        if args.tag:
            workflow_title += ' %s' % (args.tag)
        # Normalize the output folder to the form /<outf>/<exp_id>/peaks/
        outf = args.outf
        if not outf.startswith('/') and outf != '/':
            outf = '/' + outf
        if not outf.endswith('/') and outf != '/':
            outf += '/'
        outf += '%s/peaks/' % (exp_id)
        try:
            investigated_as = target['investigated_as']
        except:
            logging.error("%s: Failed to determine target type ... skipping" % (exp_id))
            continue
        else:
            print(investigated_as)
        rep1_pe = tas['rep1_ta']['paired_end']
        if not simplicate_experiment:
            rep2_pe = tas['rep2_ta']['paired_end']
        else:
            rep2_pe = None
        # Endedness must be known for every replicate present, and must
        # match across replicates.
        if simplicate_experiment and rep1_pe is None:
            logging.error(
                "%s: Cannot determine paired end: rep1 PE = %s... skipping"
                % (exp_id, rep1_pe))
            continue
        elif not simplicate_experiment and None in [rep1_pe, rep2_pe]:
            logging.error(
                "%s: Cannot determine paired end: rep1 PE = %s, rep2 PE = %s ... skipping"
                % (exp_id, rep1_pe, rep2_pe))
            continue
        if not simplicate_experiment and rep1_pe != rep2_pe:
            logging.error(
                "%s: rep1 PE %s differs from rep2 PE %s ... skipping"
                % (exp_id, rep1_pe, rep2_pe))
            continue
        # Histone targets get the histone pipeline and no blacklist;
        # everything else is treated as a TF target.
        if any('histone' in target_type for target_type in investigated_as):
            logging.info(
                "%s: Found to be histone. No blacklist will be used." % (exp_id))
            wf_target = 'histone'
            blacklist = None
        else:
            logging.info("Assumed to be tf")
            wf_target = 'tf'
            if not args.blacklist:
                if args.assembly in ASSEMBLY_METADATA:
                    blacklist = ASSEMBLY_METADATA[args.assembly]['blacklist']
                else:
                    logging.warning(
                        "%s: No blacklist for assembly %s, proceeding with no blacklist"
                        % (exp_id, args.assembly))
                    blacklist = None
        # NOTE(review): if the assembly is unknown and -gsize/-csizes are not
        # given, genomesize/chrom_sizes are never assigned and the
        # command_strings block below raises NameError — confirm whether the
        # logging.error branches should 'continue' instead.
        if not args.gsize:
            if args.assembly in ASSEMBLY_METADATA:
                genomesize = ASSEMBLY_METADATA[args.assembly]['gsize']
            else:
                logging.error("%s: Must specify -gsize for assembly %s" % (exp_id, args.assembly))
        else:
            genomesize = args.gsize
        if not args.csizes:
            if args.assembly in ASSEMBLY_METADATA:
                chrom_sizes = ASSEMBLY_METADATA[args.assembly]['csizes']
            else:
                logging.error("%s: Must specify -csizes for assembly %s" % (exp_id, args.assembly))
        else:
            chrom_sizes = args.csizes
        # chip_workflow.py lives next to this script.
        chip_workflow_absolute_path = os.path.dirname(
            os.path.realpath(__file__)) + "/chip_workflow.py"
        # Assemble the workflow-builder command line.
        command_strings = [
            chip_workflow_absolute_path,
            '--nomap --yes',
            '--target %s' % (wf_target),
            '--title "%s"' % (workflow_title),
            '--outf "%s"' % (outf),
            '--rep1pe %s' % (str(rep1_pe).lower()),
            '--rep1 %s' % (tas['rep1_ta'].get('file_id')),
            '--ctl1 %s' % (tas['rep1_ta'].get('control_id')),
            '--genomesize %s --chrom_sizes "%s"' % (genomesize, chrom_sizes),
            '--spp_version %s' % (args.spp_version)
        ]
        if not simplicate_experiment:
            command_strings.extend([
                '--rep2pe %s' % (str(rep2_pe).lower()),
                '--rep2 %s' % (tas['rep2_ta'].get('file_id')),
                '--ctl2 %s' % (tas['rep2_ta'].get('control_id')),
            ])
        if args.fragment_length:
            command_strings.append('--fragment_length %s' % str(args.fragment_length))
        if blacklist:
            command_strings.append('--blacklist "%s"' % (blacklist))
        if args.debug:
            command_strings.append('--debug')
        if args.use_existing_folders:
            command_strings.append('--use_existing_folders')
        if args.accession:
            command_strings.append('--accession')
        if args.fqcheck is not None:
            command_strings.append('--fqcheck=%s' % (args.fqcheck))
        if args.skip_control is not None:
            command_strings.append('--skip_control=%s' % (args.skip_control))
        if args.force_patch is not None:
            command_strings.append('--force_patch=%s' % (args.force_patch))
        run_command = ' '.join(command_strings)
        print(run_command)
        if args.dryrun:
            logging.info('Dryrun')
        else:
            try:
                subprocess.check_call(run_command, shell=True)
            except subprocess.CalledProcessError as e:
                logging.error(
                    "%s: chip_workflow exited with non-zero code %d"
                    % (exp_id, e.returncode))
            else:
                print("%s workflow created" % (experiment['accession']))
                # Mark the experiment as in-flight on the portal; failure to
                # do so is logged but does not abort the run.
                logging.debug("%s: patching internal_status to url %s"
                              % (exp_id, experiment_url))
                r = common.encoded_patch(experiment_url, keypair,
                                         {'internal_status': 'processing'},
                                         return_response=True)
                try:
                    r.raise_for_status()
                except:
                    logging.warning(
                        "%s: Failed to update experiment internal_status to processing. Skipping that update."
                        % (exp_id))
                    logging.debug(r.text)
def main():
    """Map fastq files to tagAligns for a list of ENCODE experiments.

    Newer variant of the mapping driver: skips '#' comment rows, and passes
    crop_length/accession/fqcheck/force_patch/use_existing_folders/
    encoded_check through to map_only.  Groups each biorep's files into
    paired-end and single-end sets, launches jobs, PATCHes internal_status
    to 'processing' when anything was started, and prints a tab-separated
    summary row per experiment.  Python 2 script.
    """
    global args
    args = get_args()
    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid, authpw)
    # Input rows are CSV: first field is the experiment id, remaining
    # fields (if any) are biological replicate numbers to restrict to.
    if args.experiments:
        exp_ids = csv.reader(
            StringIO.StringIO('\n'.join([s.rstrip() for s in args.experiments])))
    else:
        exp_ids = csv.reader(args.infile)
    for row in exp_ids:
        if row[0].startswith('#'):
            continue
        exp_id = row[0].strip()
        if len(row) > 1:
            repns = []
            for s in row[1:]:
                repns.extend(s.split(','))
            map_only_reps = list(set([int(s) for s in repns]))
        else:
            map_only_reps = []
        outstrings = []
        encode_url = urlparse.urljoin(server, exp_id)
        experiment = common.encoded_get(encode_url, keypair)
        outstrings.append(exp_id)
        files = files_to_map(experiment, server, keypair, args.no_sfn_dupes)
        outstrings.append(str(len(files)))
        outstrings.append(str([f.get('accession') for f in files]))
        replicates = replicates_to_map(files, server, keypair, map_only_reps)
        biorep_numbers = \
            set([rep.get('biological_replicate_number') for rep in replicates])
        in_process = False
        if files:
            for biorep_n in biorep_numbers:
                outstrings.append('rep%s' % (biorep_n))
                biorep_files = [
                    f for f in files
                    if biorep_n in common.biorep_ns(f, server, keypair)
                ]
                paired_files = []
                unpaired_files = []
                # Consume biorep_files, pairing each PE file with its mate;
                # anything left unpaired is mapped single-ended.
                while biorep_files:
                    file_object = biorep_files.pop()
                    if file_object.get(
                            'paired_end'
                    ) == None:  # group all the unpaired reads for this biorep together
                        unpaired_files.append(file_object)
                    elif file_object.get('paired_end') in ['1', '2']:
                        if file_object.get('paired_with'):
                            mate = next((f for f in biorep_files if f.get(
                                '@id') == file_object.get('paired_with')), None)
                        else:  #have to find the file that is paired with this one
                            mate = next((f for f in biorep_files if f.get(
                                'paired_with') == file_object.get('@id')), None)
                        if mate:
                            biorep_files.remove(mate)
                        else:
                            logging.warning('%s:%s could not find mate' %
                                            (experiment.get('accession'),
                                             file_object.get('accession')))
                            mate = {}
                        # if mapping as SE, ignore the mate and just map the
                        # rep1 as SE with all the other SE for this rep, if any
                        if args.force_se:
                            unpaired_files.append(
                                next(f for f in [file_object, mate]
                                     if f.get('paired_end') == '1'))
                        else:
                            paired_files.append((file_object, mate))
                if biorep_files:
                    logging.warning(
                        '%s: leftover file(s) %s' %
                        (experiment.get('accession'), biorep_files))
                if paired_files:
                    pe_jobs = \
                        map_only(experiment, biorep_n, paired_files,
                                 server, keypair, args.sex_specific,
                                 args.crop_length, args.accession,
                                 args.fqcheck, args.force_patch,
                                 args.use_existing_folders, args.encoded_check)
                    in_process = True
                if unpaired_files:
                    se_jobs = \
                        map_only(experiment, biorep_n, unpaired_files,
                                 server, keypair, args.sex_specific,
                                 args.crop_length, args.accession,
                                 args.fqcheck, args.force_patch,
                                 args.use_existing_folders, args.encoded_check)
                    in_process = True
                # NOTE(review): pe_jobs/se_jobs are only assigned when the
                # corresponding file list is non-empty; the short-circuit
                # 'and' below is what prevents a NameError — confirm intended.
                if paired_files and pe_jobs:
                    outstrings.append(
                        'paired:%s' %
                        ([(a.get('accession'), b.get('accession'))
                          for (a, b) in paired_files]))
                    outstrings.append('paired jobs:%s' %
                                      ([j.get_id() for j in pe_jobs]))
                else:
                    outstrings.append('paired:%s' % (None))
                if unpaired_files and se_jobs:
                    outstrings.append(
                        'unpaired:%s' %
                        ([f.get('accession') for f in unpaired_files]))
                    outstrings.append('unpaired jobs:%s' %
                                      ([j.get_id() for j in se_jobs]))
                else:
                    outstrings.append('unpaired:%s' % (None))
            if in_process:
                # Best-effort status update; failures are logged, not fatal.
                r = common.encoded_patch(encode_url, keypair,
                                         {"internal_status": "processing"},
                                         return_response=True)
                try:
                    r.raise_for_status()
                except:
                    logging.error("Tried and failed to set internal_status")
                    logging.error(r.text)
            print('\t'.join(outstrings))
        else:  # no files
            if not replicates:
                logging.warning('%s: No files and no replicates' %
                                experiment.get('accession'))
            else:
                logging.warning('%s: No files to map' %
                                experiment.get('accession'))
        if files and not replicates:
            logging.warning('%s: Files but no replicates' %
                            experiment.get('accession'))
def main():
    """Drive chip_workflow.py for a list of ENCODE experiments.

    Variant of the chip_workflow driver in this file that additionally
    forwards --spp_instance.  Supports simplicate experiments, an explicit
    --control DX file, and per-assembly metadata (blacklist/gsize/csizes)
    via ASSEMBLY_METADATA.  Builds a chip_workflow.py command line, runs it
    via the shell, and on success PATCHes internal_status to 'processing'.
    Python 2 script (iteritems).
    """
    args = get_args()
    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid, authpw)
    # Accession list from the command line and/or an input file; '#' lines
    # are comments.
    experiments = []
    if args.experiments:
        experiments.extend(args.experiments)
    if args.infile:
        with open(args.infile, 'r') as fh:
            experiments.extend([e for e in fh])
    # Optional shared control tagAlign, resolved once for all experiments.
    if args.control:
        control_dxhandler = resolve_dx_file(args.control)
    else:
        control_dxhandler = None
    for exp_id in experiments:
        if exp_id.startswith('#'):
            continue
        exp_id = exp_id.rstrip()
        print("Experiment %s" % (exp_id))
        experiment_url = server + '/experiments/%s/' % (exp_id)
        experiment = common.encoded_get(experiment_url, keypair)
        if experiment.get('target'):
            target_url = server + experiment.get('target')
            target = common.encoded_get(target_url, keypair)
        else:
            logging.error('Experiment has no target ... skipping')
            continue
        print(
            "%s %s %s"
            % (experiment['accession'], target.get('investigated_as'),
               experiment.get('description')))
        tas = get_tas(experiment, server, keypair, args.project, args.inf,
                      control_dxhandler)
        if not tas:
            logging.error(
                'Failed to resolve all tagaligns for %s'
                % (experiment['accession']))
            continue
        # No rep2 tagAlign means a simplicate (single-replicate) experiment.
        if not tas.get('rep2_ta'):
            simplicate_experiment = True
            print("Simplicate experiment ta's:")
        else:
            simplicate_experiment = False
            print("Replicated experiment ta's:")
        pprint(tas)
        # sys.exit()
        # continue
        # NOTE(review): this 'continue' only skips to the next (key, value)
        # pair of the inner loop; unlike the older skip_flag variant in this
        # file, the experiment itself is NOT skipped when a TA is missing —
        # confirm that is intended.
        for key, value in tas.iteritems():
            if not value:
                logging.error('Missing %s ... skipping' % (key))
                continue
        workflow_title = '%s Peaks' % (exp_id)
        if args.tag:
            workflow_title += ' %s' % (args.tag)
        # Normalize the output folder to the form /<outf>/<exp_id>/peaks/
        outf = args.outf
        if not outf.startswith('/') and outf != '/':
            outf = '/'+outf
        if not outf.endswith('/') and outf != '/':
            outf += '/'
        outf += '%s/peaks/' % (exp_id)
        try:
            investigated_as = target['investigated_as']
        except:
            logging.error(
                "%s: Failed to determine target type ... skipping" % (exp_id))
            continue
        else:
            print(investigated_as)
        rep1_pe = tas['rep1_ta']['paired_end']
        if not simplicate_experiment:
            rep2_pe = tas['rep2_ta']['paired_end']
        else:
            rep2_pe = None
        # Endedness must be known for every replicate present, and must
        # match across replicates.
        if simplicate_experiment and rep1_pe is None:
            logging.error(
                "%s: Cannot determine paired end: rep1 PE = %s... skipping"
                % (exp_id, rep1_pe))
            continue
        elif not simplicate_experiment and None in [rep1_pe, rep2_pe]:
            logging.error(
                "%s: Cannot determine paired end: rep1 PE = %s, rep2 PE = %s ... skipping"
                % (exp_id, rep1_pe, rep2_pe))
            continue
        if not simplicate_experiment and rep1_pe != rep2_pe:
            logging.error(
                "%s: rep1 PE %s differs from rep2 PE %s ... skipping"
                % (exp_id, rep1_pe, rep2_pe))
            continue
        # Histone targets get the histone pipeline and no blacklist;
        # everything else is treated as a TF target.
        if any('histone' in target_type for target_type in investigated_as):
            logging.info(
                "%s: Found to be histone. No blacklist will be used."
                % (exp_id))
            wf_target = 'histone'
            blacklist = None
        else:
            logging.info("Assumed to be tf")
            wf_target = 'tf'
            if not args.blacklist:
                if args.assembly in ASSEMBLY_METADATA:
                    blacklist = ASSEMBLY_METADATA[args.assembly]['blacklist']
                else:
                    logging.warning(
                        "%s: No blacklist for assembly %s, proceeding with no blacklist"
                        % (exp_id, args.assembly))
                    blacklist = None
        # NOTE(review): if the assembly is unknown and -gsize/-csizes are not
        # given, genomesize/chrom_sizes are never assigned and the
        # command_strings block below raises NameError — confirm whether the
        # logging.error branches should 'continue' instead.
        if not args.gsize:
            if args.assembly in ASSEMBLY_METADATA:
                genomesize = ASSEMBLY_METADATA[args.assembly]['gsize']
            else:
                logging.error(
                    "%s: Must specify -gsize for assembly %s"
                    % (exp_id, args.assembly))
        else:
            genomesize = args.gsize
        if not args.csizes:
            if args.assembly in ASSEMBLY_METADATA:
                chrom_sizes = ASSEMBLY_METADATA[args.assembly]['csizes']
            else:
                logging.error(
                    "%s: Must specify -csizes for assembly %s"
                    % (exp_id, args.assembly))
        else:
            chrom_sizes = args.csizes
        # chip_workflow.py lives next to this script.
        chip_workflow_absolute_path = os.path.dirname(os.path.realpath(__file__)) + "/chip_workflow.py"
        # Assemble the workflow-builder command line.
        command_strings = [
            chip_workflow_absolute_path,
            '--nomap --yes',
            '--target %s' % (wf_target),
            '--title "%s"' % (workflow_title),
            '--outf "%s"' % (outf),
            '--rep1pe %s' % (str(rep1_pe).lower()),
            '--rep1 %s' % (tas['rep1_ta'].get('file_id')),
            '--ctl1 %s' % (tas['rep1_ta'].get('control_id')),
            '--genomesize %s --chrom_sizes "%s"' % (genomesize, chrom_sizes),
            '--spp_version %s' % (args.spp_version)
        ]
        if not simplicate_experiment:
            command_strings.extend([
                '--rep2pe %s' % (str(rep2_pe).lower()),
                '--rep2 %s' % (tas['rep2_ta'].get('file_id')),
                '--ctl2 %s' % (tas['rep2_ta'].get('control_id')),
            ])
        if args.spp_instance:
            command_strings.append('--spp_instance %s' % str(args.spp_instance))
        if args.fragment_length:
            command_strings.append('--fragment_length %s' % str(args.fragment_length))
        if blacklist:
            command_strings.append('--blacklist "%s"' % (blacklist))
        if args.debug:
            command_strings.append('--debug')
        if args.use_existing_folders:
            command_strings.append('--use_existing_folders')
        if args.accession:
            command_strings.append('--accession')
        if args.fqcheck is not None:
            command_strings.append('--fqcheck=%s' % (args.fqcheck))
        if args.skip_control is not None:
            command_strings.append('--skip_control=%s' % (args.skip_control))
        if args.force_patch is not None:
            command_strings.append('--force_patch=%s' % (args.force_patch))
        run_command = ' '.join(command_strings)
        print(run_command)
        if args.dryrun:
            logging.info('Dryrun')
        else:
            try:
                subprocess.check_call(run_command, shell=True)
            except subprocess.CalledProcessError as e:
                logging.error(
                    "%s: chip_workflow exited with non-zero code %d"
                    % (exp_id, e.returncode))
            else:
                print("%s workflow created" % (experiment['accession']))
                # Mark the experiment as in-flight on the portal; failure to
                # do so is logged but does not abort the run.
                logging.debug(
                    "%s: patching internal_status to url %s"
                    % (exp_id, experiment_url))
                r = common.encoded_patch(
                    experiment_url, keypair,
                    {'internal_status': 'processing'}, return_response=True)
                try:
                    r.raise_for_status()
                except:
                    logging.warning(
                        "%s: Failed to update experiment internal_status to processing. Skipping that update."
                        % (exp_id))
                    logging.debug(r.text)