def main():
    args = get_args()
    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid, authpw)

    if args.analysis_ids:
        ids = args.analysis_ids
    else:
        ids = args.infile

    formats = ['bed_narrowPeak', 'bed_gappedPeak']
    fieldnames = [
        'file', 'analysis', 'experiment', 'replicates', 'output_name',
        'file_format', 'output_type', 'target', 'biosample_term_name',
        'biosample_term_id', 'biosample_type', 'biosample_life_stage',
        'biosample_age', 'biosample_organism']
    writer = csv.DictWriter(args.outfile, fieldnames, delimiter='\t')
    writer.writeheader()

    for (i, analysis_id) in enumerate(ids):
        analysis_id = analysis_id.rstrip()
        logger.info('%s' % (analysis_id))
        try:
            files = analysis_files(analysis_id, keypair, server, args.assembly)
        except Exception:
            logger.error(
                '%s error finding analysis_files. Check experiment metadata.'
                % (analysis_id))
            # without this, a stale or undefined `files` would be used below
            continue
        for f in [f_obj for f_obj in files
                  if f_obj.get('file_format') in formats]:
            fid = f['dx'].get_id()
            local_path = os.path.join(args.outdir, fid)
            if not os.path.isfile(local_path):
                if not os.path.exists(args.outdir):
                    os.makedirs(args.outdir)
                dxpy.download_dxfile(fid, local_path)
            replicates = []
            for derived_from in f['derived_from']:
                rep_ns = common.biorep_ns(derived_from, server, keypair)
                for r in rep_ns:
                    replicates.append(r)
            experiment = common.encoded_get(
                urlparse.urljoin(server, '/experiments/%s' % (f['dataset'])),
                keypair)
            rep = common.encoded_get(
                urlparse.urljoin(server, experiment['replicates'][0]), keypair)
            lib = common.encoded_get(
                urlparse.urljoin(server, rep['library']), keypair)
            biosample = common.encoded_get(
                urlparse.urljoin(server, lib['biosample']), keypair)
            writer.writerow({
                'file': fid,
                'analysis': analysis_id,
                'experiment': experiment.get('accession'),
                'replicates': replicates,
                'output_name': f.get('name'),
                'file_format': f.get('file_format'),
                'output_type': f.get('output_type'),
                'target': experiment.get('target'),
                'biosample_term_name': experiment.get('biosample_term_name'),
                'biosample_term_id': experiment.get('biosample_term_id'),
                'biosample_type': experiment.get('biosample_type'),
                'biosample_life_stage': biosample.get('life_stage'),
                'biosample_age': biosample.get('age'),
                'biosample_organism': biosample.get('organism')})
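# A minimal sketch of the get_args() helper the script above assumes.
# Hypothetical: only the flag names are grounded (they are the attributes
# main() reads); types, defaults, and help text are assumptions.
import argparse
import sys

def get_args():
    parser = argparse.ArgumentParser(
        description='Tabulate peak files and biosample metadata per analysis')
    parser.add_argument('analysis_ids', nargs='*',
                        help='DNAnexus analysis IDs to report on')
    parser.add_argument('--infile', type=argparse.FileType('r'),
                        default=sys.stdin,
                        help='file of analysis IDs, one per line (assumed default: stdin)')
    parser.add_argument('--outfile', type=argparse.FileType('w'),
                        default=sys.stdout,
                        help='TSV report destination (assumed default: stdout)')
    parser.add_argument('--outdir', default='.',
                        help='directory for downloaded peak files')
    parser.add_argument('--assembly',
                        help='genome assembly to select analysis files for')
    parser.add_argument('--key', default='default',
                        help='ENCODE portal key name passed to common.processkey')
    parser.add_argument('--keyfile', default='keypairs.json',
                        help='path to the keypairs file (assumed default)')
    parser.add_argument('--debug', action='store_true')
    return parser.parse_args()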
def main():
    global args
    args = get_args()
    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid, authpw)

    if args.experiments:
        exp_ids = csv.reader(StringIO.StringIO(
            '\n'.join([s.rstrip() for s in args.experiments])))
    else:
        exp_ids = csv.reader(args.infile)

    for instring in exp_ids:
        exp_id = instring[0].strip()
        if len(instring) > 1:
            # extra columns are biorep numbers, possibly comma-separated
            repns = []
            for s in instring[1:]:
                repns.extend(s.split(','))
            biorep_ns = list(set([int(s) for s in repns]))
        else:
            biorep_ns = []
        outstrings = []
        encode_url = urlparse.urljoin(server, exp_id)
        experiment = common.encoded_get(encode_url, keypair)
        outstrings.append(exp_id)
        files = files_to_map(experiment, server, keypair, args.sfn_dupes)
        outstrings.append(str(len(files)))
        outstrings.append(str([f.get('accession') for f in files]))
        replicates = replicates_to_map(files, server, keypair, biorep_ns)
        if files:
            for biorep_n in set([rep.get('biological_replicate_number')
                                 for rep in replicates]):
                outstrings.append('rep%s' % (biorep_n))
                biorep_files = [f for f in files
                                if biorep_n in common.biorep_ns(f, server, keypair)]
                paired_files = []
                unpaired_files = []
                while biorep_files:
                    file_object = biorep_files.pop()
                    if file_object.get('paired_end') is None:
                        # group all the unpaired reads for this biorep together
                        unpaired_files.append(file_object)
                    elif file_object.get('paired_end') in ['1', '2']:
                        if file_object.get('paired_with'):
                            mate = next(
                                (f for f in biorep_files
                                 if f.get('@id') == file_object.get('paired_with')),
                                None)
                        else:
                            # have to find the file that is paired with this one
                            mate = next(
                                (f for f in biorep_files
                                 if f.get('paired_with') == file_object.get('@id')),
                                None)
                        if mate:
                            biorep_files.remove(mate)
                        else:
                            logging.warning('%s:%s could not find mate'
                                            % (experiment.get('accession'),
                                               file_object.get('accession')))
                            mate = {}
                        paired_files.append((file_object, mate))
                if biorep_files:
                    logging.warning('%s: leftover file(s) %s'
                                    % (experiment.get('accession'), biorep_files))
                if paired_files:
                    pe_jobs = map_only(experiment, biorep_n, paired_files,
                                       args.key, server, keypair)
                if unpaired_files:
                    se_jobs = map_only(experiment, biorep_n, unpaired_files,
                                       args.key, server, keypair)
                if paired_files and pe_jobs:
                    outstrings.append('paired:%s'
                                      % ([(a.get('accession'), b.get('accession'))
                                          for (a, b) in paired_files]))
                    outstrings.append('paired jobs:%s'
                                      % ([j.get_id() for j in pe_jobs]))
                else:
                    outstrings.append('paired:%s' % (None))
                if unpaired_files and se_jobs:
                    outstrings.append('unpaired:%s'
                                      % ([f.get('accession') for f in unpaired_files]))
                    outstrings.append('unpaired jobs:%s'
                                      % ([j.get_id() for j in se_jobs]))
                else:
                    outstrings.append('unpaired:%s' % (None))
            print '\t'.join(outstrings)
        else:  # no files
            if not replicates:
                logging.warning('%s: No files and no replicates'
                                % experiment.get('accession'))
            else:
                logging.warning('%s: No files to map'
                                % experiment.get('accession'))
        if files and not replicates:
            logging.warning('%s: Files but no replicates'
                            % experiment.get('accession'))
def main():
    global args
    args = get_args()
    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid, authpw)

    if args.experiments:
        exp_ids = csv.reader(StringIO.StringIO(
            '\n'.join([s.rstrip() for s in args.experiments])))
    else:
        exp_ids = csv.reader(args.infile)

    for instring in exp_ids:
        exp_id = instring[0].strip()
        if len(instring) > 1:
            # extra columns are biorep numbers, possibly comma-separated
            repns = []
            for s in instring[1:]:
                repns.extend(s.split(','))
            biorep_ns = list(set([int(s) for s in repns]))
        else:
            biorep_ns = []
        outstrings = []
        encode_url = urlparse.urljoin(server, exp_id)
        experiment = common.encoded_get(encode_url, keypair)
        outstrings.append(exp_id)
        files = files_to_map(experiment, server, keypair, args.no_sfn_dupes)
        outstrings.append(str(len(files)))
        outstrings.append(str([f.get('accession') for f in files]))
        replicates = replicates_to_map(files, server, keypair, biorep_ns)
        in_process = False
        if files:
            for biorep_n in set([rep.get('biological_replicate_number')
                                 for rep in replicates]):
                outstrings.append('rep%s' % (biorep_n))
                biorep_files = [f for f in files
                                if biorep_n in common.biorep_ns(f, server, keypair)]
                paired_files = []
                unpaired_files = []
                while biorep_files:
                    file_object = biorep_files.pop()
                    if file_object.get('paired_end') is None:
                        # group all the unpaired reads for this biorep together
                        unpaired_files.append(file_object)
                    elif file_object.get('paired_end') in ['1', '2']:
                        if file_object.get('paired_with'):
                            mate = next(
                                (f for f in biorep_files
                                 if f.get('@id') == file_object.get('paired_with')),
                                None)
                        else:
                            # have to find the file that is paired with this one
                            mate = next(
                                (f for f in biorep_files
                                 if f.get('paired_with') == file_object.get('@id')),
                                None)
                        if mate:
                            biorep_files.remove(mate)
                        else:
                            logging.warning('%s:%s could not find mate'
                                            % (experiment.get('accession'),
                                               file_object.get('accession')))
                            mate = {}
                        # if mapping as SE, ignore the mate and just map the
                        # rep1 as SE with all the other SE for this rep, if any
                        if args.force_se:
                            unpaired_files.append(next(
                                f for f in [file_object, mate]
                                if f.get('paired_end') == '1'))
                        else:
                            paired_files.append((file_object, mate))
                if biorep_files:
                    logging.warning('%s: leftover file(s) %s'
                                    % (experiment.get('accession'), biorep_files))
                if paired_files:
                    pe_jobs = map_only(experiment, biorep_n, paired_files,
                                       args.key, server, keypair,
                                       args.sex_specific)
                    in_process = True
                if unpaired_files:
                    se_jobs = map_only(experiment, biorep_n, unpaired_files,
                                       args.key, server, keypair,
                                       args.sex_specific)
                    in_process = True
                if paired_files and pe_jobs:
                    outstrings.append('paired:%s'
                                      % ([(a.get('accession'), b.get('accession'))
                                          for (a, b) in paired_files]))
                    outstrings.append('paired jobs:%s'
                                      % ([j.get_id() for j in pe_jobs]))
                else:
                    outstrings.append('paired:%s' % (None))
                if unpaired_files and se_jobs:
                    outstrings.append('unpaired:%s'
                                      % ([f.get('accession') for f in unpaired_files]))
                    outstrings.append('unpaired jobs:%s'
                                      % ([j.get_id() for j in se_jobs]))
                else:
                    outstrings.append('unpaired:%s' % (None))
            if in_process:
                r = common.encoded_patch(encode_url, keypair,
                                         {"internal_status": "processing"},
                                         return_response=True)
                try:
                    r.raise_for_status()
                except Exception:
                    logging.error("Tried and failed to set internal_status")
                    logging.error(r.text)
            print '\t'.join(outstrings)
        else:  # no files
            if not replicates:
                logging.warning('%s: No files and no replicates'
                                % experiment.get('accession'))
            else:
                logging.warning('%s: No files to map'
                                % experiment.get('accession'))
        if files and not replicates:
            logging.warning('%s: Files but no replicates'
                            % experiment.get('accession'))
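# Illustrative standalone version of the mate-pairing loop the mapping mains
# above share (a hypothetical helper, not part of the original scripts):
# given ENCODE fastq file objects, split them into (read1, read2) tuples and
# a list of unpaired files, matching mates via paired_with/@id in either
# direction, exactly as the inline loop does.
def pair_fastqs(files):
    files = list(files)  # work on a copy; we pop as we pair
    paired, unpaired = [], []
    while files:
        f = files.pop()
        if f.get('paired_end') not in ['1', '2']:
            # no paired_end property: treat as single-ended
            unpaired.append(f)
            continue
        if f.get('paired_with'):
            # this file names its mate directly
            mate = next((g for g in files
                         if g.get('@id') == f.get('paired_with')), None)
        else:
            # otherwise look for a file that names this one as its mate
            mate = next((g for g in files
                         if g.get('paired_with') == f.get('@id')), None)
        if mate:
            files.remove(mate)
        paired.append((f, mate or {}))
    return paired, unpaired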
def main():
    args = get_args()
    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid, authpw)

    if args.query:
        r = requests.get(args.query, auth=keypair,
                         headers={'content-type': 'application/json',
                                  'accept': 'application/json'})
        experiments = r.json()['@graph']
        exp_ids = [e['accession'] for e in experiments]
    elif args.experiments:
        exp_ids = args.experiments
    else:
        exp_ids = args.infile

    for (i, exp_id) in enumerate(exp_ids):
        exp_id = exp_id.strip()
        logger.info('%s' % (exp_id))
        url = urlparse.urljoin(server, '/experiments/%s' % (exp_id))
        experiment_object = common.encoded_get(url, keypair)
        original_files = [
            common.encoded_get(urlparse.urljoin(server, '%s' % (uri)), keypair)
            for uri in experiment_object.get('original_files')]
        bams = [f for f in original_files
                if f.get('file_format') == 'bam'
                and f.get('status') not in ['revoked', 'deleted', 'replaced']]
        fastqs = [f for f in original_files
                  if f.get('file_format') == 'fastq'
                  and f.get('status') not in ['revoked', 'deleted', 'replaced']]
        for f in fastqs:
            f['replicate'] = common.encoded_get(
                urlparse.urljoin(server, '%s' % (f.get('replicate'))), keypair)
        for bam in bams:
            bioreps = common.biorep_ns(bam.get('accession'), server, keypair)
            if len(bioreps) != 1:
                logger.error('Expected to find 1 biorep for bam %s, found %d. Skipping.'
                             % (bam.get('accession'), len(bioreps)))
                continue
            else:
                bam_biorep = bioreps[0]
            try:
                derived_from = [
                    common.encoded_get(urlparse.urljoin(server, '%s' % (uri)), keypair)
                    for uri in bam.get('derived_from')]
            except Exception:
                derived_from = None
            if not derived_from:
                logger.error('bam %s is derived from nothing. Skipping'
                             % (bam.get('accession')))
                continue
            for f in derived_from:
                if f.get('file_format') != 'fastq':
                    logger.error('bam %s appears to be derived from non-fastq %s. '
                                 'Continuing with other derived_from files.'
                                 % (bam.get('accession'), f.get('accession')))
                    continue
                try:
                    if common.after(f.get('date_created'), bam.get('date_created')):
                        logger.error('Date conflict. Bam %s is derived from newer Fastq %s'
                                     % (bam.get('accession'), f.get('accession')))
                except Exception:
                    logger.error('Cannot compare bam date %s with fastq date %s. '
                                 'Continuing with other derived_from files.'
                                 % (bam.get('date_created'), f.get('date_created')))
                    continue
            for f in fastqs:
                if f.get('replicate').get('biological_replicate_number') == bam_biorep:
                    if common.after(f.get('date_created'), bam.get('date_created')):
                        logger.info('bam %s is out-of-date. fastq %s is newer'
                                    % (bam.get('accession'), f.get('accession')))
                        if re.search('control', experiment_object.get('target').lower()):
                            logger.info('WARNING, %s is a control experiment so many '
                                        'other experiments may be out-of-date.'
                                        % (experiment_object.get('accession')))
def main():
    args = get_args()
    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid, authpw)

    if args.query:
        r = requests.get(args.query, auth=keypair,
                         headers={'content-type': 'application/json',
                                  'accept': 'application/json'})
        experiments = r.json()['@graph']
        exp_ids = [e['accession'] for e in experiments]
    elif args.experiments:
        exp_ids = args.experiments
    else:
        # materialize the input file so len() below works
        exp_ids = list(args.infile)
    logger.info('Checking %d experiments' % (len(exp_ids)))

    for (i, exp_id) in enumerate(exp_ids):
        exp_id = exp_id.strip()
        # logger.info('%s' % (exp_id))
        url = urlparse.urljoin(server, '/experiments/%s' % (exp_id))
        experiment_object = common.encoded_get(url, keypair)
        original_files = [
            common.encoded_get(urlparse.urljoin(server, '%s' % (uri)), keypair)
            for uri in experiment_object.get('original_files')]
        bams = [f for f in original_files
                if f.get('file_format') == 'bam'
                and f.get('status') not in ['revoked', 'deleted', 'replaced']]
        fastqs = [f for f in original_files
                  if f.get('file_format') == 'fastq'
                  and f.get('status') not in ['revoked', 'deleted', 'replaced']]
        for f in fastqs:
            f['replicate'] = common.encoded_get(
                urlparse.urljoin(server, '%s' % (f.get('replicate'))), keypair)
        for bam in bams:
            bioreps = common.biorep_ns(bam.get('accession'), server, keypair)
            if len(bioreps) != 1:
                logger.error('Expected to find 1 biorep for bam %s, found %s. Skipping.'
                             % (bam.get('accession'), bioreps))
                continue
            else:
                bam_biorep = bioreps[0]
            try:
                derived_from = [
                    common.encoded_get(urlparse.urljoin(server, '%s' % (uri)), keypair)
                    for uri in bam.get('derived_from')]
            except Exception:
                derived_from = None
            if not derived_from:
                logger.error('bam %s is derived from nothing. Skipping'
                             % (bam.get('accession')))
                continue
            for f in derived_from:
                if f.get('output_category') == 'reference':
                    continue
                if f.get('file_format') != 'fastq':
                    logger.error('bam %s appears to be derived from non-fastq %s. '
                                 'Continuing with other derived_from files.'
                                 % (bam.get('accession'), f.get('accession')))
                    continue
                try:
                    if common.after(f.get('date_created'), bam.get('date_created')):
                        logger.error('Date conflict. Bam %s is derived from newer Fastq %s'
                                     % (bam.get('accession'), f.get('accession')))
                except Exception:
                    logger.error('Cannot compare bam date %s with fastq date %s. '
                                 'Continuing with other derived_from files.'
                                 % (bam.get('date_created'), f.get('date_created')))
                    continue
            for f in fastqs:
                if f.get('replicate').get('biological_replicate_number') == bam_biorep:
                    if common.after(f.get('date_created'), bam.get('date_created')):
                        logger.info('bam %s is out-of-date. fastq %s is newer'
                                    % (bam.get('accession'), f.get('accession')))
                        if re.search('control', experiment_object.get('target').lower()):
                            logger.info('WARNING, %s is a control experiment so many '
                                        'other experiments may be out-of-date.'
                                        % (experiment_object.get('accession')))
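# A minimal sketch of common.after as the freshness checks above use it
# (hypothetical reimplementation; the real helper lives in the shared common
# module): presumably True when the first ISO-8601 timestamp is later than
# the second. dateutil tolerates the portal's timezone-suffixed date_created
# strings; the parsing choice here is an assumption.
import dateutil.parser

def after(date1, date2):
    # Raises on unparseable input, which the callers above catch and log.
    return dateutil.parser.parse(date1) > dateutil.parser.parse(date2)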
def main():
    global args
    args = get_args()
    authid, authpw, server = common.processkey(args.key, args.keyfile)
    keypair = (authid, authpw)

    if args.experiments:
        exp_ids = csv.reader(StringIO.StringIO(
            '\n'.join([s.rstrip() for s in args.experiments])))
    else:
        exp_ids = csv.reader(args.infile)

    for row in exp_ids:
        if row[0].startswith('#'):
            continue
        exp_id = row[0].strip()
        if len(row) > 1:
            # extra columns are biorep numbers, possibly comma-separated
            repns = []
            for s in row[1:]:
                repns.extend(s.split(','))
            map_only_reps = list(set([int(s) for s in repns]))
        else:
            map_only_reps = []
        outstrings = []
        encode_url = urlparse.urljoin(server, exp_id)
        experiment = common.encoded_get(encode_url, keypair)
        outstrings.append(exp_id)
        files = files_to_map(experiment, server, keypair, args.no_sfn_dupes)
        outstrings.append(str(len(files)))
        outstrings.append(str([f.get('accession') for f in files]))
        replicates = replicates_to_map(files, server, keypair, map_only_reps)
        biorep_numbers = \
            set([rep.get('biological_replicate_number') for rep in replicates])
        in_process = False
        if files:
            for biorep_n in biorep_numbers:
                outstrings.append('rep%s' % (biorep_n))
                biorep_files = [f for f in files
                                if biorep_n in common.biorep_ns(f, server, keypair)]
                paired_files = []
                unpaired_files = []
                while biorep_files:
                    file_object = biorep_files.pop()
                    if file_object.get('paired_end') is None:
                        # group all the unpaired reads for this biorep together
                        unpaired_files.append(file_object)
                    elif file_object.get('paired_end') in ['1', '2']:
                        if file_object.get('paired_with'):
                            mate = next(
                                (f for f in biorep_files
                                 if f.get('@id') == file_object.get('paired_with')),
                                None)
                        else:
                            # have to find the file that is paired with this one
                            mate = next(
                                (f for f in biorep_files
                                 if f.get('paired_with') == file_object.get('@id')),
                                None)
                        if mate:
                            biorep_files.remove(mate)
                        else:
                            logging.warning('%s:%s could not find mate'
                                            % (experiment.get('accession'),
                                               file_object.get('accession')))
                            mate = {}
                        # if mapping as SE, ignore the mate and just map the
                        # rep1 as SE with all the other SE for this rep, if any
                        if args.force_se:
                            unpaired_files.append(next(
                                f for f in [file_object, mate]
                                if f.get('paired_end') == '1'))
                        else:
                            paired_files.append((file_object, mate))
                if biorep_files:
                    logging.warning('%s: leftover file(s) %s'
                                    % (experiment.get('accession'), biorep_files))
                if paired_files:
                    pe_jobs = map_only(
                        experiment, biorep_n, paired_files, server, keypair,
                        args.sex_specific, args.crop_length, args.accession,
                        args.fqcheck, args.force_patch,
                        args.use_existing_folders, args.encoded_check)
                    in_process = True
                if unpaired_files:
                    se_jobs = map_only(
                        experiment, biorep_n, unpaired_files, server, keypair,
                        args.sex_specific, args.crop_length, args.accession,
                        args.fqcheck, args.force_patch,
                        args.use_existing_folders, args.encoded_check)
                    in_process = True
                if paired_files and pe_jobs:
                    outstrings.append('paired:%s'
                                      % ([(a.get('accession'), b.get('accession'))
                                          for (a, b) in paired_files]))
                    outstrings.append('paired jobs:%s'
                                      % ([j.get_id() for j in pe_jobs]))
                else:
                    outstrings.append('paired:%s' % (None))
                if unpaired_files and se_jobs:
                    outstrings.append('unpaired:%s'
                                      % ([f.get('accession') for f in unpaired_files]))
                    outstrings.append('unpaired jobs:%s'
                                      % ([j.get_id() for j in se_jobs]))
                else:
                    outstrings.append('unpaired:%s' % (None))
            if in_process:
                r = common.encoded_patch(encode_url, keypair,
                                         {"internal_status": "processing"},
                                         return_response=True)
                try:
                    r.raise_for_status()
                except Exception:
                    logging.error("Tried and failed to set internal_status")
                    logging.error(r.text)
            print('\t'.join(outstrings))
        else:  # no files
            if not replicates:
                logging.warning('%s: No files and no replicates'
                                % experiment.get('accession'))
            else:
                logging.warning('%s: No files to map'
                                % experiment.get('accession'))
        if files and not replicates:
            logging.warning('%s: Files but no replicates'
                            % experiment.get('accession'))
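# For reference, a minimal sketch of the common.encoded_get helper these
# scripts call throughout (hypothetical reimplementation; the real one lives
# in the shared common module). It mirrors the authenticated JSON GET used
# for --query above.
import requests

def encoded_get(url, keypair=None):
    # Authenticated GET against the ENCODE portal, returning parsed JSON.
    response = requests.get(
        url,
        auth=keypair,
        headers={'content-type': 'application/json',
                 'accept': 'application/json'})
    response.raise_for_status()
    return response.json()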