def find_control_file(self, rep, default=None):
    '''Attempts to find an appropriate control file.

    Walks rep['controls'] in order.  For each control file key the file
    object and then its replicate object are looked up in encoded; the
    experiment id and a "rep<bio>_<tech>" folder name are derived from the
    replicate, and DX is searched for self.CONTROL_FILE_GLOB under
    <control_root><exp_id>/<rep_tech>/.

    Args:
        rep: replicate dict.  Its optional 'controls' entry lists control
            file keys; an entry may itself be a list, in which case only
            its first element is used.
        default: value returned when controls are listed but none of them
            can be located in DX.

    Returns:
        The DX path of the first control file found; `default` when nothing
        was found and a default was supplied; None when `rep` has no
        'controls' entry at all (`default` is not consulted in that case).

    Exits the process with status 1 when controls are listed, none can be
    found, and no default was supplied.
    '''
    # TODO Make more generic and move to dxencode.py when needed.
    # NOTE: the lookups below authenticate through self.server_key, so no
    # separate credential tuple is needed in this method.
    if 'controls' not in rep:
        return None

    for file_key in rep['controls']:
        if isinstance(file_key, list):
            file_key = file_key[0]
        file_obj = dxencode.enc_lookup_json(file_key, self.server_key, frame='embedded')
        rep_id = file_obj["replicate"]['@id']
        rep_obj = dxencode.enc_lookup_json(rep_id, self.server_key, frame='embedded')
        exp_id = rep_obj['experiment']['@id'].split('/')[2]
        rep_tech = "rep%s_%s" % \
            (rep_obj['biological_replicate_number'], rep_obj['technical_replicate_number'])

        control_root = self.psv['control_path']
        # Cheating: scratchPad keeps its controls under a different root.
        # NOTE(review): "/lrna" has no trailing slash, so the fallback glob
        # below becomes "/lrna<exp_id>/..." - confirm this is intended.
        if self.proj_name == "scratchPad" and self.psv['control_path'] == self.CONTROL_ROOT_FOLDER:
            control_root = "/lrna"

        # Fallback location, used only when the folder search finds nothing.
        path_n_glob = control_root + exp_id + '/' + rep_tech + '/' + self.CONTROL_FILE_GLOB
        target_folder = dxencode.find_folder(exp_id + '/' + rep_tech, self.project, control_root)
        if target_folder is not None:
            path_n_glob = target_folder + '/' + self.CONTROL_FILE_GLOB

        fid = dxencode.find_file(path_n_glob, self.proj_id, multiple=False, recurse=False)
        if fid is not None:
            return dxencode.file_path_from_fid(fid)

    if default is not None:
        return default
    print("Unable to find control in search of %s" % rep['controls'])
    sys.exit(1)
def __init__(self):
    '''Reads the command line, then sets up credentials and empty state.'''
    self.args = self.get_args()

    # expect graph of form (child, parent)
    self.expected_graph = dict(unpaired={}, paired={})
    self.assay_term_name = ''

    # The --test switch selects which key is handed to processkey.
    server_key = 'test' if self.args.test else 'www'
    credentials = dxencode.processkey(server_key)
    (self.authid, self.authpw, self.server) = credentials

    self.experiments = []
def find_control_file(self, rep, default=None):
    '''Attempts to find an appropriate control file.

    For each key in rep['controls'] the file object and then its replicate
    object are fetched from encoded (frame=embedded).  The experiment id and
    a "rep<bio>_<tech>" folder name are derived from the replicate, and DX
    is searched for self.CONTROL_FILE_GLOB under
    <control_root><exp_id>/<rep_tech>/.

    Args:
        rep: replicate dict; rep['controls'] lists the control file keys.
        default: value returned when no control file can be located in DX.

    Returns:
        The DX path of the first control file found, or `default` when
        nothing was found and a default was supplied.

    Exits the process with status 1 when an encoded request fails, or when
    no control is found and no default was supplied.
    '''
    # TODO Make more generic and move to dxencode.py when needed.
    (AUTHID, AUTHPW, SERVER) = dxencode.processkey(self.server_key)

    def fetch_embedded(obj_path, what):
        '''Returns the embedded JSON for obj_path, or reports and exits.'''
        url = '%s%s/?format=json&frame=embedded' % (SERVER, obj_path)
        # Pre-bind so the failure report below never hits an unbound (or
        # stale) name when the request itself raises.
        response = None
        try:
            response = dxencode.encoded_get(url, AUTHID, AUTHPW)
            return response.json()
        except Exception:
            print("URL to %s [%s] returned ?" % (what, url))
            print(response)
            sys.exit(1)

    for file_key in rep['controls']:
        file_obj = fetch_embedded(file_key, 'control')
        rep_obj = fetch_embedded(file_obj["replicate"]['@id'], 'replicate')

        exp_id = rep_obj['experiment'].split('/')[2]
        rep_tech = "rep%s_%s" % \
            (rep_obj['biological_replicate_number'], rep_obj['technical_replicate_number'])

        # default by cheating
        if self.proj_name == dxencode.PRODUCTION_PROJECT:
            control_root = "/long-RNA-seq/runs/"
        else:
            control_root = self.CONTROL_ROOT_FOLDER

        # Fallback location, used only when the folder search finds nothing.
        path_n_glob = control_root + exp_id + '/' + rep_tech + '/' + self.CONTROL_FILE_GLOB
        target_folder = dxencode.find_folder(exp_id + '/' + rep_tech, self.project, control_root)
        if target_folder is not None:
            path_n_glob = target_folder + '/' + self.CONTROL_FILE_GLOB

        fid = dxencode.find_file(path_n_glob, self.proj_id, multiple=False, recurse=False)
        if fid is not None:
            return dxencode.file_path_from_fid(fid)

    if default is not None:
        return default
    print("Unable to find control in search of %s" % rep['controls'])
    sys.exit(1)
#!/usr/bin/env python
import dxencode
import dxpy
import json
import requests

# DX project that is searched for each file by name.
DX_PROJECT_ID = 'project-BQkYKg00F1GP55qQ9Qy00VP0'


def patch(obs):
    '''Rewrites submitted_file_name on encoded to include the DX folder.

    For every file object in `obs` that is neither a fastq nor revoked, the
    file is located by name in DX_PROJECT_ID and its encoded
    'submitted_file_name' is PATCHed to "<dx folder>/<name>".  An HTTP error
    from one PATCH is reported and the loop continues with the next file.

    Relies on the module-level `srv`, `authid` and `authpw` set below.
    '''
    for fob in obs:
        if fob['file_format'] == 'fastq' or fob['status'] == 'revoked':
            continue
        fn = fob['submitted_file_name']
        bare_name = fn.strip('/')
        dx_file = dxpy.find_one_data_object(name=bare_name, project=DX_PROJECT_ID)
        folder = dxpy.describe(dx_file['id'])['folder']
        newfn = folder + '/' + bare_name
        print("Patch: %s with %s" % (fn, newfn))
        res = requests.patch(srv + fob['@id'],
                             auth=(authid, authpw),
                             data=json.dumps({'submitted_file_name': newfn}),
                             headers={'content-type': 'application/json'})
        try:
            res.raise_for_status()
        except requests.exceptions.HTTPError as e:
            print("Failed %s" % e)
        else:
            print("Success")


# Named authid/authpw rather than id/pw so the builtin id() is not shadowed.
(authid, authpw, srv) = dxencode.processkey('www')
accs = dxencode.encoded_get(srv + 'ENCSR000AEV', AUTHID=authid, AUTHPW=authpw).json()['original_files']
file_objs = [dxencode.encoded_get(srv + acc, AUTHID=authid, AUTHPW=authpw).json() for acc in accs]
patch(file_objs)
def run(self):
    '''Runs recovery from start to finish using command line arguments.

    Resolves the target server and DX project, then for each experiment in
    args.experiments: looks the experiment up in encoded, locates its DX
    folder, works out which result files are expected and which of those
    have already been posted, and runs file_metadata_recovery on each posted
    file.  Per-experiment and overall summaries are printed.

    Side effects: sets many attributes on self (test, ignore, server_key,
    authid/authpw/server, proj_name, project, proj_id, exp_id, exp,
    exp_type, umbrella_folder, exp_folder, pipeline, replicates), clears
    self.obj_cache["exp"] per experiment and resets args.start_at once the
    starting file has been reached.

    Exits the process with status 1 when no project can be determined, or
    when the final `halted == exp_count` check holds.
    '''
    args = self.get_args()
    self.test = args.test
    self.ignore = False
    if args.ignore_properties:
        print("Ignoring DXFile properties (will post to test server)")
        self.ignore = args.ignore_properties
        self.server_key = 'test'  # mandated because option is dangerous
    else:
        # Only honour the requested server when the dangerous option is off;
        # otherwise the mandated 'test' key above must not be overwritten.
        self.server_key = args.server
    self.authid, self.authpw, self.server = dxencode.processkey(self.server_key)
    if self.server_key == "www":
        self.acc_prefix = "ENCFF"

    # An explicit --project wins over the current DX environment project.
    self.proj_name = dxencode.env_get_current_project()
    if self.proj_name is None or args.project is not None:
        self.proj_name = args.project
    if self.proj_name is None:
        print("Please enter a '--project' to run in.")
        sys.exit(1)

    self.project = dxencode.get_project(self.proj_name)
    self.proj_id = self.project.get_id()
    print("== Running in project [%s] and will attempt recovery to the [%s] server ==" %
          (self.proj_name, self.server_key))

    exp_count = 0
    # NOTE(review): `halted` is never incremented in this method, so the
    # final check only exits non-zero when no experiment was processed.
    halted = 0
    total_recovered = 0
    for exp_id in args.experiments:
        sys.stdout.flush()  # Slow running job should flush to piped log
        self.exp_id = exp_id
        # clear exp cache, which will hold exp specific wf_run and step_run objects
        self.obj_cache["exp"] = {}

        # 1) Lookup experiment type from encoded, based on accession
        print("Working on %s..." % self.exp_id)
        self.exp = dxencode.get_exp(self.exp_id, must_find=True, key=self.server_key)
        if self.exp is None or self.exp["status"] == "error":
            print("Unable to locate experiment %s in encoded (%s)" % (self.exp_id, self.server_key))
            continue
        self.exp_type = dxencode.get_exp_type(self.exp_id, self.exp, self.EXPERIMENT_TYPES_SUPPORTED)
        if self.exp_type is None:
            continue

        # 2) Locate the experiment accession named folder
        # NOTE: genome and annotation are not known for this exp yet, so the
        #       umbrella folder is just based on exp_type
        self.umbrella_folder = dxencode.umbrella_folder(args.folder, self.FOLDER_DEFAULT,
                                                        self.proj_name, self.exp_type)
        self.exp_folder = dxencode.find_exp_folder(self.project, exp_id,
                                                   self.umbrella_folder, warn=True)
        if self.exp_folder is None:
            continue
        print("- Examining %s:%s for '%s' results..." %
              (self.proj_name, self.exp_folder, self.exp_type))

        # 3) Given the experiment type, determine the expected results
        self.pipeline = self.pipeline_specification(args, self.exp_type, self.exp_folder)
        self.replicates = self.find_replicate_folders(self.exp_folder, verbose=args.verbose)

        # 4) Given expected results locate any files (by glob) that should be posted for
        #    a) each single replicate (in the replicate sub-folders)
        #    b) combined replicates in the experiment folder itself
        files_expected = self.find_expected_files(self.exp_folder, self.replicates,
                                                  verbose=args.verbose)
        print("- Found %d files that are available in DX." % len(files_expected))
        if len(files_expected) == 0:
            continue

        # 5) Of the expected files, find the ones that have been posted.
        files_posted = self.find_posted_files(files_expected, test=self.test,
                                              verbose=args.verbose)
        print("- Found %d files that have been posted" % len(files_posted))
        if len(files_posted) == 0:
            continue

        # 6) For each file that has been posted:
        exp_count += 1
        file_count = 0
        recovery_count = 0
        for (out_type, rep_tech, fid) in files_posted:
            sys.stdout.flush()  # Slow running job should flush to piped log
            accession = self.found[fid]['accession']
            file_name = dxencode.file_path_from_fid(fid)

            # Skip files until the requested starting accession / file name.
            if args.start_at is not None:
                if accession != args.start_at and not file_name.endswith(args.start_at):
                    continue
                print("- Starting at %s" % (file_name))
                args.start_at = None

            # a) discover all necessary dx information needed for post.
            # b) gather any other information necessary from dx and encoded.
            print("- Handle file %s %s" % (accession, file_name))
            payload = self.make_payload_obj(out_type, rep_tech, fid, verbose=args.verbose)

            file_count += 1
            # c) Update encoded database only if necessary.
            if self.file_metadata_recovery(fid, payload, args.test, verbose=args.verbose):
                recovery_count += 1

            if args.files != 0 and file_count >= args.files:  # Short circuit for test
                print("- Just trying %d file(s) by request" % file_count)
                break

        if not args.test:
            print("- For %s Processed %d file(s), recovered %s" %
                  (self.exp_id, file_count, recovery_count))
        else:
            print("- For %s Processed %d file(s), would recover %s" %
                  (self.exp_id, file_count, recovery_count))
        total_recovered += recovery_count

    if not args.test:
        print("Processed %d experiment(s), halted %d, recovered %d file(s)" %
              (exp_count, halted, total_recovered))
    else:
        print("Processed %d experiment(s), halted %d, would recover %d file(s)" %
              (exp_count, halted, total_recovered))
    if halted == exp_count:
        sys.exit(1)
    print("(finished)")
for fob in obs: if fob['file_format'] == 'fastq' or fob['status'] == 'revoked': continue fn = fob['submitted_file_name'] folder = dxpy.describe( dxpy.find_one_data_object( name=fn.strip('/'), project='project-BQkYKg00F1GP55qQ9Qy00VP0')['id'])['folder'] newfn = folder + '/' + fn.strip('/') print "Patch: %s with %s" % (fn, newfn) res = requests.patch(srv + fob['@id'], auth=(id, pw), data=json.dumps({'submitted_file_name': newfn}), headers={'content-type': 'application/json'}) try: res.raise_for_status() print "Success" except Exception, e: print "Failed %s" % e (id, pw, srv) = dxencode.processkey('www') accs = dxencode.encoded_get(srv + 'ENCSR000AEV', AUTHID=id, AUTHPW=pw).json()['original_files'] file_objs = [ dxencode.encoded_get(srv + acc, AUTHID=id, AUTHPW=pw).json() for acc in accs ] patch(file_objs)