def find_control_file(self,rep,default=None):
    '''Attempts to find an appropriate control file.

    For each entry in rep['controls'] the control file object and its
    replicate are looked up in encoded, and the replicate's experiment
    accession plus 'rep<bio>_<tech>' name are used to locate a file matching
    self.CONTROL_FILE_GLOB in the project.

    rep     -- replicate dict; only its 'controls' list is read here.
    default -- value returned when no control file could be found.

    Returns None when rep has no 'controls' key, the path of the first
    control file found, or default (when given) if nothing was found.
    If nothing is found and no default is given, prints a message and exits
    the process with status 1.
    '''
    # TODO Make more generic and move to dxencode.py when needed.

    # NOTE(review): the returned credentials are not used in this method; the
    # call is kept in case processkey validates the key - confirm and remove.
    dxencode.processkey(self.server_key)
    if 'controls' not in rep:
        return None
    for file_key in rep['controls']:
        # A control entry may itself be a list; only its first item is used.
        if isinstance(file_key,list):
            file_key = file_key[0]
        file_obj = dxencode.enc_lookup_json(file_key,self.server_key,frame='embedded')
        rep_id = file_obj["replicate"]['@id']
        rep_obj = dxencode.enc_lookup_json(rep_id,self.server_key,frame='embedded')
        # '@id' looks like '/experiments/<accession>/', so item 2 is the accession.
        exp_id = rep_obj['experiment']['@id'].split('/')[2]
        rep_tech = "rep%s_%s" % \
                (rep_obj['biological_replicate_number'], rep_obj['technical_replicate_number'])
        control_root = self.psv['control_path']
        # Cheating:
        if self.proj_name == "scratchPad" and control_root == self.CONTROL_ROOT_FOLDER:
            control_root = "/lrna"
        rep_path = exp_id + '/' + rep_tech
        # Fallback glob, used when find_folder locates nothing.  Make sure the
        # root and the experiment id are separated by exactly one '/', since
        # roots such as "/lrna" carry no trailing slash.
        root_prefix = control_root
        if root_prefix and not root_prefix.endswith('/'):
            root_prefix += '/'
        path_n_glob = root_prefix + rep_path + '/' + self.CONTROL_FILE_GLOB
        target_folder = dxencode.find_folder(rep_path,self.project,control_root)
        if target_folder is not None:
            path_n_glob = target_folder + '/' + self.CONTROL_FILE_GLOB
        fid = dxencode.find_file(path_n_glob,self.proj_id,multiple=False,recurse=False)
        if fid is not None:
            return dxencode.file_path_from_fid(fid)

    if default is not None:
        return default
    print("Unable to find control in search of %s" % rep['controls'])
    sys.exit(1)
Exemple #2
0
 def __init__(self):
     '''Reads the command line arguments and sets up empty per-run state.

     The encoded credentials are taken from the 'test' key when the test
     argument is set and from the 'www' key otherwise.
     '''
     self.args = self.get_args()
     # expect graph of form (child, parent)
     self.expected_graph = dict(unpaired={}, paired={})
     self.assay_term_name = ''
     server_key = 'test' if self.args.test else 'www'
     credentials = dxencode.processkey(server_key)
     (self.authid, self.authpw, self.server) = credentials
     self.experiments = []
 def find_control_file(self,rep,default=None):
     '''Attempts to find an appropriate control file.

     For each key in rep['controls'] the control file object and then its
     replicate are fetched from encoded.  The replicate's experiment
     accession and 'rep<bio>_<tech>' name are used to locate a file matching
     self.CONTROL_FILE_GLOB in the project.

     rep     -- replicate dict; only its 'controls' list is read here.
     default -- value returned when no control file could be found.

     Returns the path of the first control file found, or default (when
     given) if none was found.  If a lookup fails, or nothing is found and
     no default is given, prints a message and exits with status 1.
     '''
     # TODO Make more generic and move to dxencode.py when needed.

     (AUTHID,AUTHPW,SERVER) = dxencode.processkey(self.server_key)

     def fetch_embedded(obj_id, what):
         '''Returns the embedded JSON for obj_id; reports and exits on failure.'''
         url = '%s%s/?format=json&frame=embedded' % (SERVER,obj_id)
         # Pre-bind so the failure report below cannot hit an unbound (or
         # stale) name when the request itself raises.
         response = None
         try:
             response = dxencode.encoded_get(url, AUTHID, AUTHPW)
             return response.json()
         except Exception:
             # Not a bare except: Ctrl-C and SystemExit must still propagate.
             print("URL to %s [%s] returned ?" % (what, url))
             print(response)
             sys.exit(1)

     for file_key in rep['controls']:
         file_obj = fetch_embedded(file_key, "control")
         rep_obj = fetch_embedded(file_obj["replicate"]['@id'], "replicate")
         # 'experiment' is a path like '/experiments/<accession>/'.
         exp_id = rep_obj['experiment'].split('/')[2]
         rep_tech = "rep%s_%s" % \
                 (rep_obj['biological_replicate_number'], rep_obj['technical_replicate_number'])
         # default by cheating
         if self.proj_name == dxencode.PRODUCTION_PROJECT:
             control_root = "/long-RNA-seq/runs/"
         else:
             control_root = self.CONTROL_ROOT_FOLDER
         rep_path = exp_id + '/' + rep_tech
         # Fallback glob, replaced below when find_folder locates the folder.
         path_n_glob = control_root + rep_path + '/' + self.CONTROL_FILE_GLOB
         target_folder = dxencode.find_folder(rep_path,self.project,control_root)
         if target_folder is not None:
             path_n_glob = target_folder + '/' + self.CONTROL_FILE_GLOB
         fid = dxencode.find_file(path_n_glob,self.proj_id,multiple=False,recurse=False)
         if fid is not None:
             return dxencode.file_path_from_fid(fid)

     if default is not None:
         return default
     print("Unable to find control in search of %s" % rep['controls'])
     sys.exit(1)
Exemple #4
0
#!/usr/bin/env python

import dxencode
import dxpy
import json
import requests

def patch(obs, project='project-BQkYKg00F1GP55qQ9Qy00VP0'):
    '''Prefixes each file's submitted_file_name with its DNAnexus folder.

    obs     -- iterable of encoded file objects (dicts).  Objects whose
               'file_format' is 'fastq' or whose 'status' is 'revoked' are
               skipped untouched.
    project -- DNAnexus project id in which each file is looked up by name.

    For every remaining object the file is found by name in the project,
    and the object is PATCHed on the server with 'submitted_file_name' set
    to '<folder>/<name>'.  Prints the outcome of each request and returns
    None.  Relies on the module-level credentials id, pw and srv.
    '''
    for fob in obs:
        if fob['file_format'] == 'fastq' or fob['status'] == 'revoked':
            continue
        fn = fob['submitted_file_name']
        base_name = fn.strip('/')
        found = dxpy.find_one_data_object(name=base_name, project=project)
        folder = dxpy.describe(found['id'])['folder']
        # The project root folder is '/'; strip it so the result is '/name'
        # rather than '//name'.
        newfn = folder.rstrip('/') + '/' + base_name
        print("Patch: %s with %s" % (fn, newfn))
        res = requests.patch(srv+fob['@id'], auth=(id,pw), data=json.dumps({'submitted_file_name': newfn}),headers={'content-type': 'application/json'})
        try:
            res.raise_for_status()
        except Exception as e:
            print("Failed %s" % e)
        else:
            print("Success")


# Credentials for the 'www' key; patch() reads id, pw and srv as globals.
id, pw, srv = dxencode.processkey('www')

# Keys of every original file attached to experiment ENCSR000AEV.
accs = dxencode.encoded_get(
    srv+'ENCSR000AEV', AUTHID=id, AUTHPW=pw
).json()['original_files']

# Fetch each file object in turn, then patch them all.
file_objs = []
for acc in accs:
    file_objs.append(dxencode.encoded_get(srv+acc, AUTHID=id, AUTHPW=pw).json())

patch(file_objs)
Exemple #5
0
    def run(self):
        '''Runs recovery from start to finish using command line arguments.

        Resolves the target server and DNAnexus project, then for each
        experiment in args.experiments:
          1) looks the experiment up in encoded and determines its type,
          2) locates the experiment folder in the project,
          3) determines the pipeline and replicate folders,
          4) finds the expected result files,
          5) finds which of those have already been posted,
          6) builds a payload for each posted file and recovers its metadata.

        Experiments that cannot be resolved at any of steps 1-5 are skipped.
        Prints progress throughout.  Exits with status 1 when no project can
        be determined, or at the end when the halted count equals the number
        of experiments processed.
        '''
        args = self.get_args()
        self.test = args.test
        self.ignore = False
        self.server_key = args.server
        if args.ignore_properties:
            print("Ignoring DXFile properties (will post to test server)")
            self.ignore = args.ignore_properties
            # Mandated because option is dangerous.  This must follow the
            # args.server assignment above, otherwise it is overwritten.
            self.server_key = 'test'

        self.authid, self.authpw, self.server = dxencode.processkey(self.server_key)

        if self.server_key == "www":
            self.acc_prefix = "ENCFF"
        # An explicit --project always wins over the environment's project.
        self.proj_name = dxencode.env_get_current_project()
        if self.proj_name is None or args.project is not None:
            self.proj_name = args.project
        if self.proj_name is None:
            print("Please enter a '--project' to run in.")
            sys.exit(1)

        self.project = dxencode.get_project(self.proj_name)
        self.proj_id = self.project.get_id()
        print("== Running in project [%s] and will attempt recovery to the [%s] server ==" %
              (self.proj_name,self.server_key))

        exp_count = 0
        # NOTE(review): halted is never incremented in this method, so the
        # final exit(1) only fires when no experiment was processed - confirm.
        halted = 0
        total_recovered = 0
        for exp_id in args.experiments:
            sys.stdout.flush() # Slow running job should flush to piped log
            self.exp_id = exp_id
            self.obj_cache["exp"] = {}  # clear exp cache, which will hold exp specific wf_run and step_run objects
            # 1) Lookup experiment type from encoded, based on accession
            print("Working on %s..." % self.exp_id)
            self.exp = dxencode.get_exp(self.exp_id,must_find=True,key=self.server_key)
            if self.exp is None or self.exp["status"] == "error":
                print("Unable to locate experiment %s in encoded (%s)" % (self.exp_id, self.server_key))
                continue
            self.exp_type = dxencode.get_exp_type(self.exp_id,self.exp,self.EXPERIMENT_TYPES_SUPPORTED)
            if self.exp_type is None:
                continue

            # 2) Locate the experiment accession named folder
            # NOTE: genome and annotation are not known for this exp yet, so the umbrella folder is just based on exp_type
            self.umbrella_folder = dxencode.umbrella_folder(args.folder,self.FOLDER_DEFAULT,self.proj_name,self.exp_type)
            self.exp_folder = dxencode.find_exp_folder(self.project,exp_id,self.umbrella_folder,warn=True)
            if self.exp_folder is None:
                continue
            print("- Examining %s:%s for '%s' results..." %
                  (self.proj_name, self.exp_folder, self.exp_type))

            # 3) Given the experiment type, determine the expected results
            self.pipeline   = self.pipeline_specification(args,self.exp_type,self.exp_folder)
            self.replicates = self.find_replicate_folders(self.exp_folder, verbose=args.verbose)

            # 4) Given expected results locate any files (by glob) that should be posted for
            #    a) each single replicate (in replicate sub-folders named as reN_N/
            #    b) combined replicates in the experiment folder itself
            files_expected = self.find_expected_files(self.exp_folder, self.replicates, verbose=args.verbose)
            print("- Found %d files that are available in DX." % len(files_expected))
            if len(files_expected) == 0:
                continue

            # 5) For each file that should be posted, determine if the file needs to be posted.
            files_posted = self.find_posted_files(files_expected, test=self.test, verbose=args.verbose)
            print("- Found %d files that have been posted" % len(files_posted))
            if len(files_posted) == 0:
                continue

            # 6) For each file that needs to be posted:
            exp_count += 1
            file_count = 0
            recovery_count = 0
            for (out_type,rep_tech,fid) in files_posted:
                sys.stdout.flush() # Slow running job should flush to piped log
                accession = self.found[fid]['accession']
                file_name = dxencode.file_path_from_fid(fid)
                # Skip files until the requested starting accession or file
                # name is reached; after that every file is handled.
                if args.start_at is not None:
                    if accession != args.start_at and not file_name.endswith(args.start_at):
                        continue
                    print("- Starting at %s" % (file_name))
                    args.start_at = None

                # a) discover all necessary dx information needed for post.
                # b) gather any other information necessary from dx and encoded.
                print("- Handle file %s %s" % (accession,file_name))
                payload = self.make_payload_obj(out_type,rep_tech,fid, verbose=args.verbose)

                file_count += 1
                # c) Update encoded database only if necessary.
                if self.file_metadata_recovery(fid,payload,args.test,verbose=args.verbose):
                    recovery_count += 1

                if args.files != 0 and file_count >= args.files:  # Short circuit for test
                    print("- Just trying %d file(s) by request" % file_count)
                    break

            if not args.test:
                print("- For %s Processed %d file(s), recovered %s" % (self.exp_id, file_count, recovery_count))
            else:
                print("- For %s Processed %d file(s), would recover %s" % (self.exp_id, file_count, recovery_count))
            total_recovered += recovery_count

        if not args.test:
            print("Processed %d experiment(s), halted %d, recovered %d file(s)" % (exp_count, halted, total_recovered))
        else:
            print("Processed %d experiment(s), halted %d, would recover %d file(s)" % (exp_count, halted, total_recovered))
        if halted == exp_count:
            sys.exit(1)
        print("(finished)")
Exemple #6
0
    for fob in obs:
        if fob['file_format'] == 'fastq' or fob['status'] == 'revoked':
            continue
        fn = fob['submitted_file_name']
        folder = dxpy.describe(
            dxpy.find_one_data_object(
                name=fn.strip('/'),
                project='project-BQkYKg00F1GP55qQ9Qy00VP0')['id'])['folder']
        newfn = folder + '/' + fn.strip('/')
        print "Patch: %s with %s" % (fn, newfn)
        res = requests.patch(srv + fob['@id'],
                             auth=(id, pw),
                             data=json.dumps({'submitted_file_name': newfn}),
                             headers={'content-type': 'application/json'})
        try:
            res.raise_for_status()
            print "Success"
        except Exception, e:
            print "Failed %s" % e


# Credentials for the 'www' key; the patch code reads id, pw and srv as globals.
id, pw, srv = dxencode.processkey('www')

# Keys of every original file attached to experiment ENCSR000AEV.
accs = dxencode.encoded_get(
    srv + 'ENCSR000AEV', AUTHID=id, AUTHPW=pw
).json()['original_files']

# Fetch the file objects one at a time, in the order listed.
file_objs = []
for acc in accs:
    file_response = dxencode.encoded_get(srv + acc, AUTHID=id, AUTHPW=pw)
    file_objs.append(file_response.json())
del file_response

patch(file_objs)