def interface(S, P, K, covs=None, I=None, model='LMM', return_fields=['pv'],
              parallel=False, add_mean=True, jobs=1, file_directory=None,
              delta_opt_params=None, Ftest=False):
    """
    Interface function for association testing.

    S:    SNPs [indiv X snps]
    P:    phenotypes / expression [indiv X genes]
    K:    optional [indiv X indiv] matrix (both dimensions are checked
          against the number of individuals); may be None
    covs: covariates [indiv X n_covs]; defaults to an empty [indiv X 0]
          matrix
    I:    optional array with one row per individual; only its first
          dimension is checked here, it is forwarded to the testing core
    model: model identifier forwarded to testing_core.wrapper
    return_fields: fields to calculate and return.  NOTE: the default is a
          shared list object; it is never mutated in this function.
    parallel: if True, P is split into chunks (split_jobs) which are
          dispatched to IPython engines (start_engines)
    add_mean: if True, a column of ones is appended to covs
    jobs: maximum number of parallel jobs
    file_directory: if provided (and existing), data handling and result
          storing will be handled via file IO
    delta_opt_params: parameters for delta optimization
    Ftest: flag forwarded to testing_core.wrapper

    Returns a list with one result array per entry of return_fields, in
    the same order.

    Raises AssertionError ('dimension error') if the number of rows of S,
    K or I (or the number of columns of K) does not match the rows of P.
    """
    # 1. check dimensions
    Nn, Np = P.shape
    Ns = S.shape[1]
    assert (S.shape[0] == Nn), 'dimension error'
    if K is not None:
        assert (K.shape[0] == Nn), 'dimension error'
        assert (K.shape[1] == Nn), 'dimension error'
    if I is not None:
        assert (I.shape[0] == Nn), 'dimension error'

    # covariates
    if covs is None:
        covs = SP.zeros([Nn, 0])
    if add_mean:
        # add mean column (we should generally do this)
        covs = SP.concatenate((covs, SP.ones([Nn, 1])), axis=1)

    # figure out base path; handed to the workers as path_append
    cwd = os.getcwd()
    base_path = os.path.dirname(__file__)
    glob_base_path = os.path.join(cwd, base_path)
    paths = [glob_base_path]

    # file mode is only used if the requested directory actually exists
    file_mode = False
    if file_directory is not None:
        file_mode = os.path.exists(file_directory)
        if not file_mode:
            print("file mode for testing specified but directory %s not existing; disableing" % file_directory)

    # set up file handling if switched on
    in_file_path = None
    out_file_base = None
    if file_mode:
        import h5py
        in_file = os.path.join(file_directory, 'data.hdf5')
        in_file_path = os.path.abspath(in_file)
        out_file_base = os.path.join(file_directory, 'result_')
        # store matrix objects (None entries are skipped)
        _data = {'S': S, 'P': P, 'K': K, 'I': I, 'covs': covs}
        f = h5py.File(in_file, 'w')
        try:
            for key in _data:
                if _data[key] is None:
                    continue
                f.create_dataset(name=key, data=_data[key], chunks=True)
        finally:
            # close the file even if writing a dataset fails
            f.close()

    if not parallel:
        RV = testing_core.wrapper(S, P, K, covs=covs, I=I, model=model,
                                  path_append=paths,
                                  index_s=SP.arange(S.shape[1]),
                                  index_p=SP.arange(P.shape[1]),
                                  in_file=in_file_path,
                                  delta_opt_params=delta_opt_params,
                                  Ftest=Ftest)
        return [RV[key] for key in return_fields]

    # set up engines and perform checks
    tc, eng_ids = start_engines(jobs)
    # split the data according to the number of jobs
    # note: jobs is the max. number of jobs
    Y_split = split_jobs(P, jobs)
    ipython_jobs = []
    for n, chunk in enumerate(Y_split):
        i0 = chunk[0]
        i1 = chunk[1]
        P_n = chunk[2]

        S_n, K_n, covs_n = S, K, covs
        out_file = None
        if file_mode:
            # Per-job result file; computed *before* the argument list is
            # built so that this job receives its own (not a stale) path.
            out_file = '%s%03d.pickle' % (out_file_base, n)
            if os.path.exists(out_file):
                os.remove(out_file)
            out_file = os.path.abspath(out_file)
            # The matrices were written to in_file_path above, so they are
            # not shipped to the engines.
            S_n = P_n = K_n = covs_n = None

        # Positional arguments of the job (all paths are absolute).  A plain
        # list is used: it is passed on unchanged and does not depend on how
        # an object array would be assembled from mixed entries.
        # NOTE(review): the order is assumed to match the positional
        # signature of testing_core.wrapper - confirm against its definition.
        xargs = [S_n, P_n, K_n, covs_n, I,
                 list(range(i0, i1)), list(range(S.shape[1])),
                 model, return_fields, paths,
                 out_file, in_file_path, Ftest, delta_opt_params]

        if ipy_version == 0.10:
            job = client.MapTask(testing_core.wrapper, xargs)
            ipython_jobs.append(tc.run(job))
        else:
            lbview = tc.load_balanced_view()
            job = lbview.apply(testing_core.wrapper, *xargs)
            ipython_jobs.append(job)

    # wait for job completion
    if ipy_version == 0.10:
        tc.barrier(ipython_jobs)
    else:
        tc.wait(ipython_jobs)

    RV = None
    for n, chunk in enumerate(Y_split):
        i0 = chunk[0]
        i1 = chunk[1]
        # fetch results from job
        if ipy_version == 0.10:
            _rv = tc.get_task_result(ipython_jobs[n])
        else:
            _rv = ipython_jobs[n].get()
        if isinstance(_rv, str):
            # A string result is treated as the path of a pickled result.
            # Only files written by our own workers should be unpickled.
            with open(_rv, 'rb') as f:
                _rv = cPickle.load(f)
        _rv = testing_core.filter_dict(_rv, return_fields)
        # do we need to create result structures?
        if RV is None:
            RV = {}
            # shape of every field: [Ns, Np] plus any trailing dimensions
            # of the per-job result
            for key in _rv.keys():
                val = _rv[key]
                dim0 = [Ns, Np]
                dim0.extend(list(val.shape[2::]))
                RV[key] = SP.empty(dim0)
        # slot in result fields for the phenotype columns of this job
        for key in _rv.keys():
            RV[key][:, i0:i1] = _rv[key]
    return [RV[key] for key in return_fields]