def PATCH(self, request):
    """
    Insert or update the run identified by ``self.run_id``.

    The request is converted to a dict with ``self._req_to_dict`` and
    sanitised before being written:

    * ``run_id`` is dropped; ``self.run_id`` is always the key used.
    * ``date_created`` is dropped (not writeable by clients).
    * ``status`` is dropped unless it is 0 or 30.

    If the run already exists it is updated with the remaining fields;
    otherwise it is inserted with its status forced to ``run.CONFIG``.

    Returns a ``(message, http_status)`` tuple; the message carries the
    cleaned stored record under ``'data'`` and the status is always 200.
    """
    self.app.logger.info("Run.PATCH()")
    req = self._req_to_dict(request)
    self.app.logger.debug("request %r" % req)

    # No distinction is made between insert and update from the caller's
    # point of view; strip the fields a client may not set.
    req.pop('run_id', None)  # remove run_id from dict, using self.run_id
    req.pop('date_created', None)  # created is non-writeable
    # Only "initialize" or "abort" may be requested from the web.
    # NOTE(review): 0 and 30 are presumably the numeric values of those two
    # states in the run model -- confirm and replace with named constants.
    if 'status' in req and req['status'] not in [0, 30]:
        req.pop('status')  # status unwriteable from web

    exists = run.get_ANRun(self.run_id)
    if exists:
        self.app.logger.info("Update %s" % self.run_id)
        result = run.update_ANRun(self.run_id, **req)
    else:
        # all new runs start in the config status
        self.app.logger.info("Insert %s" % self.run_id)
        req['status'] = run.CONFIG
        result = run.insert_ANRun(self.run_id, **req)

    result = self._clean_response(result)
    msg = {'status': 'complete', 'data': result}
    return (msg, 200)
def _get_gpu_run(self):
    """
    Return the first run owned by this master whose status is
    ``ACTIVE_ALL_SENT`` or ``ACTIVE``, or ``None`` when there is none.
    """
    for candidate in run_mdl.get_ANRun():
        # Runs belonging to another master are ignored.
        if candidate["master_name"] == self._master_name:
            if candidate["status"] in (run_mdl.ACTIVE_ALL_SENT,
                                       run_mdl.ACTIVE):
                return candidate
    return None
def GET(self):
    """
    Return a single run.

    Looks up ``self.run_id``, passes the record through
    ``self._clean_response`` and returns ``(message, http_status)``:
    200 when the cleaned record is truthy, 404 otherwise.
    """
    self.app.logger.info("Run.GET(%r)" % self.run_id)
    record = run.get_ANRun(self.run_id)
    self.app.logger.info("%r" % record)
    record = self._clean_response(record)
    http_status = 200 if record else 404
    return ({'status': 'complete', 'data': record}, http_status)
def handle_state(self):
    """
    Process pending responses, then act on the server's current status.

    * cluster not active -> status is set to TERMINATED first
    * INIT               -> send the init message
    * WAITING            -> restart when no run is assigned, or when the
                            assigned run is no longer ACTIVE (the run id
                            is cleared in that case)
    * TERMINATED         -> delete the queues
    * RESTARTING         -> hard restart once the restart timeout passed
    """
    self.get_responses()
    self.check_response()
    if not self.cluster_active:
        # cluster down
        self.set_status(svr_mdl.TERMINATED)

    current = self.status
    self.logger.debug("handle_state[%s]" % current)

    if current == svr_mdl.INIT:
        self.send_init()
        return

    if current == svr_mdl.WAITING:
        if self._run_id is None:
            self.restart()
        elif run_mdl.get_ANRun(self._run_id)['status'] != run_mdl.ACTIVE:
            self._run_id = None
            self.restart()
        return

    if current == svr_mdl.TERMINATED:
        self.delete_queues()
        return

    if current == svr_mdl.RESTARTING and self._restart_timeout < datetime.now():
        self.hard_restart()
# handle_heartbeat: drain self.status_queue and react to each status message.
#
# For every message popped from the queue:
#   * a message whose "message" field is "terminated" sets the server status
#     to svr_mdl.TERMINATED and returns immediately;
#   * otherwise the "terminating" and "source-q" fields are read, and when the
#     server is not RESTARTING the current run is looked up;
#   * for a run whose status is COMPLETE, a one-minute _complete_timeout is
#     armed on first sight; once that deadline has passed the run id is
#     cleared and self._restart() is called;
#   * the remaining branches reset _complete_timeout / _idle or set
#     self._terminated = True.
#
# NOTE(review): this definition has been flattened onto one line, so which
# `if` each of the two `else:` clauses belongs to cannot be determined from
# the text here. Restore the original indentation before changing the logic.
# NOTE(review): run["status"] is indexed without a None check -- confirm
# get_ANRun cannot return None for a cleared/unknown self._run_id.
def handle_heartbeat(self): while len(self.status_queue) > 0: mess = self.status_queue.pop() if "message" in mess: if mess["message"] == "terminated": self.set_status(svr_mdl.TERMINATED) return term = mess["terminating"] if term == 0: if mess["source-q"] == 0: if not self.status == svr_mdl.RESTARTING: run = run_mdl.get_ANRun(self._run_id) if run["status"] == run_mdl.COMPLETE: if self._complete_timeout is None: self._complete_timeout = datetime.now() + timedelta(minutes=1) if self._complete_timeout < datetime.now(): self._run_id = None self._restart() else: self._complete_timeout = None self._idle = 0 else: self._terminated = True
def run_config(self):
    """
    Return the run configuration for ``self.run_id``.

    The record is fetched on first use and cached on the instance in
    ``self._run_config``; later calls return the cached value.
    """
    if self._run_config is not None:
        return self._run_config
    self._run_config = run_mdl.get_ANRun(self.run_id)
    self.logger.debug("Run Config set %r" % self._run_config)
    return self._run_config
from datadirac.aggregator.accumulator import Truthiness, Accumulator
from datadirac.aggregator.controller import AggManager
import masterdirac.models.run as run_mdl
import pandas

# Dump the truth table of one run to "<run_id>.csv", with one row per
# network reported by the accumulator.

# change this to current run id
run_id = 'b6-q50-nov28-3'
run_model = run_mdl.get_ANRun(run_id)

truth = Truthiness(run_model)._get_truth()
nets = Accumulator(run_model).networks

df = pandas.DataFrame(truth)
df.index = nets  # label rows with the accumulator's network list
df.to_csv("%s.csv" % run_id)
# createEVApackage(run_id, windows): build the EVA input files for one run,
# invoke EVA once per window and collect the parsed results.
#
# Parameters
#   run_id  -- key passed to r_model.get_ANRun(); also used as the name of the
#              output directory (created relative to the current directory).
#   windows -- iterable of (start, end) pairs; each pair is formatted with
#              "%i", so integer bounds are presumably expected -- confirm.
#
# Steps, in order:
#   1. Create the run directory and load the run model.
#   2. If "<cwd>/<run_id>/expression.pnd" is missing, fetch the source data
#      and generate/save the expression dataframe there.
#   3. Load the dataframe and the network info into a dd.SourceData object.
#   4. Read the metadata file; warn when there is more than one strain or
#      more than two alleles; raise Exception when 'WT' is not an allele.
#   5. Select the first strain, 'WT' and the first non-WT allele; assert both
#      sample groups are non-empty; sort each group by (age, sample id).
#   6. Write gene_names.txt and net.gmt into the working directory.
#   7. For every window, write "<pattern>.expression.tsv" and "<pattern>.pheno"
#      (one "0" line per WT sample, one "1" line per comparison sample), call
#      EVA on those files, log its non-empty messages and parse the result.
#   8. Save one CSV per value type found in the first result and return the
#      dict of parsed results keyed by the (start, end) tuple.
#
# NOTE(review): Python 2 only -- relies on dict.iteritems() and on indexing
# result.keys().
# NOTE(review): with an empty `windows`, `result` stays empty and
# result.keys()[0] raises IndexError.
# NOTE(review): files are written under os.getcwd()/run_id but handed to EVA
# as paths relative to run_id; the two agree only while the working directory
# is unchanged.
# NOTE(review): the locals `sd` and `net_config` are assigned but not read
# again in this function.
# NOTE(review): the asserts guarding the sample groups are stripped under -O.
def createEVApackage( run_id, windows ): """ Generate the files for EVA """ if not os.path.exists( run_id ): os.makedirs( run_id ) run_model = r_model.get_ANRun( run_id ) sd = run_model['source_data'] net_config = run_model['network_config'] #download source data ###DEBUG working_dir = os.path.join( os.getcwd(), run_id ) if not os.path.exists( working_dir ): os.makedirs( working_dir ) pandas_file = os.path.join( working_dir, "expression.pnd" ) if not os.path.exists( pandas_file ): _get_source_data( working_dir , run_model ) hdg = hdp.HDDataGen( working_dir ) df, _ = hdg.generate_dataframe( run_model['source_data'], run_model['network_config'] ) df.save( pandas_file ) sd_obj = dd.SourceData() sd_obj.load_dataframe( pandas_file ) net_table = run_model['network_config']['network_table'] net_source = run_model['network_config']['network_source'] sd_obj.load_net_info(net_table, net_source ) _, meta_file = os.path.split( run_model['source_data']['meta_file'] ) mi = dd.MetaInfo( os.path.join( run_id, meta_file ) ) strain = mi.get_strains() if len(strain) > 1: logging.warning("More than one strain, only getting first") logging.warning("Strains %r" % strain ) alleles = mi.get_nominal_alleles() if len( alleles ) > 2: logging.warning("More than two alleles, only using 'WT' and other") logging.warning("Alleles %r" % alleles ) if 'WT' not in alleles: raise Exception("Wild type not in alleles. 
Alleles = %r" % alleles) second_allele = [allele for allele in alleles if allele != 'WT'][0] wt_samples = mi.get_sample_ids( strain=strain[0], allele='WT' ) comp_samples = mi.get_sample_ids( strain=strain[0], allele = second_allele) assert len(wt_samples) > 0 assert len( comp_samples ) > 0 wt_s_a = sorted( [(mi.get_age( sid), sid) for sid in wt_samples] ) comp_s_a = sorted( [(mi.get_age( sid), sid) for sid in comp_samples] ) comparisons = {} gene_names_fname = "gene_names.txt" with open(os.path.join(working_dir , gene_names_fname), 'w') as gnf: gnf.write('\n'.join(['"%s"' % gn for gn in sd_obj.source_dataframe.index])) logging.info("Wrote %s" % gene_names_fname ) network_fname = "net.gmt" with open( os.path.join(working_dir, network_fname), 'w') as nf: for pw in sd_obj.get_pathways(): nf.write( '\t'.join([pw, 'na'] + sd_obj.get_genes( pw )) + '\n' ) logging.info("Wrote %s" % network_fname ) for start, end in windows: comparisons[(start, end)] = ( window( start, end, wt_s_a), window( start, end, comp_s_a)) result = {} for win, v in comparisons.iteritems(): window_pattern = "start%iend%i" % win wt_s, comp_s = v curr_df = sd_obj.get_expression( wt_s + comp_s ) exp_table_fname = "%s.expression.tsv" % (window_pattern) curr_df.to_csv( os.path.join(working_dir, exp_table_fname), index=False, header=False, sep='\t') pheno_fname = "%s.pheno" % ( window_pattern) with open( os.path.join(working_dir, pheno_fname), 'w') as ph: for s in wt_s: ph.write('0\n') for s in comp_s: ph.write('1\n') params = ( exp_table_fname, gene_names_fname, network_fname, pheno_fname, "%s.%s.result.txt" % (run_id, window_pattern )) params = tuple([ os.path.join(run_id,p) for p in params]) fin, mess = EVA( *params ) for m in mess: if len(m[1].strip()) > 0: logging.info("%s: %s" % (m[0], m[1])) result[win] = parse_result( params[-1] ) #DEBUG t = result.keys()[0] n = result[t].keys()[0] for dt in result[t][n].keys(): save_table( result, "%s.%s.csv" % (run_id, dt), val_type=dt ) return result
# Fragment: everything up to `return (cont, messages)` is the tail of a
# function whose header lies outside this view. It copies the child process's
# stdout and then stderr into `messages` as ('stdout'|'stderr', line) tuples,
# appends a final ('wrapper', 'Complete: returned[<cont>]') entry and returns
# (cont, messages).
# NOTE(review): each read loop stops at the first line that is empty after
# strip(), so a blank line in the middle of the output also ends the loop.
#
# Script entry point: configures logging to "megarun.log", iterates over all
# runs from r_model.get_ANRun() and builds the EVA package for those whose
# run_id is in the hard-coded list ['fvb-biocarta'], using twelve 5-wide
# windows starting at 4..15 plus (4,20), (4,12) and (12,20).
# NOTE(review): `complete` is computed but only referenced by the
# commented-out filter.
# NOTE(review): the bare `except:` also traps KeyboardInterrupt/SystemExit;
# `except Exception:` is probably what is wanted -- confirm.
line = sc_p.stdout.readline().strip() while line != '': messages.append(('stdout', line)) line = sc_p.stdout.readline().strip() line = sc_p.stderr.readline().strip() while line != '': messages.append(('stderr', line)) line = sc_p.stderr.readline().strip() messages.append(('wrapper', 'Complete: returned[%i]' % cont)) return (cont, messages) if __name__ == "__main__": #get runs we've already completed complete = get_complete_run_ids( 'eva-results' ) logging.basicConfig(level=logging.DEBUG, filename="megarun.log") #loops over runs for r in r_model.get_ANRun(): if r['run_id'] in ['fvb-biocarta']: #if r['status'] == 20 and r['run_id'][:4] not in ['test', 'lab-', 'joc-']: # if r['run_id'] in complete: # logging.warning("Skipping %s. Already exists" % (r['run_id'],)) # continue run_id = r['run_id'] windows = [(i, i+5) for i in range(4,16)] + [(4,20), (4,12), (12,20)] try: eva_res = createEVApackage(run_id, windows) except: logging.exception("Error running Eva")
# Fragment containing, in order: imports, the NetworkInfo model, a module-level
# fetch of all runs, and the beginning of writeit().
#
# NetworkInfo: model bound to the table 'net_info_table', with hash key
# src_id, range key pw_id, and string attributes broad_url and gene_ids that
# default to ''.
#
# runs: every run returned by run_mdl.get_ANRun(), fetched at import time.
#
# writeit(r): skips the run (returns None) unless 'trn' is one of the
# '-'-separated parts of r['run_id'] and r['status'] is run_mdl.COMPLETE; also
# skips the run ids 'b6-q111-kegg' and 'fvb-analysis1'. Otherwise it prints the
# run id and reads the network table/source and the destination dataframe and
# metadata file names from the run record.
# NOTE(review): the second `r['status'] != run_mdl.COMPLETE` test repeats the
# first and can never be true at that point.
# NOTE(review): `print r['run_id']` is a Python 2 print statement.
# NOTE(review): the function continues past the end of this view (the `try`
# has no visible handler), so the rest of its behaviour is not documented here.
from pynamodb.attributes import UnicodeAttribute import os import os.path from multiprocessing import Pool class NetworkInfo(Model): class Meta: table_name = 'net_info_table' src_id = UnicodeAttribute(hash_key=True) pw_id = UnicodeAttribute(range_key=True) broad_url=UnicodeAttribute(default='') gene_ids=UnicodeAttribute(default='') runs = run_mdl.get_ANRun() def writeit( r): try: s_run = r['run_id'].split('-') if 'trn' not in s_run or r['status'] != run_mdl.COMPLETE: return if r['run_id'] in ['b6-q111-kegg', 'fvb-analysis1']: return if r['status'] != run_mdl.COMPLETE: return print r['run_id'] net_table = r['network_config']['network_table'] net_source_id = r['network_config']['network_source'] source_dataframe = r['dest_data']['dataframe_file'] metadata_file = r['dest_data']['meta_file']
def get_run_model(self):
    """
    Log the lookup and return the run record for ``self.run_id`` as
    provided by ``run_mdl.get_ANRun``.
    """
    self.logger.info("Getting run[%s] info" % self.run_id)
    model = run_mdl.get_ANRun(self.run_id)
    return model
def _get_active_run(self):
    """
    Return the first run owned by this master whose status is ``ACTIVE``,
    or ``None`` when no such run exists.
    """
    for candidate in run_mdl.get_ANRun():
        owned = candidate['master_name'] == self._master_name
        if owned and candidate['status'] == run_mdl.ACTIVE:
            return candidate
    return None