def update_users_statistics(scard, params):
    strn = "SELECT Total_UserSubmissions FROM Users WHERE User = '{0}';".format(
        params['username'])
    UserSubmissions_total = utils.db_grab(strn)[0][0]
    UserSubmissions_total += 1
    strn = "UPDATE Users SET Total_UserSubmissions = '{0}' WHERE User = '{1}';".format(
        UserSubmissions_total, params['username'])
    utils.db_write(strn)

    strn = "SELECT Total_Jobs FROM Users WHERE User = '{0}';".format(
        params['username'])
    jobs_total = utils.db_grab(strn)[0][0]
    jobs_total += int(scard.data['jobs'])
    strn = "UPDATE Users SET Total_Jobs = '{0}' WHERE User = '{1}';".format(
        jobs_total, params['username'])
    utils.db_write(strn)

    if 'nevents' in scard.data:
        strn = "SELECT Total_Events FROM Users WHERE User = '{0}';".format(
            params['username'])
        events_total = utils.db_grab(strn)[0][0]
        events_total += int(scard.data['jobs']) * int(scard.data['nevents'])
        strn = "UPDATE Users SET Total_Events = '{0}' WHERE User = '{1}';".format(
            events_total, params['username'])
        utils.db_write(strn)
    else:
        utils.printer("""Since you are using custom LUND files, we are not able to update
            your usage statistics. This will not affect your simulations in any way,
            but will affect the total number of events reported as processing
            through our system.""")

    strn = "UPDATE Users SET Most_Recent_Active_Date = '{0}' WHERE User = '{1}';".format(
        utils.gettime(), params['username'])
    utils.db_write(strn)
def user_validation():
    # These next two lines are good but do not work on python < 2.7
    # username = (subprocess.check_output('whoami'))[:-1]  # The [:-1] drops the trailing \n from the string
    # domain_name = subprocess.check_output(['hostname', '-d'])  # socket.getfqdn() / socket.getdomain_name()
    username = Popen(['whoami'], stdout=PIPE).communicate()[0].split()[0]

    is_travis = 'TRAVIS' in os.environ
    if is_travis:
        print("We're in a travis-ci environment")
        fake_domain = "travis.dev"
        domain_name = fake_domain
    else:
        # The following does not work on mac. This needs to get resolved; currently bridged over for testing.
        # domain_name = Popen(['hostname', '-d'], stdout=PIPE).communicate()[0].split()[0]
        domain_name = "example_domain"

    strn = """SELECT 1 FROM Users WHERE EXISTS
        (SELECT 1 FROM Users WHERE User = "{0}" AND domain_name = "{1}")""".format(username, domain_name)
    user_already_exists = utils.db_grab(strn)

    if not user_already_exists:
        print("""\nThis is the first time {0} from {1} has submitted jobs. Adding user to database""".format(
            username, domain_name))
        strn = """INSERT INTO Users(User, domain_name, JoinDateStamp, Total_UserSubmissions,
            Total_Jobs, Total_Events, Most_Recent_Active_Date)
            VALUES ("{0}","{1}","{2}","{3}","{4}","{5}","{6}");""".format(
            username, domain_name, utils.gettime(), 0, 0, 0, "Null")
        utils.db_write(strn)

    return username
def cookie(name, value, tlength, path='/', domain=inet_addr):
    t = utils.gettime(tlength).split()
    expires = t[0] + ', ' + t[2] + ' ' + t[1] + ' ' + t[-1] + ' ' + t[-2] + ' GMT'
    cookie = name + '=' + value + '; ' + 'expires=' + expires + '; ' \
             + 'path=' + path + '; ' + 'domain=' + domain
    return cookie
def Lund_Downloader(lund_url_base, lund_download_dir, lund_filename, single_file=True):
    lund_content = ""
    try:
        # print("Trying to download {} file from {}".format(lund_filename, lund_url_base))
        full_lund_path = lund_url_base
        if not single_file:
            full_lund_path += "/" + lund_filename
        # html_reader returns a tuple; we need the contents of the tuple
        lund_raw_text = html_reader.html_reader(full_lund_path)[0]
        lund_raw_text = str(lund_raw_text)  # This might not be needed, converts from bytes to strings
        # Not strictly needed, but SQLite can't read " into data fields, only ' characters
        lund_content = lund_raw_text.replace('"', "'")
        # print("Downloaded {}".format(full_lund_path))
    except Exception as e:
        print("Unable to download lund file successfully.")
        print("The error encountered was: \n {}".format(e))
        f = open("lundException.txt", "a")
        f.write("\n an exception was encountered at {}, see below: \n".format(utils.gettime()))
        f.write(str(e))
        f.close()

    if len(lund_content) > 0:
        try:
            # print("Trying to save {}".format(lund_filename))
            filename = lund_download_dir + "/" + lund_filename
            with open(filename, "a") as file:
                file.write(lund_content)
            # print("Saved {} to {}{}".format(lund_filename, lund_download_dir, lund_filename))
        except Exception as e:
            print("Unable to save lund file successfully.")
            print("The error encountered was: \n {}".format(e))
            f = open("lundException.txt", "a")
            f.write("\n an exception was encountered at {}, see below: \n".format(utils.gettime()))
            f.write(str(e))
            f.close()
def htcondor_submit(args, scard, GcardID, file_extension, params):
    """ if value in submission === not submitted"""
    # Need to add condition here in case path is different for non-jlab
    scripts_baseDir = "/group/clas12/SubMit"
    condor_exec = scripts_baseDir + "/server/condor_submit.sh"
    jobOutputDir = "/volatile/clas12/osg"

    # don't know how to pass farmsubmissionID (4th argument), passing GcardID for now (it may be the same)
    submission = Popen([
        condor_exec, scripts_baseDir, jobOutputDir, params['username'],
        str(GcardID)
    ], stdout=PIPE).communicate()[0]

    print(submission)
    words = submission.split()
    node_number = words[len(words) - 1]  # This might only work on SubMIT

    strn = "UPDATE FarmSubmissions SET run_status = 'submitted to pool' WHERE GcardID = '{0}';".format(
        GcardID)
    utils.db_write(strn)

    timestamp = utils.gettime()  # Can modify this if need 10ths of seconds or more resolution
    strn = "UPDATE FarmSubmissions SET submission_timestamp = '{0}' WHERE GcardID = '{1}';".format(
        timestamp, GcardID)
    utils.db_write(strn)

    strn = "UPDATE FarmSubmissions SET pool_node = '{0}' WHERE GcardID = '{1}';".format(
        node_number, GcardID)
    utils.db_write(strn)
def simulate_period(period_idx, period, processes, entities, init=False):
    print("\nperiod", period)
    if init:
        for entity in entities:
            print(" * %s: %d individuals" % (entity.name, len(entity.array)))
    else:
        print("- loading input data")
        for entity in entities:
            print(" *", entity.name, "...", end=' ')
            timed(entity.load_period_data, period)
            print(" -> %d individuals" % len(entity.array))

    for entity in entities:
        entity.array_period = period
        entity.array['period'] = period

    if processes:
        # build context for this period:
        const_dict = {'__simulation__': self,
                      'period': period,
                      'nan': float('nan'),
                      '__globals__': globals_data}
        num_processes = len(processes)
        for p_num, process_def in enumerate(processes, start=1):
            process, periodicity = process_def
            print("- %d/%d" % (p_num, num_processes), process.name, end=' ')
            print("...", end=' ')
            if period_idx % periodicity == 0:
                elapsed, _ = gettime(process.run_guarded, self, const_dict)
            else:
                elapsed = 0
                print("skipped (periodicity)")
            process_time[process.name] += elapsed
            if config.show_timings:
                print("done (%s elapsed)." % time2str(elapsed))
            else:
                print("done.")
            self.start_console(process.entity, period, globals_data)

    print("- storing period data")
    for entity in entities:
        print(" *", entity.name, "...", end=' ')
        timed(entity.store_period_data, period)
        print(" -> %d individuals" % len(entity.array))
    # print " - compressing period data"
    # for entity in entities:
    #     print " *", entity.name, "...",
    #     for level in range(1, 10, 2):
    #         print " %d:" % level,
    #         timed(entity.compress_period_data, level)
    period_objects[period] = sum(len(entity.array) for entity in entities)
def simulate_period(period_idx, period, processes, entities, init=False): print "\nperiod", period if init: for entity in entities: print " * %s: %d individuals" % (entity.name, len(entity.array)) else: print "- loading input data" for entity in entities: print " *", entity.name, "...", timed(entity.load_period_data, period) print " -> %d individuals" % len(entity.array) for entity in entities: entity.array_period = period entity.array["period"] = period if processes: # build context for this period: const_dict = {"period": period, "nan": float("nan"), "__globals__": globals_data} num_processes = len(processes) for p_num, process_def in enumerate(processes, start=1): process, periodicity = process_def print "- %d/%d" % (p_num, num_processes), process.name, # TODO: provide a custom __str__ method for Process & # Assignment instead if hasattr(process, "predictor") and process.predictor and process.predictor != process.name: print "(%s)" % process.predictor, print "...", if period_idx % periodicity == 0: elapsed, _ = gettime(process.run_guarded, self, const_dict) else: elapsed = 0 print "skipped (periodicity)" process_time[process.name] += elapsed if config.show_timings: print "done (%s elapsed)." % time2str(elapsed) else: print "done." self.start_console(process.entity, period, globals_data) print "- storing period data" for entity in entities: print " *", entity.name, "...", timed(entity.store_period_data, period) print " -> %d individuals" % len(entity.array) # print " - compressing period data" # for entity in entities: # print " *", entity.name, "...", # for level in range(1, 10, 2): # print " %d:" % level, # timed(entity.compress_period_data, level) period_objects[period] = sum(len(entity.array) for entity in entities)
def farm_submission_manager(args, usub_id, file_extension, scard, params, db_conn, sql):
    timestamp = utils.gettime()
    # For now we are hardcoding OSG as it is the only farm that is currently supported
    farm_name = "OSG"
    if farm_name == "OSG":
        utils.printer("Passing to htcondor_submit")
        htcondor_submit.htcondor_submit(args, scard, usub_id, file_extension, params, db_conn, sql)
    else:
        raise ValueError('Unable to submit for {}'.format(farm_name))
def printbattledata(battledata):
    filename = 'log/' + utils.gettime() + '.log'
    f = io.open(filename, 'w', encoding='utf-8')
    f.write(json.dumps(battledata, ensure_ascii=False, indent=4, separators=(',', ': ')))
    for data in battledata['battleData']:
        # utils.printjson(data)
        if data['cmd'] == 'R':
            print 'ROUND #' + data['rounds'] + ','
        elif data['cmd'] == 'A':
            print 'CHARA #' + data['actorid'] + ' moves.', data['points'][0:2] + u'玉,', data['points'][2:4] + u'魔力'
        elif data['cmd'] == 'D':
            for unattack in data['unattack']:
                print 'CHARA #' + data['attackid'] + ' attacks CHARA #' + unattack['unattackid'] + ', damage = ' + unattack['damage'] + db.damagetypes[unattack['damagetype']] + ', type = ' + db.attacktypes[unattack['attacktype']]
        elif data['cmd'] == 'S':
            print 'SPELL: ' + data['spellname']
        elif data['cmd'] == 'RESULT':
            print 'RESULT: ' + data['result']
        else:
            print 'UNKNOWN cmd ' + data['cmd']
            utils.printjson(data)
    return
def htcondor_submit(args, GcardID, file_extension):
    """ if value in submission === not submitted"""
    runscript_file = (file_struct.runscript_file_obj.file_base + file_extension
                      + file_struct.runscript_file_obj.file_end)
    clas12condor_file = (file_struct.condor_file_obj.file_base + file_extension
                         + file_struct.condor_file_obj.file_end)
    condorfile = 'submission_files/' + 'condor_files/' + clas12condor_file

    subprocess.call(['chmod', '+x', file_struct.runscript_file_obj.file_path + runscript_file])
    subprocess.call(['chmod', '+x', "condor_wrapper"])

    submission = Popen(['condor_submit', condorfile], stdout=PIPE).communicate()[0]
    # The below is for testing purposes
    # submission = """Submitting job(s)...
    # 3 job(s) submitted to cluster 7334290."""

    print(submission)
    words = submission.split()
    node_number = words[len(words) - 1]  # This might only work on SubMIT
    print(node_number)

    strn = "UPDATE Submissions SET run_status = 'submitted to pool' WHERE GcardID = '{0}';".format(
        GcardID)
    utils.sql3_exec(strn)

    timestamp = utils.gettime()  # Can modify this if need 10ths of seconds or more resolution
    strn = "UPDATE Submissions SET submission_timestamp = '{0}' WHERE GcardID = '{1}';".format(
        timestamp, GcardID)
    utils.sql3_exec(strn)

    strn = "UPDATE Submissions SET pool_node = '{0}' WHERE GcardID = '{1}';".format(
        node_number, GcardID)
    utils.sql3_exec(strn)
def simulate_period(period_idx, period, periods, processes, entities, init=False): print("\nperiod", period) if init: for entity in entities: print(" * %s: %d individuals" % (entity.name, len(entity.array))) else: print("- loading input data") for entity in entities: print(" *", entity.name, "...", end=' ') timed(entity.load_period_data, period) print(" -> %d individuals" % len(entity.array)) for entity in entities: entity.array_period = period entity.array['period'] = period if processes: # build context for this period: const_dict = {'period_idx': period_idx+1, 'periods': periods, 'periodicity': time_period[self.time_scale]*(1 - 2*(self.retro)), 'format_date': self.time_scale, 'nan': float('nan'), '__globals__': globals_data} assert(periods[period_idx+1] == period) num_processes = len(processes) for p_num, process_def in enumerate(processes, start=1): process, periodicity, start = process_def print("- %d/%d" % (p_num, num_processes), process.name, end=' ') #TODO: provide a custom __str__ method for Process & # Assignment instead if hasattr(process, 'predictor') and process.predictor \ and process.predictor != process.name: print("(%s)" % process.predictor, end=' ') print("...", end=' ') # TDOD: change that if isinstance(periodicity, int ): if period_idx % periodicity == 0: elapsed, _ = gettime(process.run_guarded, self, const_dict) else: elapsed = 0 print("skipped (periodicity)") else: assert (periodicity in time_period) periodicity_process = time_period[periodicity] periodicity_simul = time_period[self.time_scale] month_idx = period % 100 # first condition, to run a process with start == 12 # each year even if year are yyyy01 #modify start if periodicity_simul is not month start = int(start/periodicity_simul-0.01)*periodicity_simul + 1 if (periodicity_process <= periodicity_simul and self.time_scale != 'year0') or \ month_idx % periodicity_process == start % periodicity_process: const_dict['periodicity'] = periodicity_process*(1 - 2*(self.retro)) elapsed, _ = gettime(process.run_guarded, self, const_dict) else: elapsed = 0 print("skipped (periodicity)") process_time[process.name] += elapsed if config.show_timings: print("done (%s elapsed)." % time2str(elapsed)) else: print("done.") self.start_console(process.entity, period, globals_data) # pdb.set_trace() #self.entities[2].table print("- storing period data") for entity in entities: print(" *", entity.name, "...", end=' ') timed(entity.store_period_data, period) print(" -> %d individuals" % len(entity.array)) # print " - compressing period data" # for entity in entities: # print " *", entity.name, "...", # for level in range(1, 10, 2): # print " %d:" % level, # timed(entity.compress_period_data, level) period_objects[period] = sum(len(entity.array) for entity in entities)
def create_json_dict(args):
    db_conn, sql = connect_to_database(args.lite)

    condor_info = get_condor_q.get_condor_q(args)
    batch_ids = condor_info[0]
    total_jobs_submitted = condor_info[1]
    total_jobs_running = condor_info[2]
    jobs_start_dates = condor_info[3]
    idle_jobs = condor_info[4][1]
    running_jobs = condor_info[4][2]
    held_jobs = condor_info[4][5]

    footer_placeholder_text = ("Total for all users: 14598 jobs; 0 completed, 0 removed, "
                               "12378 idle, 1903 running, 317 held, 0 suspended")
    footer = footer_placeholder_text

    json_dict = {}
    json_dict['metadata'] = {
        'update_timestamp': gettime(),
        'footer': footer,
    }
    json_dict['user_data'] = []

    for index, osg_id in enumerate(batch_ids):
        jobs_total = total_jobs_submitted[index]
        jobs_done = jobs_total - total_jobs_running[index]
        jobs_idle = idle_jobs[index]
        jobs_running = running_jobs[index]
        jobs_held = held_jobs[index]
        jobs_start = utils.unixtimeconvert(jobs_start_dates[index], "eastern")

        sql.execute(
            "SELECT COUNT(pool_node) FROM submissions WHERE pool_node = {}".format(osg_id))
        count = sql.fetchall()[0][0]

        # I don't get exactly what is going on here. How can we have a zero in the DB but nonzero in condor?
        # Looking at this more, it is only 1 if it exists in the db, or 0 if not in the db.
        if count > 0:
            # Get information from database to connect with this job
            sql.execute(
                "SELECT user,user_submission_id FROM submissions WHERE pool_node = {}".format(osg_id))
            user, farm_sub_id = sql.fetchall()[0]

            user_info = [
                user, farm_sub_id, jobs_start, jobs_total, jobs_done,
                jobs_running, jobs_idle, jobs_held, osg_id
            ]

            user_data = {}
            for index, key in enumerate(fs.user_data_keys):
                user_data[key] = user_info[index]

            user_data = enforce_preferential_key_ordering(user_data, fs.user_data_keys)
            json_dict['user_data'].append(user_data)
        else:
            print('Skipping {}'.format(osg_id))

    db_conn.close()

    # Nothing was added
    if not json_dict['user_data']:
        user_data = {}
        for index, key in enumerate(fs.user_data_keys):
            user_data[key] = fs.null_user_info[index]
        json_dict['user_data'].append(
            enforce_preferential_key_ordering(user_data, fs.user_data_keys))

    return json_dict
# for f1, f2 in zip(filesRF[file:file+1], filesPD[file:file+1]):
for f1, f2 in zip(filesRF[:2], filesPD[:2]):
    print f1
    wfRF = utils.readscopefile(f1)
    wfPD = utils.readscopefile(f2)
    sim = simulation.Simulation(det=det, sampling=5e9)
    sim.time = wfRF[0]
    sim.noise = wfRF[1]
    simwf = waveform.Waveform(sim.time, sim.noise, type='hf')
    wf = det.producesimwaveform(simwf, 'powerdetector', met)
    print len(wfPD[1]), ' ', len(wf.amp)
    print len(sim.time), ' ', len(wf.time)
    # [amp1, amp2] = utils.alignwaveform2(wf.amp, wfPD[1], False)
    [amp1, amp2] = utils.alignwaveform2(wfPD[1], wf.amp)
    [time1, time2] = utils.gettime(wf.time, wfPD[0])
    realwf = waveform.Waveform(wfPD[0], wfPD[1], type='powerdet')
    acamp1 = amp1 - np.mean(amp1)
    acamp2 = amp2 - np.mean(amp2)
    size1 = len(acamp1)
    acamp1 = acamp1[size1/5:-size1/5]
    acamp2 = acamp2[size1/5:-size1/5]
    diff = np.append(diff, acamp2 - acamp1)
    if fcount == file:
        ex1 = acamp1
        ex2 = acamp2
        extime = time1[size1/5:-size1/5]*1e6
        if met == 1:
            exrf = wfRF[1]
            timerf = wfRF[0]*1e6
            ax1.plot(timerf, exrf)
def simulate_period(period_idx, period, periods, processes, entities, init=False): period_start_time = time.time() # set current period eval_ctx.period = period if config.log_level in ("procedures", "processes"): print() print("period", period, end=" " if config.log_level == "periods" else "\n") if init and config.log_level in ("procedures", "processes"): for entity in entities: print(" * %s: %d individuals" % (entity.name, len(entity.array))) else: if config.log_level in ("procedures", "processes"): print("- loading input data") for entity in entities: print(" *", entity.name, "...", end=' ') timed(entity.load_period_data, period) print(" -> %d individuals" % len(entity.array)) else: for entity in entities: entity.load_period_data(period) for entity in entities: entity.array_period = period entity.array['period'] = period if processes: # build context for this period: const_dict = {'period_idx': period_idx + 1, 'periods': periods, 'periodicity': time_period[self.time_scale] * (1 - 2 * (self.retro)), 'longitudinal': self.longitudinal, 'format_date': self.time_scale, 'pension': None, '__simulation__': self, 'period': period, 'nan': float('nan'), '__globals__': globals_data} assert(periods[period_idx + 1] == period) num_processes = len(processes) for p_num, process_def in enumerate(processes, start=1): process, periodicity, start = process_def if config.log_level in ("procedures", "processes"): print("- %d/%d" % (p_num, num_processes), process.name, end=' ') print("...", end=' ') # TDOD: change that if isinstance(periodicity, int): if period_idx % periodicity == 0: elapsed, _ = gettime(process.run_guarded, self, const_dict) else: elapsed = 0 print("skipped (periodicity)") else: assert periodicity in time_period periodicity_process = time_period[periodicity] periodicity_simul = time_period[self.time_scale] month_idx = period % 100 # first condition, to run a process with start == 12 # each year even if year are yyyy01 # modify start if periodicity_simul is not month start = int(start / periodicity_simul - 0.01) * periodicity_simul + 1 if (periodicity_process <= periodicity_simul and self.time_scale != 'year0') or ( month_idx % periodicity_process == start % periodicity_process): const_dict['periodicity'] = periodicity_process * (1 - 2 * (self.retro)) elapsed, _ = gettime(process.run_guarded, self, const_dict) else: elapsed = 0 if config.log_level in ("procedures", "processes"): print("skipped (periodicity)") process_time[process.name] += elapsed if config.log_level in ("procedures", "processes"): if config.show_timings: print("done (%s elapsed)." 
% time2str(elapsed)) else: print("done.") self.start_console(eval_ctx) # update longitudinal person = [x for x in entities if x.name == 'person'][0] # maybe we have a get_entity or anything more nice than that #TODO: check id = person.array.columns['id'] for varname in ['sali', 'workstate']: var = person.array.columns[varname] if init: fpath = self.data_source.input_path input_file = HDFStore(fpath, mode="r") if 'longitudinal' in input_file.root: input_longitudinal = input_file.root.longitudinal if varname in input_longitudinal: self.longitudinal[varname] = input_file['/longitudinal/' + varname] if period not in self.longitudinal[varname].columns: table = DataFrame({'id': id, period: var}) self.longitudinal[varname] = self.longitudinal[varname].merge( table, on='id', how='outer') else: # when one variable is not in the input_file self.longitudinal[varname] = DataFrame({'id': id, period: var}) else: # when there is no longitudinal in the dataset self.longitudinal[varname] = DataFrame({'id': id, period: var}) else: table = DataFrame({'id': id, period: var}) if period in self.longitudinal[varname]: import pdb pdb.set_trace() self.longitudinal[varname] = self.longitudinal[varname].merge(table, on='id', how='outer') if config.log_level in ("procedures", "processes"): print("- storing period data") for entity in entities: print(" *", entity.name, "...", end=' ') timed(entity.store_period_data, period) print(" -> %d individuals" % len(entity.array)) else: for entity in entities: entity.store_period_data(period) # print " - compressing period data" # for entity in entities: # print " *", entity.name, "...", # for level in range(1, 10, 2): # print " %d:" % level, # timed(entity.compress_period_data, level) period_objects[period] = sum(len(entity.array) for entity in entities) period_elapsed_time = time.time() - period_start_time if config.log_level in ("procedures", "processes"): print("period %d" % period, end=' ') print("done", end=' ') if config.show_timings: print("(%s elapsed)" % time2str(period_elapsed_time), end="") if init: print(".") else: main_elapsed_time = time.time() - main_start_time periods_done = period_idx + 1 remaining_periods = self.periods - periods_done avg_time = main_elapsed_time / periods_done # future_time = period_elapsed_time * 0.4 + avg_time * 0.6 remaining_time = avg_time * remaining_periods print(" - estimated remaining time: %s." % time2str(remaining_time)) else: print()
def run(self, run_console=False): start_time = time.time() h5in, h5out, globals_data = timed(self.data_source.run, self.globals_def, entity_registry, self.init_period) if config.autodump or config.autodiff: if config.autodump: fname, _ = config.autodump mode = 'w' else: # config.autodiff fname, _ = config.autodiff mode = 'r' fpath = os.path.join(config.output_directory, fname) h5_autodump = tables.open_file(fpath, mode=mode) config.autodump_file = h5_autodump else: h5_autodump = None # input_dataset = self.data_source.run(self.globals_def, # entity_registry) # output_dataset = self.data_sink.prepare(self.globals_def, # entity_registry) # output_dataset.copy(input_dataset, self.init_period - 1) # for entity in input_dataset: # indexed_array = buildArrayForPeriod(entity) # tell numpy we do not want warnings for x/0 and 0/0 np.seterr(divide='ignore', invalid='ignore') process_time = defaultdict(float) period_objects = {} eval_ctx = EvaluationContext(self, self.entities_map, globals_data) def simulate_period(period_idx, period, periods, processes, entities, init=False): period_start_time = time.time() # set current period eval_ctx.period = period if config.log_level in ("procedures", "processes"): print() print("period", period, end=" " if config.log_level == "periods" else "\n") if init and config.log_level in ("procedures", "processes"): for entity in entities: print(" * %s: %d individuals" % (entity.name, len(entity.array))) else: if config.log_level in ("procedures", "processes"): print("- loading input data") for entity in entities: print(" *", entity.name, "...", end=' ') timed(entity.load_period_data, period) print(" -> %d individuals" % len(entity.array)) else: for entity in entities: entity.load_period_data(period) for entity in entities: entity.array_period = period entity.array['period'] = period if processes: # build context for this period: const_dict = {'period_idx': period_idx + 1, 'periods': periods, 'periodicity': time_period[self.time_scale] * (1 - 2 * (self.retro)), 'longitudinal': self.longitudinal, 'format_date': self.time_scale, 'pension': None, '__simulation__': self, 'period': period, 'nan': float('nan'), '__globals__': globals_data} assert(periods[period_idx + 1] == period) num_processes = len(processes) for p_num, process_def in enumerate(processes, start=1): process, periodicity, start = process_def if config.log_level in ("procedures", "processes"): print("- %d/%d" % (p_num, num_processes), process.name, end=' ') print("...", end=' ') # TDOD: change that if isinstance(periodicity, int): if period_idx % periodicity == 0: elapsed, _ = gettime(process.run_guarded, self, const_dict) else: elapsed = 0 print("skipped (periodicity)") else: assert periodicity in time_period periodicity_process = time_period[periodicity] periodicity_simul = time_period[self.time_scale] month_idx = period % 100 # first condition, to run a process with start == 12 # each year even if year are yyyy01 # modify start if periodicity_simul is not month start = int(start / periodicity_simul - 0.01) * periodicity_simul + 1 if (periodicity_process <= periodicity_simul and self.time_scale != 'year0') or ( month_idx % periodicity_process == start % periodicity_process): const_dict['periodicity'] = periodicity_process * (1 - 2 * (self.retro)) elapsed, _ = gettime(process.run_guarded, self, const_dict) else: elapsed = 0 if config.log_level in ("procedures", "processes"): print("skipped (periodicity)") process_time[process.name] += elapsed if config.log_level in ("procedures", "processes"): if 
config.show_timings: print("done (%s elapsed)." % time2str(elapsed)) else: print("done.") self.start_console(eval_ctx) # update longitudinal person = [x for x in entities if x.name == 'person'][0] # maybe we have a get_entity or anything more nice than that #TODO: check id = person.array.columns['id'] for varname in ['sali', 'workstate']: var = person.array.columns[varname] if init: fpath = self.data_source.input_path input_file = HDFStore(fpath, mode="r") if 'longitudinal' in input_file.root: input_longitudinal = input_file.root.longitudinal if varname in input_longitudinal: self.longitudinal[varname] = input_file['/longitudinal/' + varname] if period not in self.longitudinal[varname].columns: table = DataFrame({'id': id, period: var}) self.longitudinal[varname] = self.longitudinal[varname].merge( table, on='id', how='outer') else: # when one variable is not in the input_file self.longitudinal[varname] = DataFrame({'id': id, period: var}) else: # when there is no longitudinal in the dataset self.longitudinal[varname] = DataFrame({'id': id, period: var}) else: table = DataFrame({'id': id, period: var}) if period in self.longitudinal[varname]: import pdb pdb.set_trace() self.longitudinal[varname] = self.longitudinal[varname].merge(table, on='id', how='outer') if config.log_level in ("procedures", "processes"): print("- storing period data") for entity in entities: print(" *", entity.name, "...", end=' ') timed(entity.store_period_data, period) print(" -> %d individuals" % len(entity.array)) else: for entity in entities: entity.store_period_data(period) # print " - compressing period data" # for entity in entities: # print " *", entity.name, "...", # for level in range(1, 10, 2): # print " %d:" % level, # timed(entity.compress_period_data, level) period_objects[period] = sum(len(entity.array) for entity in entities) period_elapsed_time = time.time() - period_start_time if config.log_level in ("procedures", "processes"): print("period %d" % period, end=' ') print("done", end=' ') if config.show_timings: print("(%s elapsed)" % time2str(period_elapsed_time), end="") if init: print(".") else: main_elapsed_time = time.time() - main_start_time periods_done = period_idx + 1 remaining_periods = self.periods - periods_done avg_time = main_elapsed_time / periods_done # future_time = period_elapsed_time * 0.4 + avg_time * 0.6 remaining_time = avg_time * remaining_periods print(" - estimated remaining time: %s." 
% time2str(remaining_time)) else: print() print(""" ===================== starting simulation =====================""") try: assert(self.time_scale in time_period) month_periodicity = time_period[self.time_scale] time_direction = 1 - 2 * (self.retro) time_step = month_periodicity * time_direction periods = [ self.init_period + int(t / 12) * 100 + t % 12 for t in range(0, (self.periods + 1) * time_step, time_step) ] if self.time_scale == 'year0': periods = [self.init_period + t for t in range(0, (self.periods + 1))] print("simulated period are going to be: ", periods) init_start_time = time.time() simulate_period(0, self.init_period, [None, periods[0]], self.init_processes, self.entities, init=True) time_init = time.time() - init_start_time main_start_time = time.time() for period_idx, period in enumerate(periods[1:]): period_start_time = time.time() simulate_period(period_idx, period, periods, self.processes, self.entities) # if self.legislation: # if not self.legislation['ex_post']: # # elapsed, _ = gettime(liam2of.main,period) # process_time['liam2of'] += elapsed # elapsed, _ = gettime(of_on_liam.main,self.legislation['annee'],[period]) # process_time['legislation'] += elapsed # elapsed, _ = gettime(merge_leg.merge_h5,self.data_source.output_path, # "C:/Til/output/"+"simul_leg.h5",period) # process_time['merge_leg'] += elapsed time_elapsed = time.time() - period_start_time print("period %d done" % period, end=' ') if config.show_timings: print("(%s elapsed)." % time2str(time_elapsed)) else: print() total_objects = sum(period_objects[period] for period in periods) total_time = time.time() - main_start_time # if self.legislation: # if self.legislation['ex_post']: # # elapsed, _ = gettime(liam2of.main) # process_time['liam2of'] += elapsed # elapsed, _ = gettime(of_on_liam.main,self.legislation['annee']) # process_time['legislation'] += elapsed # # TODO: faire un programme a part, so far ca ne marche pas pour l'ensemble # # adapter n'est pas si facile, comme on veut economiser une table, # # on ne peut pas faire de append directement parce qu on met 2010 apres 2011 # # a un moment dans le calcul # elapsed, _ = gettime(merge_leg.merge_h5,self.data_source.output_path, # "C:/Til/output/"+"simul_leg.h5",None) # process_time['merge_leg'] += elapsed if self.final_stat: elapsed, _ = gettime(start, period) process_time['Stat'] += elapsed total_time = time.time() - main_start_time time_year = 0 if len(periods) > 1: nb_year_approx = periods[-1] / 100 - periods[1] / 100 if nb_year_approx > 0: time_year = total_time / nb_year_approx try: ind_per_sec = str(int(total_objects / total_time)) except ZeroDivisionError: ind_per_sec = 'inf' print(""" ========================================== simulation done ========================================== * %s elapsed * %d individuals on average * %s individuals/s/period on average * %s second for init_process * %s time/period in average * %s time/year in average ========================================== """ % ( time2str(time.time() - start_time), total_objects / self.periods, ind_per_sec, time2str(time_init), time2str(total_time / self.periods), time2str(time_year)) ) show_top_processes(process_time, 10) # if config.debug: # show_top_expr() if run_console: console_ctx = eval_ctx.clone(entity_name=self.default_entity) c = console.Console(console_ctx) c.run() finally: if h5in is not None: h5in.close() h5out.close() if h5_autodump is not None: h5_autodump.close()
def simulate_period(period_idx, period, processes, entities, init=False): period_start_time = time.time() # set current period eval_ctx.period = period if config.log_level in ("functions", "processes"): print() print("period", period, end=" " if config.log_level == "periods" else "\n") if init and config.log_level in ("functions", "processes"): for entity in entities: print(" * %s: %d individuals" % (entity.name, len(entity.array))) else: if config.log_level in ("functions", "processes"): print("- loading input data") for entity in entities: print(" *", entity.name, "...", end=' ') timed(entity.load_period_data, period) print(" -> %d individuals" % len(entity.array)) else: for entity in entities: entity.load_period_data(period) for entity in entities: entity.array_period = period entity.array['period'] = period if processes: num_processes = len(processes) for p_num, process_def in enumerate(processes, start=1): process, periodicity = process_def # set current entity eval_ctx.entity_name = process.entity.name if config.log_level in ("functions", "processes"): print("- %d/%d" % (p_num, num_processes), process.name, end=' ') print("...", end=' ') if period_idx % periodicity == 0: elapsed, _ = gettime(process.run_guarded, eval_ctx) else: elapsed = 0 if config.log_level in ("functions", "processes"): print("skipped (periodicity)") process_time[process.name] += elapsed if config.log_level in ("functions", "processes"): if config.show_timings: print("done (%s elapsed)." % time2str(elapsed)) else: print("done.") self.start_console(eval_ctx) if config.log_level in ("functions", "processes"): print("- storing period data") for entity in entities: print(" *", entity.name, "...", end=' ') timed(entity.store_period_data, period) print(" -> %d individuals" % len(entity.array)) else: for entity in entities: entity.store_period_data(period) # print " - compressing period data" # for entity in entities: # print " *", entity.name, "...", # for level in range(1, 10, 2): # print " %d:" % level, # timed(entity.compress_period_data, level) period_objects[period] = sum(len(entity.array) for entity in entities) period_elapsed_time = time.time() - period_start_time if config.log_level in ("functions", "processes"): print("period %d" % period, end=' ') print("done", end=' ') if config.show_timings: print("(%s elapsed)" % time2str(period_elapsed_time), end="") if init: print(".") else: main_elapsed_time = time.time() - main_start_time periods_done = period_idx + 1 remaining_periods = self.periods - periods_done avg_time = main_elapsed_time / periods_done # future_time = period_elapsed_time * 0.4 + avg_time * 0.6 remaining_time = avg_time * remaining_periods print(" - estimated remaining time: %s." % time2str(remaining_time)) else: print()
def Lund_Entry(lund_location, lund_download_dir="lund_dir/"):
    valid_lund_extensions = ['.dat', '.txt', '.lund']

    # Make sure lund_download_dir ends with a /, and if not, add one.
    if lund_download_dir[-1] != "/":
        lund_download_dir += "/"

    # A case used to work around not downloading for types 1/3
    if lund_location == "no_download":
        print('Not downloading files due to SCard type.')
        return lund_location
    elif os.path.exists(lund_download_dir):
        print('Lund directory already exists, not downloading again.')
        return lund_download_dir

    # Create dir. to download / copy files into
    try:
        subprocess.call(['mkdir', '-p', lund_download_dir])
    except Exception as e:
        print("WARNING: unable to make directory {}".format(lund_download_dir))
        print("The error encountered was: \n {}".format(e))
        f = open("lundException.txt", "a")
        f.write("\n an exception was encountered at {}, see below: \n".format(utils.gettime()))
        f.write(str(e))
        f.close()

    ##################################################################
    # Case 3/4 - download single / multiple files from online location
    ##################################################################
    if 'http' in lund_location:
        # Download single web file
        if any([ext in lund_location for ext in valid_lund_extensions]):
            lund_dir_unformatted = lund_location.split("/")
            # This gets the name of the lund file, assuming the format is http......./lund_file_name
            lund_filename = lund_dir_unformatted[-1]
            # Pass the location, file, and download dir to the downloader function
            Lund_Downloader(lund_url_base=lund_location,
                            lund_download_dir=lund_download_dir,
                            lund_filename=lund_filename)
        # Download entire web directory
        else:
            try:
                # Read the given location to find all the lund files
                raw_html, lund_filenames = html_reader.html_reader(lund_location, valid_lund_extensions)
            except Exception as e:
                print("ERROR: unable to download lund files from {}".format(lund_location))
                print("The error encountered was: \n {}".format(e))
                f = open("lundException.txt", "a")
                f.write("\n an exception was encountered at {}, see below: \n".format(utils.gettime()))
                f.write(str(e))
                f.close()
                exit()

            if len(lund_filenames) == 0:
                print("No Lund files found (they must end in '{}'). Is the online repository correct?".format(
                    valid_lund_extensions))
                exit()

            # Loop through downloading every LUND file in directory
            for lund_filename in lund_filenames:
                Lund_Downloader(lund_url_base=lund_location,
                                lund_download_dir=lund_download_dir,
                                lund_filename=lund_filename,
                                single_file=False)

    #######################################################################
    # Case 1/2 - Use RSync to copy files from a jlab location to OSG
    # RSYNC option: rlpgoD replaces -a (rlptgoD) so time is not preserved:
    # When copied, the files will have a new timestamp, which will play
    # nice with our autodeletion cronjobs
    #######################################################################
    else:
        # Single local file
        if any([ext in lund_location for ext in valid_lund_extensions]):
            try:
                # print("Trying to copy Lund file from {}".format(lund_location))
                if lund_location[0] != "/":
                    lund_location = "/" + lund_location
                # Example full filepath: gemc@dtn1902-ib:/lustre19/expphy/volatile/clas12/robertej/testlund.txt
                lund_copy_path = 'gemc@dtn1902-ib:/lustre19/expphy' + lund_location
                subprocess.call(['rsync', '-rlpgoD', lund_copy_path, lund_download_dir])
            except Exception as e:
                print("ERROR: unable to copy lund files from {}".format(lund_location))
                print("The error encountered was: \n {}".format(e))
                f = open("lundException.txt", "a")
                f.write("\n an exception was encountered at {}, see below: \n".format(utils.gettime()))
                f.write(str(e))
                f.close()
        # Local directory, many files
        else:
            if lund_location[0] != "/":
                lund_location = "/" + lund_location
            if lund_location[-1] != "/":
                lund_location += "/"
            if "/lustre19/expphy" not in lund_location:
                lund_location = '/lustre19/expphy' + lund_location
            # print("trying to rsync {}".format(lund_location))
            lund_copy_path = 'gemc@dtn1902-ib:' + lund_location
            # subprocess.call(['rsync', '-a', lund_copy_path, lund_download_dir])
            subprocess.call(['rsync', '-zrlpgoDv', '--prune-empty-dirs', "--include='*.dat'",
                             "--include='*.txt'", "--exclude='*'", lund_copy_path, lund_download_dir])
            files = os.listdir(lund_download_dir)
            for f in files:
                if not any([ext in f for ext in valid_lund_extensions]):
                    os.remove(lund_download_dir + f)

    return lund_download_dir
tsys = 50
gain = 1e6
f1 = 0.95e9
f2 = 1.75e9
##
det = detector.Detector(tsys, gain, f1, f2, 5e-9)
deltav1 = np.array([])
deltav2 = np.array([])
# for f1, f2 in zip(files1, files2):
# for f1, f2 in zip(files1[:1], files2[:1]):
for f1, f2 in zip(files1[::5], files2[::5]):
    wf1 = utils.readscopefile(f1)
    wf2 = utils.readscopefile(f2)
    [pd, board] = utils.resize(wf1[1], wf2[1])
    [time1, time2] = utils.gettime(wf1[0], wf2[0])
    delay = utils.finddelay2(pd, board)
    board = np.roll(board, delay)

    realpd = simulation.Simulation(det=det, sampling=5e9)
    realpd.noise = pd
    realpd.time = time1
    realpdwf = waveform.Waveform(realpd.time, realpd.noise, type='powerdetector')

    realboard = simulation.Simulation(det=det, sampling=5e9)
    realboard.noise = board
    realboard.time = time1
    realboardwf = waveform.Waveform(realboard.time, realboard.noise, type='board')

    simboard = simulation.Simulation(det=det, sampling=5e9)
    simboard.noise = pd
for f1, f2 in zip(filesRF[::5], filesPD[::5]):
    mindist = 1e6
    # result for each file
    dist = np.array([])
    besttau = 0
    besta = 0
    bestb = 0
    wfRF = utils.readscopefile(f1)
    wfPD = utils.readscopefile(f2)
    for t in taus:
        conv = utils.produceresponse(wfRF[0], wfRF[1], t)
        real = wfPD[1]
        sim = conv[1]
        # resize the two waveforms to the same size (because of the convolution)
        [real, sim] = utils.resize(real, sim)
        time = utils.gettime(wfPD[0], conv[0])
        delay = utils.finddelay2(real, sim)
        simshifted = np.roll(sim, delay)
        # fit the conv vs power:
        fitconv_pd = np.polyfit(simshifted, real, 1)
        polyconv_pd = np.poly1d(fitconv_pd)
        simpd = polyconv_pd(simshifted)
        size = len(simpd)
        alpha = np.sum((simpd - real)**2)
        dist = np.append(dist, alpha)
        if alpha < mindist:
            mindist = alpha
            besttau = t
            besta = fitconv_pd[0]
            bestb = fitconv_pd[1]
    outname = resultfolder + 'distance_' + str(count)
def main(args): src_path, _ = os.path.split(os.path.realpath(__file__)) # Create result directory res_name = utils.gettime() res_dir = os.path.join(src_path, 'results', res_name) os.makedirs(res_dir, exist_ok=True) log_filename = os.path.join(res_dir, 'log.h5') model_filename = os.path.join(res_dir, res_name) # Store some git revision info in a text file in the log directory utils.store_revision_info(src_path, res_dir, ' '.join(sys.argv)) # Store parameters in an HDF5 file utils.store_hdf(os.path.join(res_dir, 'parameters.h5'), vars(args)) # Copy learning rate schedule file to result directory learning_rate_schedule = utils.copy_learning_rate_schedule_file( args.learning_rate_schedule, res_dir) with tf.Session() as sess: tf.set_random_seed(args.seed) np.random.seed(args.seed) filelist = ['train_%03d.pkl' % i for i in range(200)] dataset = create_dataset(filelist, args.data_dir, buffer_size=20000, batch_size=args.batch_size, total_seq_length=args.nrof_init_time_steps + args.seq_length) # Create an iterator over the dataset iterator = dataset.make_one_shot_iterator() obs, action = iterator.get_next() is_pdt_ph = tf.placeholder(tf.bool, [None, args.seq_length]) is_pdt = create_transition_type_matrix(args.batch_size, args.seq_length, args.training_scheme) with tf.variable_scope('env_model'): env_model = EnvModel(is_pdt_ph, obs, action, 1, model_type=args.model_type, nrof_time_steps=args.seq_length, nrof_free_nats=args.nrof_free_nats) reg_loss = tf.reduce_mean(env_model.regularization_loss) rec_loss = tf.reduce_mean(env_model.reconstruction_loss) loss = reg_loss + rec_loss global_step = tf.Variable(0, name='global_step', trainable=False) learning_rate_ph = tf.placeholder(tf.float32, ()) train_op = tf.train.AdamOptimizer(learning_rate_ph).minimize( loss, global_step=global_step) saver = tf.train.Saver() sess.run(tf.global_variables_initializer()) stat = { 'loss': np.zeros((args.max_nrof_steps, ), np.float32), 'rec_loss': np.zeros((args.max_nrof_steps, ), np.float32), 'reg_loss': np.zeros((args.max_nrof_steps, ), np.float32), 'learning_rate': np.zeros((args.max_nrof_steps, ), np.float32), } try: print('Started training') rec_loss_tot, reg_loss_tot, loss_tot = (0.0, 0.0, 0.0) lr = None t = time.time() for i in range(1, args.max_nrof_steps + 1): if not lr or i % 100 == 0: lr = utils.get_learning_rate_from_file( learning_rate_schedule, i) if lr < 0: break stat['learning_rate'][i - 1] = lr _, rec_loss_, reg_loss_, loss_ = sess.run( [train_op, rec_loss, reg_loss, loss], feed_dict={ is_pdt_ph: is_pdt, learning_rate_ph: lr }) stat['loss'][i - 1], stat['rec_loss'][i - 1], stat['reg_loss'][ i - 1] = loss_, rec_loss_, reg_loss_ rec_loss_tot += rec_loss_ reg_loss_tot += reg_loss_ loss_tot += loss_ if i % 10 == 0: print( 'step: %-5d time: %-12.3f lr: %-12.6f rec_loss: %-12.1f reg_loss: %-12.1f loss: %-12.1f' % (i, time.time() - t, lr, rec_loss_tot / 10, reg_loss_tot / 10, loss_tot / 10)) rec_loss_tot, reg_loss_tot, loss_tot = (0.0, 0.0, 0.0) t = time.time() if i % 5000 == 0 and i > 0: saver.save(sess, model_filename, i) if i % 100 == 0: utils.store_hdf(log_filename, stat) except tf.errors.OutOfRangeError: pass print("Saving model...") saver.save(sess, model_filename, i) print('Done!')
def simulate_period(period_idx, period, processes, entities, init=False): print "\nperiod", period if init: for entity in entities: print " * %s: %d individuals" % (entity.name, len(entity.array)) else: print "- loading input data" for entity in entities: print " *", entity.name, "...", timed(entity.load_period_data, period) print " -> %d individuals" % len(entity.array) for entity in entities: entity.array['period'] = period if processes: # build context for this period: const_dict = {'period': period, 'nan': float('nan')} # update "globals" with their value for this period if periodic_globals is not None: globals_row = period - globals_base_period if globals_row < 0: #XXX: use missing values instead? raise Exception('Missing globals data for period %d' % period) period_globals = periodic_globals[globals_row] const_dict.update((k, period_globals[k]) for k in period_globals.dtype.names) const_dict['__globals__'] = periodic_globals num_processes = len(processes) for p_num, process_def in enumerate(processes, start=1): process, periodicity = process_def print "- %d/%d" % (p_num, num_processes), process.name, #TODO: provided a custom __str__ method for Process & # Assignment instead if hasattr(process, 'predictor') and process.predictor \ and process.predictor != process.name: print "(%s)" % process.predictor, print "...", if period_idx % periodicity == 0: elapsed, _ = gettime(process.run_guarded, self, const_dict) else: elapsed = 0 print "skipped (periodicity)" process_time[process.name] += elapsed print "done (%s elapsed)." % time2str(elapsed) self.start_console(process.entity, period) print "- storing period data" for entity in entities: print " *", entity.name, "...", timed(entity.store_period_data, period) print " -> %d individuals" % len(entity.array) # print " - compressing period data" # for entity in entities: # print " *", entity.name, "...", # for level in range(1, 10, 2): # print " %d:" % level, # timed(entity.compress_period_data, level) period_objects[period] = sum(len(entity.array) for entity in entities)
def process(ml): if not filter.filter('cookie', ml, WHITE_LIST): return True cookie = ml.attr('cookie') goals = ml.attr('goals') all_searches = [] for _goal in goals: all_searches.extend(_goal.attr('searches')) all_idx = -1 for _goal in goals: searches = _goal.attr('searches') len_searches = len(searches) for (_idx, _search) in enumerate(searches): all_idx += 1 if not filter.filter('search', _search, WHITE_LIST): #if not filter.filter('search', _search, all_searches): continue kvs = KVS.copy() actions = _search.attr('actions_info') real_actions = [] for _action in actions: if _action.attr('fm') not in ['se', 'inlo']: real_actions.append(_action) date_time_s = _search.attr('date_time') date_time_c = actions[0].attr('date_time') if date_time_s: date_time = date_time_s elif date_time_c: date_time = date_time_c else: continue c = datetime.datetime.strptime(date_time, '%d/%b/%Y:%H:%M:%S') kvs['date'] = c.strftime('%Y-%m-%d') kvs['time'] = c.strftime('%H:%M:%S') # 计算首点、尾点相关信息 if real_actions: session_start_time = utils.gettime(date_time_c) first_action = real_actions[0] first_click_time = utils.gettime( first_action.attr('date_time')) - session_start_time if 0 < first_click_time < 120: kvs['first_click_time'] = first_click_time kvs['has_first_click'] = 1 else: kvs['first_click_time'] = 0 fm = first_action.attr('fm') kvs['first_click_type'] = fm if fm in ['as', 'beha', 'behz'] or (fm.startswith('al') and fm != 'alxr'): kvs['first_click_pos'] = first_action.attr('click_pos') if first_action.attr('is_satisfied_click') == 1: kvs['first_click_satisfaction'] = 1 last_action = real_actions[-1] page_stay_time = utils.gettime( last_action.attr('date_time')) - session_start_time if page_stay_time > 0: kvs['page_stay_time'] = page_stay_time kvs['has_page_stay'] = 1 fm = last_action.attr('fm') kvs['last_click_type'] = fm if fm in ['as', 'beha', 'behz'] or (fm.startswith('al') and fm != 'alxr'): kvs['last_click_pos'] = last_action.attr('click_pos') if last_action.attr('is_satisfied_click') == 1: kvs['last_click_satisfaction'] = 1 query_info = _search.attr('query_info') query = query_info.attr('query') # 计算通用信息 kvs['query'] = query kvs['query_len'] = len(query.decode('gb18030')) kvs['f'] = query_info.attr('f') kvs['ip'] = _search.attr('ip') kvs['cookie_qid'] = cookie + '_' + _search.attr('qid') kvs['tn'] = _search.attr('tn') kvs['satisfaction'] = _search.attr('satisfaction') # 计算前一个query和后一个query if _idx > 0: kvs['query_last'] = searches[_idx - 1].attr('query_info.query') if _idx + 1 < len_searches: kvs['query_next'] = searches[_idx + 1].attr('query_info.query') # 计算翻页、换query、rs _all_idx = all_idx + 1 # 换query、翻页是同一个goal for _se in searches[(_idx + 1):]: _all_idx += 1 page_no = _se.attr('page_no') if page_no == 1: query_info = _se.attr('query_info') new_query = query_info.attr('query') f = query_info.attr('f') if f in ['3', '8']: if kvs['query_len'] <= 7: if new_query != query and query in new_query: kvs['query_change'] = 1 else: if new_query != query: kvs['query_change'] = 1 elif f == '1': kvs['rs'] = 1 kvs['query_rs'] = new_query break kvs['page_turn'] += 1 else: # rs是跨goal的 for _se in all_searches[_all_idx:]: page_no = _se.attr('page_no') if page_no == 1: f = _se.attr('query_info.f') if f == '1': kvs['rs'] = 1 kvs['query_rs'] = _se.attr('query_info.query') break tp_dict = utils.splitTp(actions[0].attr('tp')) sids = tp_dict.get('rsv_sid', '').split('_') for _s in filter.SID_LIST: if str(_s) in sids: kvs['sid'] = _s break len_real_actions = len(real_actions) src_act = {} # 特定卡片的点击 # 计算点击相关信息 action_info_list 
= [] for (_nex_act, _action) in enumerate(real_actions, 1): fm = _action.attr('fm') if fm == 'tab': # 单独统计tab点击 kvs['tab_click'] += 1 tab = _action.attr('tab') if tab in [ 'music', 'news', 'zhidao', 'pic', 'video', 'map', 'wenku', 'more', 'tieba' ]: kvs['tab_' + tab] += 1 else: kvs['total_click'] += 1 # 包括交互 is_satisfied_click = _action.attr('is_satisfied_click') if is_satisfied_click == 1: kvs['satisfaction_click'] += 1 # 满意点击 t1 = utils.gettime(_action.attr('date_time')) _l = '' if fm not in ['beha', 'behz']: # 长点击不包括交互 if _nex_act != len_real_actions: t2 = utils.gettime( real_actions[_nex_act].attr('date_time')) dura = t2 - t1 # 当前点击的时间与后一个用户点击的时间差值 real_dura_time = t2 - t1 else: # 如果为最后一次点击 si = _idx + 1 dura = 0 real_dura_time = 0 if si < len_searches: # goal的非最后一个search t2 = searches[si].attr('date_time') if t2: dura = utils.gettime(t2) - t1 real_dura_time = utils.gettime(t2) - t1 elif is_satisfied_click == 1: dura = 40 real_dura_time = 120 if dura >= 40: _l = 'long_click' elif dura < 5: _l = 'short_click' else: _l = '' if real_dura_time >= 120: kvs['long_long_click'] += 1 click_pos = _action.attr('click_pos') action_tuple = (fm, click_pos, _l) action_info_list.append(action_tuple) if _l: kvs[_l] += 1 # 长点击/短点击 if filter.SRCID_LIST: # 统计卡片相关的点击信息 tp = _action.attr('tp') tp_dict = utils.splitTp(tp) srcid = tp_dict.get('rsv_srcid', '0') if int(srcid) in filter.SRCID_LIST: kvs['src_' + srcid + '_click'] += 1 # 卡片的点击 if fm in ['beha', 'behz']: kvs['src_' + srcid + '_behz_click'] += 1 # 卡片的交互点击 if _l: kvs['src_' + srcid + '_' + _l] += 1 # 卡片的长点击/短点击 if is_satisfied_click == 1: kvs['src_' + srcid + '_satisfaction_click'] += 1 # 卡片的满意点击 if srcid not in src_act: src_act[srcid] = [] src_act[srcid].append(_action) # 记录下卡片的点击 if fm in ['pp', 'ppim', 'im', 'lm', 'pl', \ 'plr', 'alxr', 'alop', 'as']: _k = fm elif fm in ['behz', 'beha']: _k = 'behz' elif fm.startswith('al'): _k = 'al' else: continue kvs[_k + '_click'] += 1 # 不同类型的点击 # 分位置点击 if _k in ['alop', 'as', 'al', 'behz']: click_pos = _action.attr('click_pos') tp = _action.attr('tp') tp_dict = utils.splitTp(tp) rsv_tpl = tp_dict.get('rsv_tpl', '-') if _k == 'behz' and rsv_tpl.startswith('right_'): continue if 0 < click_pos < 21: kvs['pos' + str(click_pos) + '_click'] += 1 if _l: kvs['pos' + str(click_pos) + '_' + _l] += 1 if is_satisfied_click == 1: kvs['pos' + str(click_pos) + '_satisfaction_click'] += 1 if filter.SRCID_LIST: urls = _search.attr('urls_info') for _url in urls: # 卡片的展现信息 srcid = utils.getSrcidFromDisplay(_url) if srcid and int(srcid) in filter.SRCID_LIST: kvs['src_' + str(srcid) + '_disp'] = 1 dis_pos = _url.attr('display_pos') if dis_pos > 0: kvs['src_' + str(srcid) + '_pos'] = dis_pos na_pos = _url.attr('natural_pos') if na_pos > 0: kvs['src_' + str(srcid) + '_pos_na'] = na_pos for _src in src_act: # 卡片的停留时间 if src_act[_src][-1].attr('satisfaction_click') == 1: _d = 200 elif src_act[_src] == real_actions[-1] and \ src_act[_src][-1].attr('fm') == 'alop': _d = 20 else: f = utils.gettime(src_act[_src][0].attr('date_time')) l = utils.gettime(src_act[_src][-1].attr('date_time')) _d = l - f if _d > 200: _d = 200 kvs['src_' + _src + '_stay'] = _d if kvs['total_click'] > 0: kvs['click_pv'] = 1 kvs.update( WORKER.getValues(_search, WHITE_LIST, kvs, action_info_list)) # 输出PV级别数据 print(MAP_DIC[random.randint(50, 99)] + '\t' + '\t'.join([str(kvs[x]) for x in KEYS])).decode('gb18030').encode('utf8') # 输出query级别数据 print(MAP_DIC[hash(kvs['query']) % 48 + 2] + '\t' + kvs['query'] + '\t' + '\t'.join([str(kvs[x]) for x in 
KEYS])).decode('gb18030').encode('utf8') # 这里是干什么? tmp_date = kvs['date'] if filter.URL_FLAG == True: for i in range(0, 10): if WHITE_LIST.get( _search.attr('urls_list')[i].attr('url')): tmp_sid = str(kvs['sid']) + '@' + WHITE_LIST[ _search.attr('urls_list')[i].attr('url')] else: tmp_sid = str(kvs['sid']) else: tmp_sid = str(kvs['sid']) # 构建SID_DIC和SID_HAS_DIC if not SID_DIC.get(tmp_sid): SID_DIC[tmp_sid] = {} SID_DIC[tmp_sid][tmp_date] = {} else: if not SID_DIC[tmp_sid].get(tmp_date): SID_DIC[tmp_sid][tmp_date] = {} if not SID_HAS_DIC.get(tmp_sid): SID_HAS_DIC[tmp_sid] = {} SID_HAS_DIC[tmp_sid][tmp_date] = {} else: if not SID_HAS_DIC[tmp_sid].get(tmp_date): SID_HAS_DIC[tmp_sid][tmp_date] = {} for idx, name in enumerate(KEYS): if not KVS[name] == '-': if KVS[name] == 'enum': if not SID_DIC[tmp_sid][tmp_date].get(name): SID_DIC[tmp_sid][tmp_date][name] = {} if SID_DIC[tmp_sid][tmp_date][name].get(kvs[name]): SID_DIC[tmp_sid][tmp_date][name][kvs[name]] += 1 else: SID_DIC[tmp_sid][tmp_date][name][kvs[name]] = 1 else: if SID_DIC[tmp_sid][tmp_date].get(name): SID_DIC[tmp_sid][tmp_date][name] += kvs[name] else: SID_DIC[tmp_sid][tmp_date][name] = kvs[name] if SID_HAS_DIC[tmp_sid][tmp_date].get(name): if kvs[name] > 0: SID_HAS_DIC[tmp_sid][tmp_date][name] += 1 else: if kvs[name] > 0: SID_HAS_DIC[tmp_sid][tmp_date][name] = 1 else: SID_HAS_DIC[tmp_sid][tmp_date][name] = 0
if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    ap.add_argument('-l', '--logfile', required=True)
    ap.add_argument('-o', '--output', required=True)
    ap.add_argument('-q', '--lite', required=False)
    args = ap.parse_args()
    print(args.lite)

    # Connect to our database with read/write access.
    db_conn, sql = connect_to_database(args.lite)
    print(db_conn, sql)

    logfile = args.logfile
    logtime = gettime()

    with open(logfile, 'r') as raw_log:
        log_text = raw_log.readlines()

    log_text = [l.strip().split() for l in log_text]
    log_text = [l for l in log_text if l]
    columns = log_text[0]
    footer = log_text[-1]
    print("columns:")
    print(columns)
    print("footer")
    print(footer)

    json_dict = {}
def htcondor_submit(args, scard, usub_id, file_extension, params, db_conn, sql, idle_limit=100000):
    # Test to see if user has too many jobs currently running:
    # shouldBeSubmitted will return false if number of jobs for that user is over idle limit
    if not shouldBeSubmitted(params['username'], idle_limit=idle_limit):
        print("user is over limit for number of jobs, waiting to submit")
        timestamp = utils.gettime()
        update_tables.update_farm_submission_to_waiting(usub_id, timestamp, db_conn, sql)
        return 1

    jobOutputDir = args.OutputDir
    if args.OutputDir == "TestOutputDir":
        print("Test output dir specified")
        jobOutputDir = os.path.dirname(os.path.abspath(__file__)) + '/../..'

    if args.test_condorscript:
        scripts_baseDir = os.path.dirname(os.path.abspath(__file__)) + '/../..'
        condor_exec = scripts_baseDir + "/server/condor_submit.sh"
    else:
        # Need to add condition here in case path is different for non-jlab
        scripts_baseDir = "/group/clas12/SubMit"
        condor_exec = scripts_baseDir + "/server/condor_submit.sh"

    if args.lite:
        dbType = "Test SQLite DB"
        dbName = "../../utils/CLAS12OCR.db"
    elif args.test_database:
        dbType = "Test MySQL DB"
        dbName = fs.MySQL_Test_DB_Name
    else:
        dbType = "Production MySQL DB"
        dbName = fs.MySQL_Prod_DB_Name

    print(dbType)
    print(dbName)
    print("submitting job, output going to {0}".format(jobOutputDir))

    url = scard.generator if scard.genExecutable == "Null" else 'no_download'

    # The following is useful for testing on locations which do not have htcondor installed.
    # This allows us to go all the way through with condor_submit.sh even if htcondor does not exist.
    htcondor_version = Popen(['which', 'condor_submit'], stdout=PIPE).communicate()[0]
    if not htcondor_version:
        htcondor_present = "no"
    else:
        htcondor_present = "yes"
    print(htcondor_present)

    if args.submit:
        # don't know how to pass farmsubmissionID (4th argument), passing GcardID for now (it may be the same)
        # error: we really need to pass farmsubmissionID
        print("trying to submit job now")
        # print([condor_exec, scripts_baseDir, jobOutputDir, params['username'],
        #        str(usub_id), url, dbType, dbName])
        # Note: Popen array arguments must only contain strings
        submission = Popen([
            condor_exec, scripts_baseDir, jobOutputDir, params['username'],
            str(usub_id), url, dbType, dbName, str(htcondor_present)
        ], stdout=PIPE).communicate()[0]

        print(submission)
        words = submission.split()
        node_number = words[len(words) - 1]  # This might only work on SubMIT
        timestamp = utils.gettime()
        update_tables.update_farm_submissions(usub_id, timestamp, node_number, db_conn, sql)
    else:
        print("-s option not selected, not passing jobs to condor_submit.sh")
    def run(self, run_console=False):
        start_time = time.time()

        h5in, h5out, globals_data = timed(self.data_source.run,
                                          self.globals_def,
                                          entity_registry,
                                          self.init_period)

        if config.autodump or config.autodiff:
            if config.autodump:
                fname, _ = config.autodump
                mode = 'w'
            else:  # config.autodiff
                fname, _ = config.autodiff
                mode = 'r'
            fpath = os.path.join(config.output_directory, fname)
            h5_autodump = tables.openFile(fpath, mode=mode)
            config.autodump_file = h5_autodump
        else:
            h5_autodump = None

#        input_dataset = self.data_source.run(self.globals_def,
#                                             entity_registry)
#        output_dataset = self.data_sink.prepare(self.globals_def,
#                                                entity_registry)
#        output_dataset.copy(input_dataset, self.init_period - 1)
#        for entity in input_dataset:
#            indexed_array = buildArrayForPeriod(entity)

        # tell numpy we do not want warnings for x/0 and 0/0
        np.seterr(divide='ignore', invalid='ignore')

        process_time = defaultdict(float)
        period_objects = {}

        def simulate_period(period_idx, period, periods, processes, entities,
                            init=False):
            print("\nperiod", period)
            if init:
                for entity in entities:
                    print(" * %s: %d individuals" % (entity.name,
                                                     len(entity.array)))
            else:
                print("- loading input data")
                for entity in entities:
                    print(" *", entity.name, "...", end=' ')
                    timed(entity.load_period_data, period)
                    print(" -> %d individuals" % len(entity.array))
            for entity in entities:
                entity.array_period = period
                entity.array['period'] = period

            if processes:
                # build the context for this period:
                const_dict = {'period_idx': period_idx + 1,
                              'periods': periods,
                              'periodicity': time_period[self.time_scale] * (1 - 2 * (self.retro)),
                              'format_date': self.time_scale,
                              'nan': float('nan'),
                              '__globals__': globals_data}
                assert(periods[period_idx + 1] == period)

                num_processes = len(processes)
                for p_num, process_def in enumerate(processes, start=1):
                    process, periodicity, start = process_def
                    print("- %d/%d" % (p_num, num_processes), process.name,
                          end=' ')
                    # TODO: provide a custom __str__ method for Process &
                    # Assignment instead
                    if hasattr(process, 'predictor') and process.predictor \
                            and process.predictor != process.name:
                        print("(%s)" % process.predictor, end=' ')
                    print("...", end=' ')
                    # TODO: change that
                    if isinstance(periodicity, int):
                        if period_idx % periodicity == 0:
                            elapsed, _ = gettime(process.run_guarded, self,
                                                 const_dict)
                        else:
                            elapsed = 0
                            print("skipped (periodicity)")
                    else:
                        assert periodicity in time_period
                        periodicity_process = time_period[periodicity]
                        periodicity_simul = time_period[self.time_scale]
                        month_idx = period % 100
                        # The first condition runs a process with start == 12
                        # each year, even if the period format is yyyy01.
                        # Adjust start if periodicity_simul is not monthly.
                        start = int(start / periodicity_simul - 0.01) * periodicity_simul + 1
                        if (periodicity_process <= periodicity_simul and
                                self.time_scale != 'year0') or \
                                month_idx % periodicity_process == start % periodicity_process:
                            const_dict['periodicity'] = periodicity_process * (1 - 2 * (self.retro))
                            elapsed, _ = gettime(process.run_guarded, self,
                                                 const_dict)
                        else:
                            elapsed = 0
                            print("skipped (periodicity)")

                    process_time[process.name] += elapsed
                    if config.show_timings:
                        print("done (%s elapsed)." % time2str(elapsed))
                    else:
                        print("done.")
                    self.start_console(process.entity, period, globals_data)
            # pdb.set_trace()  # self.entities[2].table

            print("- storing period data")
            for entity in entities:
                print(" *", entity.name, "...", end=' ')
                timed(entity.store_period_data, period)
                print(" -> %d individuals" % len(entity.array))
#            print " - compressing period data"
#            for entity in entities:
#                print " *", entity.name, "...",
#                for level in range(1, 10, 2):
#                    print " %d:" % level,
#                    timed(entity.compress_period_data, level)
            period_objects[period] = sum(len(entity.array)
                                         for entity in entities)

        try:
            assert(self.time_scale in time_period)
            month_periodicity = time_period[self.time_scale]
            time_direction = 1 - 2 * (self.retro)
            time_step = month_periodicity * time_direction
            periods = [self.init_period + int(t / 12) * 100 + t % 12
                       for t in range(0, (self.periods + 1) * time_step,
                                      time_step)]
            if self.time_scale == 'year0':
                periods = [self.init_period + t
                           for t in range(0, (self.periods + 1))]
            print("simulated periods are going to be:", periods)

            init_start_time = time.time()
            simulate_period(0, self.init_period, [None, periods[0]],
                            self.init_processes, self.entities, init=True)
            time_init = time.time() - init_start_time

            main_start_time = time.time()
            for period_idx, period in enumerate(periods[1:]):
                period_start_time = time.time()
                simulate_period(period_idx, period, periods,
                                self.processes, self.entities)

#                if self.legislation:
#                    if not self.legislation['ex_post']:
#
#                        elapsed, _ = gettime(liam2of.main, period)
#                        process_time['liam2of'] += elapsed
#                        elapsed, _ = gettime(of_on_liam.main,
#                                             self.legislation['annee'], [period])
#                        process_time['legislation'] += elapsed
#                        elapsed, _ = gettime(merge_leg.merge_h5,
#                                             self.data_source.output_path,
#                                             "C:/Til/output/" + "simul_leg.h5",
#                                             period)
#                        process_time['merge_leg'] += elapsed

                time_elapsed = time.time() - period_start_time
                print("period %d done" % period, end=' ')
                if config.show_timings:
                    print("(%s elapsed)." % time2str(time_elapsed))
                else:
                    print()

            total_objects = sum(period_objects[period] for period in periods)

#            if self.legislation:
#                if self.legislation['ex_post']:
#
#                    elapsed, _ = gettime(liam2of.main)
#                    process_time['liam2of'] += elapsed
#                    elapsed, _ = gettime(of_on_liam.main, self.legislation['annee'])
#                    process_time['legislation'] += elapsed
#                    # TODO: make this a separate program; so far it does not
#                    # work for the whole run. Adapting it is not so easy: since
#                    # we want to save a table, we cannot simply append, because
#                    # 2010 gets written after 2011 at one point in the
#                    # computation.
#                    elapsed, _ = gettime(merge_leg.merge_h5,
#                                         self.data_source.output_path,
#                                         "C:/Til/output/" + "simul_leg.h5",
#                                         None)
#                    process_time['merge_leg'] += elapsed

            if self.final_stat:
                elapsed, _ = gettime(stat, period)
                process_time['Stat'] += elapsed

            total_time = time.time() - main_start_time
            time_year = 0
            if len(periods) > 1:
                nb_year_approx = periods[-1] / 100 - periods[1] / 100
                if nb_year_approx > 0:
                    time_year = total_time / nb_year_approx
            print("""
==========================================
 simulation done
==========================================
 * %s elapsed
 * %d individuals on average
 * %d individuals/s/period on average
 * %s for the init process
 * %s per period on average
 * %s per year on average
==========================================
""" % (time2str(time.time() - start_time),
       total_objects / self.periods,
       total_objects / total_time,
       time2str(time_init),
       time2str(total_time / self.periods),
       time2str(time_year)))
            show_top_processes(process_time, 10)
#            if config.debug:
#                show_top_expr()

            if run_console:
                c = console.Console(self.console_entity, periods[-1],
                                    self.globals_def, globals_data)
                c.run()

        finally:
            if h5in is not None:
                h5in.close()
            h5out.close()
            if h5_autodump is not None:
                h5_autodump.close()
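# Hedged illustration (not part of the original class): periods in run() are
# YYYYMM integers, so stepping t months forward combines int(t / 12) * 100 for
# the year part with t % 12 for the month part. For example, with
# init_period = 201101, a monthly time step and 3 simulated periods, the
# generated list is [201101, 201102, 201103, 201104]. This sketch covers only
# the forward-in-time case (retro = False).
def build_periods(init_period, n_periods, time_step=1):
    """Reproduce the period list construction used in run()."""
    return [init_period + int(t / 12) * 100 + t % 12
            for t in range(0, (n_periods + 1) * time_step, time_step)]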