def train_feature(patches_fname, offsets_fname, f, haars_per_feat):
    patches, offsets = load(patches_fname), load(offsets_fname)
    haar_locs = load("haar_locs.pyz")
    x_regressors = boost(patches, offsets[:, 0], haars_per_feat, haar_locs)
    y_regressors = boost(patches, offsets[:, 1], haars_per_feat, haar_locs)
    # binary mode for the binary (highest) pickle protocol
    pdump(x_regressors, open("x_regressors_%d.pyz" % f, 'wb'), -1)
    pdump(y_regressors, open("y_regressors_%d.pyz" % f, 'wb'), -1)
def fire(self, sources, values, trigger, etext):
    text = self.etext.get()
    process = 0
    if self.thetype == "IPs":
        if match(r'^((\d{1,2}|1\d{2}|2[0-4]\d|25[0-5])\.){3}(\d{1,2}|1\d{2}|2[0-4]\d|25[0-5])$', etext.get()):
            process += 1
    if self.thetype == "URLs":
        process += 1
    if self.thetype == "SPECs":
        process += 1
    if process >= 1:
        self.tbox["background"] = "White"
        self.tbox["foreground"] = "Black"
        counter = 0
        jobs_list = []
        if not (self.thetype == "SPECs"):
            for x in xrange(len(sources[:])):
                if values[0][x].get():
                    thread = threading.Thread(target=self.TaskHandler, args=(x, ))
                    thread.setDaemon(True)
                    jobs_list.append(thread)
        else:
            thread = threading.Thread(target=self.TaskHandler, args=(values, ))
            thread.setDaemon(True)
            jobs_list.append(thread)
        for j in jobs_list:
            j.start()
        if not (self.thetype == "SPECs"):
            for line in open(self.read, 'r'):
                if text in line:
                    counter = counter + 1
            for line in open(self.blread, 'r'):
                if text in line:
                    counter = counter + 1
            if counter == 0:
                with open(self.read, 'a') as z:
                    z.write(str(etext.get()) + "\n")
    else:
        self.tbox["background"] = "Red"
        self.tbox["foreground"] = "white"

    export = []
    set_save = ""
    if self.thetype == "IPs":
        set_save = self.a.IPSettingsfile
        for x in xrange(len(self.a.ip.values)):
            if self.values[0][x].get() == 0:
                export.append(0)
            else:
                export.append(1)
        pdump(export, open(set_save, 'wb'))
    if self.thetype == "URLs":
        set_save = self.a.URLSettingsfile
        for x in xrange(len(self.a.url.values)):
            if self.values[0][x].get() == 0:
                export.append(0)
            else:
                export.append(1)
        pdump(export, open(set_save, 'wb'))
def showSettings(self, where):
    if self.showsettings.get() == 0:
        self.app.grid_forget()
        self.update()
        self.top.update()
        set_save = ""
        export = []
        if self.thetype == "IPs":
            set_save = self.a.IPdimensions
        if self.thetype == "URLs":
            set_save = self.a.URLdimensions
        if self.thetype == "SPECs":
            set_save = self.a.SPECIALdimensions
        export.append(int(self.winfo_width()))
        export.append(int(self.winfo_height()))
        pdump(export, open(set_save, 'wb'))
    if self.showsettings.get() == 1:
        self.app.grid(column=4, row=where, sticky="NW")
        self.app.update()
        self.update()
        self.top.update()
        set_save = ""
        export = []
        if self.thetype == "IPs":
            set_save = self.a.IPdimensions
        if self.thetype == "URLs":
            set_save = self.a.URLdimensions
        if self.thetype == "SPECs":
            set_save = self.a.SPECIALdimensions
        export.append(int(self.winfo_width()))
        export.append(int(self.winfo_height()))
        pdump(export, open(set_save, 'wb'))
    if (path.getsize(self.a.IPdimensions) > 0) and (path.getsize(self.a.URLdimensions) > 0) and (path.getsize(self.a.SPECIALdimensions) > 0):
        self.update()
        IPs_dim = pload(open(self.a.IPdimensions, "rb"))
        URLs_dim = pload(open(self.a.URLdimensions, "rb"))
        SPECs_dim = pload(open(self.a.SPECIALdimensions, "rb"))
        theIPx = int(IPs_dim[0])
        theIPy = int(IPs_dim[1])
        theURLx = int(URLs_dim[0])
        theURLy = int(URLs_dim[1])
        theSPECx = int(SPECs_dim[0])
        theSPECy = int(SPECs_dim[1])
        newx = 0
        newy = theIPy + theURLy + theSPECy + 15
        if theIPx > newx:
            newx = theIPx
        if theURLx > newx:
            newx = theURLx
        if theSPECx > newx:
            newx = theSPECx
        self.top.geometry(str(newx) + "x" + str(newy))
        self.top.update()
def get_access_token(path_to_tok='./translate.tok'):
    if fexist(path_to_tok):
        ftok = open(path_to_tok, 'r+')
        tokdata = pload(ftok)
        expiretime = tokdata['expires_in']
        # Reuse the cached token while it has not yet reached its expiry time.
        if datetime.now() < expiretime:
            return tokdata['token']
    else:
        ftok = open(path_to_tok, 'w')
    # Otherwise request a fresh token and cache it together with its expiry time.
    args = {'client_id': clientid,
            'client_secret': clientse,
            'scope': 'http://api.microsofttranslator.com/',
            'grant_type': 'client_credentials'}
    enc_args = urllib.urlencode(args)
    req = urllib2.Request(tok_url, enc_args)
    response = urllib2.urlopen(req)
    data = json.load(response)
    timeandten = datetime.now() + timedelta(minutes=10)
    pdump({'token': data['access_token'], 'expires_in': timeandten}, ftok)
    return data['access_token']
def cache(target, args, identifier=None, cache_life=3 * 24 * 3600):
    '''
    Run the target function with the given args and store the result in a
    pickled cache file named after the given identifier (or the function name).
    On later calls the cached output is returned unless cache_life has expired.
    '''
    if identifier is None:
        identifier = target.__name__
    identifier = sub(r'[/\\\*;\[\]\':=,<>]', '_', identifier)
    path = join(PATH_RESOURCES, f'.pickled/{identifier}.pk')
    makedirs(dirname(path), exist_ok=True)
    now = time()
    if exists(path):
        with open(path, 'rb') as fp:
            save_time, value = pload(fp)
        if now - save_time <= cache_life:
            return value
    res = target(*args)
    with open(path, 'wb') as fp:
        pdump((now, res), fp, protocol=3)
    return res
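# A minimal usage sketch for cache() above (not part of the original snippet):
# it assumes PATH_RESOURCES and the aliased imports (join, dirname, makedirs,
# exists, time, sub, pload, pdump) are in scope; fetch_dataset and its URL are
# hypothetical placeholders.
def fetch_dataset(url):
    ...  # some expensive download or computation

# The first call runs fetch_dataset; repeat calls within cache_life (here 1 hour)
# return the pickled result instead.
rows = cache(fetch_dataset, ('https://example.org/data.json',),
             identifier='dataset_v1', cache_life=3600)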
def rescue(identifier, function, arguments, path_data="data",
           path_cache=".pickled/%s.pk", cache_life=259200, sr=22500,
           window=45000, invalid=r"[/\\\*;\[\]\":=,<>]"):
    """Cache the output of a function, keyed by identifier, sample rate and window."""
    path = path_cache % sub(invalid, "_", identifier)
    makedirs(dirname(path), exist_ok=True)
    if exists(path):
        with open(path, "rb") as fp:
            save_time, saved_sr, value, saved_window = pload(fp)
        # Reuse the cached value only if it is fresh and was computed with the
        # same sample rate and window size.
        if NOW - save_time <= cache_life and saved_sr == sr and saved_window == window:
            return value
    res = function(*arguments)
    with open(path, "wb") as fp:
        pdump((NOW, sr, res, window), fp, protocol=3)
    return res
def step1_inputs_maker(path_input_file):
    file_name = os.path.basename(path_input_file)
    dir_work = os.path.dirname(path_input_file)
    id_ = file_name.split(".")[0]
    path_setting_file = os.path.join(dir_work, "{} - {}".format(id_, "prefs.p"))
    path_variable_file = os.path.join(dir_work, "{} - {}".format(id_, "args_main.p"))
    fire = standard_fire(np.arange(0, 3 * 60 * 60, 1), 273.15 + 20)
    inputs_extra = {"iso834_time": fire[0],
                    "iso834_temperature": fire[1]}
    # list_kwargs, dict_settings = mc_inputs_generator(dict_extra_variables_to_add=inputs_extra,
    #                                                  dir_file=path_input_file)
    df_args, dict_settings = mc_inputs_generator2(dict_extra_variables_to_add=inputs_extra,
                                                  dir_file=path_input_file)

    # Save df_args and dict_settings as pickle objects.
    pdump(df_args, open(path_variable_file, "wb"))
    saveprint(os.path.basename(path_variable_file))
    pdump(dict_settings, open(path_setting_file, "wb"))
    saveprint(os.path.basename(path_setting_file))
def loadAndCacheDbHTMLPages(databaseSiteBaseURL, databaseSiteHTMLDumpPath, numPages):
    if path.exists(databaseSiteHTMLDumpPath):
        print("## Status: Found cached copy of Music 4 Dance website.")
        databaseSiteHTMLPages = pload(open(databaseSiteHTMLDumpPath, "rb"))
    else:
        # Make HTTP request
        try:
            databaseSiteHTMLPages = []
            for i in range(numPages):
                databaseSiteHTMLPages.append(
                    getHTML(databaseSiteBaseURL + "?page=%d" % (i + 1)).text)
        except:
            print("!! Error: Retrieving Music 4 Dance website unsuccessful.")
            exit(0)
        else:
            print("## Status: Retrieved Music 4 Dance website.")
            # Save for later
            pdump(databaseSiteHTMLPages, open(databaseSiteHTMLDumpPath, "wb"))
            print("## Status: Cached copy of Music 4 Dance website for later.")
    return databaseSiteHTMLPages
def __alt_authorize(self):
    credentials = None
    if USE_SERVICE_ACCOUNTS and not self.alt_auth:
        self.alt_auth = True
        if ospath.exists(self.__G_DRIVE_TOKEN_FILE):
            LOGGER.info("Authorize with token.pickle")
            with open(self.__G_DRIVE_TOKEN_FILE, 'rb') as f:
                credentials = pload(f)
            if credentials is None or not credentials.valid:
                if credentials and credentials.expired and credentials.refresh_token:
                    credentials.refresh(Request())
                else:
                    flow = InstalledAppFlow.from_client_secrets_file(
                        'credentials.json', self.__OAUTH_SCOPE)
                    LOGGER.info(flow)
                    credentials = flow.run_console(port=0)
                # Save the credentials for the next run
                with open(self.__G_DRIVE_TOKEN_FILE, 'wb') as token:
                    pdump(credentials, token)
            return build('drive', 'v3', credentials=credentials, cache_discovery=False)
    return None
def load_mc10(study_dir, pre_time=0, segment=True, sync=True, save=True,
              save_loc=None, save_subj=False, start_time_0=True,
              return_data=True):
    """
    Load raw MC10 data from a study directory containing folders for each
    subject, as downloaded from the BioStamp RC web portal.

    Parameters
    ----------
    study_dir : str
        Base study directory containing subject folders.
    pre_time : float, int, optional
        Amount of time in seconds to import before the start annotation. Only
        applied if 'segment' is True. Default is 0 seconds.
    segment : bool, optional
        Segment the data based on annotations. Defaults to True.
    sync : bool, optional
        Synchronize the timestamps for the inertial sensors. Timestamps for
        sensors with the same sampling rates will be the same. All sensors
        will start at the same time, regardless of sampling rate.
    save : bool, optional
        Whether or not to save (serialize) the imported data. Defaults to True.
    save_loc : str, optional
        Where to save the data. Options are None (save in import location),
        'import' (save in import location), 'local' (save in the file
        location), or a file path where to save the data. Defaults to None.
    save_subj : bool, optional
        Whether or not to save each subject as an individual file. If True,
        sets 'return_data' to False. Defaults to False.
    start_time_0 : bool, optional
        Start the timestamps at 0 time for each event, or the whole trial.
        Defaults to True.
    return_data : bool, optional
        Return the imported data as a dictionary (see Returns) if 'save' is
        True. If 'save' is False, the imported data is always returned.

    Returns
    -------
    save_paths : str, list, optional
        Path to saved data file(s). If 'save_subj' is True, a list of all the
        subject files, else the path to the one saved file.
    data : dict, optional
        Loaded data. Returned if 'save' and 'return_data' are True, or if
        'save' is False. Top-down structure is 'subject_id',
        'sensor location', 'event id' (if segmenting), 'sensor_type'.
    """
    if save:
        if save_subj:  # if splitting subjects, do not return any data
            return_data = False
        save_paths = []

        # Determine the file save location
        if save_loc is None or save_loc == 'import':
            save_path = study_dir
        elif save_loc == 'local':
            save_path = getcwd()
        elif isinstance(save_loc, str):
            if path.isdir(save_loc):
                save_loc_split = save_loc.split(sep)
                if save_loc_split[-1] == '':
                    save_path = sep.join(save_loc_split[:-1])
                else:
                    save_path = save_loc
            else:
                raise FileNotFoundError(f'No such directory: {save_loc}')
    else:
        save_subj = False
        return_data = True

    bn = len(study_dir.split(sep))  # base length of study folder

    # list all subfolders, which are subject IDs
    subjs = [i for i in list(listdir(study_dir)) if path.isdir(study_dir + sep + i)]

    data = {i: dict() for i in subjs}  # allocate data storage for each subject

    for sub in subjs:
        print(f'{sub}\n ---------')
        wkd = walk(study_dir + sep + sub)  # walk down each subject folder
        temp = dict()
        for dname, _, fnames in wkd:
            if 'annotations.csv' in fnames and segment:
                # import annotations for data segmenting
                events, starts, stops = loadtxt(dname + sep + 'annotations.csv',
                                                dtype='U35', delimiter='","',
                                                skiprows=1, usecols=(2, 4, 5),
                                                unpack=True)
                # checking for non-unique event names
                uniq, inds, cnts = unique(events, return_counts=True,
                                          return_inverse=True)
                if any(cnts > 1):  # if any events have more than 1 count
                    ecnts = full_like(cnts, 1)  # array to keep track of the number to add
                    for k in range(len(inds)):  # iterate over the events
                        if cnts[inds[k]] > 1:  # if this event has more than one occurrence
                            # append the chronological occurrence number to the name
                            events[k] += f' {ecnts[inds[k]]}'
                            ecnts[inds[k]] += 1  # increment the occurrence number tracker
            elif 'accel.csv' in fnames:
                sens_loc = dname.split(sep)[bn + 1]  # get the sensor location from the directory name
                temp[sens_loc] = dict()  # make a sub-dictionary for the sensor location
                for fname in fnames:  # can be multiple files per location (ie accel and gyro)
                    if 'errors' not in fname:  # don't want to do anything with *_error.csv files
                        # load data into the data dictionary
                        print(sens_loc, fname)
                        temp[sens_loc][fname[:-4]] = loadtxt(dname + sep + fname,
                                                             dtype=float,
                                                             delimiter=',',
                                                             skiprows=1)

        if sync:
            temp = _align_timestamps(temp)  # align time stamps of data

        if segment:  # segment the data
            data[sub] = _segment_data(temp, starts, stops, events,
                                      pre_time=pre_time,
                                      zero_start=start_time_0)
        else:
            for loc in temp.keys():
                for typ in temp[loc].keys():
                    if start_time_0:
                        temp[loc][typ][:, 0] -= temp[loc][typ][0, 0]
            data[sub] = temp

        for loc in data[sub].keys():
            for typ in data[sub][loc].keys():
                for ev in data[sub][loc][typ].keys():
                    print(1 / mean(diff(data[sub][loc][typ][ev][:, 0])))
                    data[sub][loc][typ][ev][:, 0] /= 1000  # convert all timestamps to ms

        if save_subj:
            if save_loc is None or save_loc == 'import':
                fid = open(save_path + sep + sub + sep + f'data_subj_{sub}.pickle', 'wb')
                save_paths.append(save_path + sep + sub + sep + f'data_subj_{sub}.pickle')
            else:
                fid = open(save_path + sep + f'data_subj_{sub}.pickle', 'wb')
                save_paths.append(save_path + sep + f'data_subj_{sub}.pickle')
            pdump(data[sub], fid)  # serialize the data
            fid.close()  # close the file

    if save and not save_subj:
        fid = open(save_path + sep + 'data.pickle', 'wb')
        save_paths = save_path + sep + 'data.pickle'
        pdump(data, fid)
        fid.close()

    if return_data and save:
        return save_paths, data
    elif save:
        return save_paths
    else:
        return data
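# A minimal usage sketch for load_mc10() above (not part of the original snippet):
# the study directory and the dictionary keys shown are hypothetical and assume
# the BioStamp RC folder layout described in the docstring.
save_paths, data = load_mc10('/path/to/study_dir', segment=True, sync=True,
                             save=True, save_subj=False)
# With segmenting on, access follows subject -> sensor location -> event -> sensor type,
# e.g. data['subject_01']['chest']['walk 1']['accel'] is an ndarray of [t, x, y, z] rows.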
ts = TimeSeries(f_hdf5)
print "hdf5 files are: ", f_hdf5
# cs= float(raw_input('enter cs in cm/sec: '))
# tB= float(raw_input('enter bondi time in sec: '))
for cnt, f in enumerate(f_hdf5):
    print "loading file: %s" % f
    pf = load(f)
    ts.Time[cnt] = pf.current_time
    cgrid = pf.h.covering_grid(level=lev,
                               left_edge=pf.h.domain_left_edge,
                               dims=pf.domain_dimensions)  # approx from lev0
    alld = pf.h.all_data()  # yt exact
    gamma = 1.001
    cs_iso = cs_ad / gamma**0.5
    dat = CovGrid_vs_AllDat(cgrid, alld,
                            cs_iso, lev, pf.domain_width[0], pf.domain_dimensions[0])
    # make histograms
    fsave = spath + "hist_" + pf.basename[5:9] + ".png"
    # Bmag = (bx**2 + by**2 + bz**2)**0.5
    # beta = 8*np.pi*rho*cs_iso**2/Bmag**2
    # MachA = (beta/2)**0.5*MachS
    plot_hgrams(fsave, dat)
    # save Qs of Interest
    save_vals(ts, cnt, dat)
fsave = spath + "tseries.png"
plot_TSeries(fsave, ts, tB)
fout = open(spath + "tseries.dump", "w")
pdump((ts, cs_iso, tB), fout)
fout.close()
def main(updater):
    dispatcher = updater.dispatcher
    dbFuncs.initDB()

    newList = ConversationHandler(
        entry_points=[
            CommandHandler('new', new, Filters.private),
            CommandHandler('start', start, Filters.private, pass_args=True)
        ],
        states={
            SETNAME: [MessageHandler(Filters.text & Filters.private, setName)]
        },
        fallbacks=[
            CommandHandler('cancel', cancel, Filters.private, pass_user_data=True)
        ])

    listHandler = ConversationHandler(
        entry_points=[CallbackQueryHandler(pushInline)],
        states={SETTINGS: [CallbackQueryHandler(pushAdmin)]},
        fallbacks=[
            CallbackQueryHandler(pushInline),
            CommandHandler('cancel', cancel, Filters.private, pass_user_data=True)
        ],
        per_message=True)

    dispatcher.add_handler(newList)
    dispatcher.add_handler(listHandler)
    dispatcher.add_handler(CallbackQueryHandler(pushInline))
    dispatcher.add_handler(
        CommandHandler('send', sendAll,
                       Filters.private & Filters.chat(chat_id=[114951690])))
    dispatcher.add_handler(CommandHandler('help', help, Filters.private))
    dispatcher.add_handler(CommandHandler('backup', backup, Filters.private))
    dispatcher.add_handler(InlineQueryHandler(inlineQuery))
    dispatcher.add_handler(ChosenInlineResultHandler(chosenQuery))
    dispatcher.add_handler(
        MessageHandler(Filters.private & Filters.regex(r'^\/.*'), blankCode))
    dispatcher.add_handler(
        MessageHandler(
            Filters.text & Filters.private & Filters.update.edited_message,
            editMessage))
    dispatcher.add_handler(
        MessageHandler(
            Filters.private & Filters.text & Filters.reply &
            (~Filters.update.edited_message), rcvReply))
    dispatcher.add_handler(
        MessageHandler(
            Filters.text & Filters.private & (~Filters.update.edited_message),
            rcvMessage))
    dispatcher.add_error_handler(contextCallback)

    try:
        with open('{0}/userdata'.format(backupsDir), 'rb') as file:
            dispatcher.user_data = pload(file)
    except Exception as e:
        logger.warning(repr(e))
    try:
        with open('{0}/newList'.format(backupsDir), 'rb') as file:
            newList.conversations = pload(file)
    except Exception as e:
        logger.warning(repr(e))
    try:
        with open('{0}/listHandler'.format(backupsDir), 'rb') as file:
            listHandler.conversations = pload(file)
    except Exception as e:
        logger.warning(repr(e))

    updater.start_polling()
    updater.idle()

    try:
        with open('{0}/userdata'.format(backupsDir), 'wb+') as file:
            pdump(dispatcher.user_data, file)
    except Exception as e:
        logger.warning(repr(e))
    try:
        with open('{0}/newList'.format(backupsDir), 'wb+') as file:
            pdump(newList.conversations, file)
    except Exception as e:
        logger.warning(repr(e))
    try:
        with open('{0}/listHandler'.format(backupsDir), 'wb+') as file:
            pdump(listHandler.conversations, file)
    except Exception as e:
        logger.warning(repr(e))
def step2_main_calc(path_input_file, n_proc=0, progress_print_interval=1):
    dir_work = os.path.dirname(path_input_file)
    kwargs_file_name = os.path.basename(path_input_file)

    # Load kwargs
    list_kwargs = pload(open(path_input_file, "rb"))

    # Identify the id string
    id_ = kwargs_file_name.split(" - ")[0]

    # Check the number of processes to be used
    n_proc = os.cpu_count() if int(n_proc) < 1 else int(n_proc)

    # SIMULATION
    print("SIMULATION STARTS")
    print(("{}{}\n" * 2).format("Number of Threads:", n_proc,
                                "Total Simulations:", len(list_kwargs)))

    time_count_simulation = time.perf_counter()
    m = mp.Manager()
    q = m.Queue()
    p = mp.Pool(n_proc)
    jobs = p.map_async(worker, [(kwargs, q) for kwargs in list_kwargs])
    count_total_simulations = len(list_kwargs)
    while progress_print_interval:
        if jobs.ready():
            break
        else:
            print("SIMULATION COMPLETE {:3.0f} %...".format(
                q.qsize() * 100 / count_total_simulations))
            time.sleep(progress_print_interval)
    results = jobs.get()
    time_count_simulation = time.perf_counter() - time_count_simulation
    print("SIMULATION COMPLETE IN {:0.3f} SECONDS".format(time_count_simulation))

    # POST PROCESS
    print("POST PROCESSING STARTS")

    # format outputs
    results = np.array(results, dtype=float)
    df_outputs = pd.DataFrame({
        "TIME EQUIVALENCE [min]": results[:, 0] / 60.,
        "SEEK STATUS [bool]": results[:, 1],
        "WINDOW OPEN FRACTION [%]": results[:, 2],
        "FIRE LOAD DENSITY [MJ/m2]": results[:, 3],
        "FIRE SPREAD SPEED [m/s]": results[:, 4],
        "BEAM POSITION [m]": results[:, 5],
        "MAX. NEAR FIELD TEMPERATURE [C]": results[:, 6],
        "FIRE TYPE [0:P., 1:T.]": results[:, 7],
        "PEAK STEEL TEMPERATURE [C]": results[:, 8] - 273.15,
        "PROTECTION THICKNESS [m]": results[:, 9],
        "SEEK ITERATIONS": results[:, 10]
    })
    df_outputs = df_outputs[[
        "TIME EQUIVALENCE [min]", "PEAK STEEL TEMPERATURE [C]",
        "PROTECTION THICKNESS [m]", "SEEK STATUS [bool]",
        "WINDOW OPEN FRACTION [%]", "FIRE LOAD DENSITY [MJ/m2]",
        "FIRE SPREAD SPEED [m/s]", "BEAM POSITION [m]",
        "MAX. NEAR FIELD TEMPERATURE [C]", "FIRE TYPE [0:P., 1:T.]"
    ]]
    df_outputs.sort_values(by=["TIME EQUIVALENCE [min]"], inplace=True)
    df_outputs.reset_index(drop=True, inplace=True)

    path_results_file = os.path.join(dir_work, "{} - {}".format(id_, "res_df.p"))
    pdump(df_outputs, open(path_results_file, "wb"))
def rec_dump(obj, file):
    if isinstance(obj, SerializableObject):
        obj.serialize(file)
    elif isinstance(obj, Expression):
        pdump(str(obj), file)
    elif isinstance(obj, (list, tuple)):
        pdump(InitSequenceSerializable, file)
        for el in obj:
            pdump(get_class(el), file)
            rec_dump(el, file)
        pdump(FinishSequenceSerializable, file)
    elif isinstance(obj, dict):
        pdump(InitSequenceSerializable, file)
        for key in obj:
            pdump(get_class(key), file)
            rec_dump(key, file)
            pdump(get_class(obj[key]), file)
            rec_dump(obj[key], file)
        pdump(FinishSequenceSerializable, file)
    else:
        pdump(obj, file)
def save_game():
    global SAVE_DATA
    if SAVE_DATA is None:
        SAVE_DATA = blank_save()
    with open(SAVE_FILE_PATH, 'wb') as f:
        pdump(SAVE_DATA, f)
def build_people_model(host, port, **kwargs):
    global PROD_COMO
    ppl_model_data = 'ppl_model_data.pickle'
    batch_size = kwargs.get('batch_size', 10000)
    vocabulary = get_ingredient_vocabulary(host, port)

    # The tfidf_vect will ignore the following words
    stop_words = [
        '', 'water', 'glycerin', 'titanium dioxide', 'iron oxides', 'beeswax',
        'methylparaben', 'propylparaben', 'propylene glycol', 'panthenol', 'mica']

    # Create vectorizers
    d_vect = DictVectorizer(sparse=False)
    tfidf_vect = TfidfVectorizer(
        tokenizer=get_ingredients_as_list,
        lowercase=False,
        stop_words=stop_words,
        vocabulary=vocabulary)

    print("Loading people from database, batch_size:", str(batch_size))
    ppl_filt = {}
    ppl_prjctn = {
        '_id': False,
        'race': True,
        'birth_sex': True,
        'age': True,
        'acne': True,
        'skin': True,
        'acne_products': True}  # Don't include any PII
    db_objects = PEOPLE_DB.read(ppl_filt, projection=ppl_prjctn)

    y, demo_mult = [], []
    batch_num, pulled = 0, 0
    X = None

    # Work in batches to build the dataset
    while pulled <= db_objects.count(with_limit_and_skip=True):
        # Initialize
        X_demo_lst, X_prod_lst = [], []
        people = []
        print('Parsing batch:', batch_num)
        try:
            # Build a batch
            for i in range(batch_size):
                people.append(DB_Object.build_from_dict(db_objects.next()))
                pulled += 1
        except StopIteration:
            # End of available data
            break

        # Extract features
        for person in people:
            # Create a new entry for each product
            # Note: the model is only applicable to entries with products
            for product_id in person.pop('acne_products'):
                # Pull product ingredients info
                X_prod_lst.append([product_id])
                # Pull demographic info
                X_demo_lst.append(person)
                # Generate demographic multiplier
                mult = get_multiplier(person)
                demo_mult.append(mult)

        # Vectorize data
        X_demo = d_vect.fit_transform(X_demo_lst)      # X_demo is now a numpy array
        X_prod = tfidf_vect.fit_transform(X_prod_lst)  # X_prod is now a CSR sparse matrix

        # Add batch result to output matrix
        if X is not None:
            X_t = hstack([csr_matrix(X_demo), X_prod], format="csr")
            try:
                X = vstack([X, X_t], format="csr")
            except ValueError:
                break
        else:
            # Initialize X
            X = hstack([csr_matrix(X_demo), X_prod], format="csr")
        batch_num += 1

    for como, mult in zip(PROD_COMO, demo_mult):
        val = como * mult
        if val < 6:
            y.append(0)
        elif val < 12:
            y.append(1)
        else:
            y.append(2)

    print('Storing vectorized data and training labels')
    # Flatten CSR sparse matrix to strings
    model = {
        'X': X,
        'y': y,
        'd_vect': d_vect,
        'tfidf_vect': tfidf_vect,
        'vocabulary': vocabulary
    }

    print("Saving model data to disk for next time")
    # Insert the model into the model database
    MODEL_DB.create_file(pdumps(model, protocol=2), filename="ml_people_data")
    # Save model data to disk
    with open(ppl_model_data, "wb") as pickle_out:
        pdump(model, pickle_out)
    print('[SUCCESS] People model data post-processed and stored')
def step2_calc(df_input, dict_pref, path_input_file, progress_print_interval=5):
    # LOCAL SETTINGS
    # ==============
    # To limit memory usage when multiprocessing is employed, a maximum number
    # of tasks is defined for a single process, so a process cannot hold data
    # beyond this limit.
    mp_maxtasksperchild = 1000

    # Load kwargs
    dict_input_kwargs = df_input.to_dict(orient="index")
    list_kwargs = []
    for key, val in dict_input_kwargs.items():
        val["index"] = key
        list_kwargs.append(val)

    # Load settings
    dict_settings = dict_pref
    n_proc = dict_settings["n_proc"]

    # Check the number of processes to be used
    n_proc = os.cpu_count() if int(n_proc) < 1 else int(n_proc)

    # SIMULATION START
    print(__strformat_1_1.format("Input file:", os.path.basename(path_input_file)))
    print(__strformat_1_1.format("Total simulations:", len(list_kwargs)))
    print(__strformat_1_1.format("Number of threads:", n_proc))

    time_simulation_start = time.perf_counter()
    m = mp.Manager()
    q = m.Queue()
    p = mp.Pool(n_proc, maxtasksperchild=mp_maxtasksperchild)
    jobs = p.map_async(calc_time_equiv_worker, [(kwargs, q) for kwargs in list_kwargs])
    count_total_simulations = len(list_kwargs)
    n_steps = 24  # length of the progress bar
    while progress_print_interval:
        if jobs.ready():
            time_simulation_consumed = time.perf_counter() - time_simulation_start
            print("{}{} {:.1f}s".format('█' * round(n_steps), '-' * round(0),
                                        time_simulation_consumed))
            break
        else:
            p_ = q.qsize() / count_total_simulations * n_steps
            print("{}{} {:03.1f}%".format('█' * int(round(p_)),
                                          '-' * int(n_steps - round(p_)),
                                          p_ / n_steps * 100),
                  end='\r')
            time.sleep(1)
    p.close()
    p.join()
    results = jobs.get()

    # format outputs
    results = np.array(results)
    df_output = pd.DataFrame({
        'TIME STEP [s]': results[:, 0],
        'TIME START [s]': results[:, 1],
        'TIME LIMITING []': results[:, 2],
        'WINDOW HEIGHT [m]': results[:, 3],
        'WINDOW WIDTH [m]': results[:, 4],
        'WINDOW OPEN FRACTION []': results[:, 5],
        'ROOM BREADTH [m]': results[:, 6],
        'ROOM DEPTH [m]': results[:, 7],
        'ROOM HEIGHT [m]': results[:, 8],
        'ROOM WALL THERMAL INERTIA [J/m2s1/2K]': results[:, 9],
        'FIRE LOAD DENSITY [MJ/m2]': results[:, 10],
        'FIRE HRR DENSITY [MW/m2]': results[:, 11],
        'FIRE SPREAD SPEED [m/s]': results[:, 12],
        'FIRE DURATION [s]': results[:, 13],
        'BEAM POSITION [m]': results[:, 14],
        'BEAM RHO [kg/m3]': results[:, 15],
        'BEAM C [-]': results[:, 16],
        'BEAM CROSS-SECTION AREA [m2]': results[:, 17],
        'BEAM FAILURE TEMPERATURE [C]': results[:, 18],
        'PROTECTION K [W/m/K]': results[:, 19],
        'PROTECTION RHO [kg/m3]': results[:, 20],
        'PROTECTION C OBJECT []': results[:, 21],
        'PROTECTION THICKNESS [m]': results[:, 22],
        'PROTECTION PERIMETER [m]': results[:, 23],
        'ISO834 TIME ARRAY [s]': results[:, 24],
        'ISO834 TEMPERATURE ARRAY [K]': results[:, 25],
        'MAX. NEAR FIELD TEMPERATURE [C]': results[:, 26],
        'SEEK ITERATION LIMIT []': results[:, 27],
        'SEEK PROTECTION THICKNESS UPPER BOUND [m]': results[:, 28],
        'SEEK PROTECTION THICKNESS LOWER BOUND [m]': results[:, 29],
        'SEEK BEAM FAILURE TEMPERATURE TOLERANCE [K]': results[:, 30],
        'INDEX': results[:, 31],
        'TIME EQUIVALENCE [s]': results[:, 32],
        'SEEK STATUS [0:Fail, 1:Success]': results[:, 33],
        'FIRE TYPE [0:P, 1:T]': results[:, 34],
        'SOUGHT BEAM TEMPERATURE [K]': results[:, 35],
        'SOUGHT BEAM PROTECTION THICKNESS [m]': results[:, 36],
        'SOUGHT ITERATIONS []': results[:, 37],
        'BEAM TEMPERATURE TO FIXED PROTECTION THICKNESS [K]': results[:, 38],
        'FIRE TIME ARRAY [s]': results[:, 39],
        'FIRE TEMPERATURE ARRAY [K]': results[:, 40],
        'OPENING FACTOR [m0.5]': results[:, 41]
    })

    df_output.set_index("INDEX", inplace=True)  # assign 'INDEX' column as DataFrame index
    df_output.sort_values('TIME EQUIVALENCE [s]', inplace=True)  # sort based on time equivalence

    path_results_file = os.path.join(
        os.path.dirname(path_input_file),
        "{} - {}".format(os.path.basename(path_input_file).split('.')[0], __fn_output))
    pdump(df_output, open(path_results_file, "wb"))

    return df_output
def step2_main_calc(path_input_file, progress_print_interval=5):
    # Settings (local)
    mp_maxtasksperchild = 1000

    # Make prefix, suffix, file and directory strings
    dir_work = os.path.dirname(path_input_file)
    name_kwargs_file = os.path.basename(path_input_file)
    id_ = name_kwargs_file.split(" - ")[0]

    # Load kwargs
    df_input_kwargs = pload(open(path_input_file, "rb"))
    dict_input_kwargs = df_input_kwargs.to_dict(orient="index")
    list_kwargs = []
    for key, val in dict_input_kwargs.items():
        val["index"] = key
        list_kwargs.append(val)
    # list_kwargs = [val for key, val in dict_input_kwargs.items()]

    # Load settings
    path_settings_file = os.path.join(dir_work, "{} - {}".format(id_, "prefs.p"))
    dict_settings = pload(open(path_settings_file, "rb"))
    n_proc = dict_settings["n_proc"]

    # Check the number of processes to be used
    n_proc = os.cpu_count() if int(n_proc) < 1 else int(n_proc)

    # Work out the sleep time between each progress check
    progress_print_sleep = len(list_kwargs) / 200
    if progress_print_sleep < 0.5:
        progress_print_sleep = 0.5
    elif progress_print_sleep > 500:
        progress_print_sleep = 500

    # SIMULATION
    print(strformat_1_1.format("Input file:", id_))
    print(strformat_1_1.format("Total simulations:", len(list_kwargs)))
    print(strformat_1_1.format("Number of threads:", n_proc))

    time_count_simulation = time.perf_counter()
    m = mp.Manager()
    q = m.Queue()
    p = mp.Pool(n_proc, maxtasksperchild=mp_maxtasksperchild)
    jobs = p.map_async(calc_time_equiv_worker, [(kwargs, q) for kwargs in list_kwargs])
    count_total_simulations = len(list_kwargs)
    progress_now = -progress_print_interval
    while progress_print_interval:
        if jobs.ready():
            break
        else:
            progress_now_ = int(q.qsize() * 100 / count_total_simulations)
            if progress_now_ >= (progress_now + progress_print_interval):
                progress_now = int(progress_now_ / progress_print_interval) * progress_print_interval
                print(strformat_1_1_1.format("Simulation progress:", str(progress_now), "%"))
            time.sleep(progress_print_sleep)
    p.close()
    p.join()
    results = jobs.get()
    time_count_simulation = time.perf_counter() - time_count_simulation
    print(strformat_1_1_1.format("Simulation completed in:", str(int(time_count_simulation)), "s"))

    # format outputs
    results = np.array(results, dtype=float)
    df_outputs = pd.DataFrame({
        "TIME EQUIVALENCE [min]": results[:, 0] / 60.,
        "SEEK STATUS [bool]": results[:, 1],
        "WINDOW OPEN FRACTION [%]": results[:, 2],
        "FIRE LOAD DENSITY [MJ/m2]": results[:, 3],
        "FIRE SPREAD SPEED [m/s]": results[:, 4],
        "BEAM POSITION [m]": results[:, 5],
        "MAX. NEAR FIELD TEMPERATURE [C]": results[:, 6],
        "FIRE TYPE [0:P., 1:T.]": results[:, 7],
        "PEAK STEEL TEMPERATURE TO GOAL SEEK [C]": results[:, 8] - 273.15,
        "PROTECTION THICKNESS [m]": results[:, 9],
        "SEEK ITERATIONS [-]": results[:, 10],
        "PEAK STEEL TEMPERATURE TO FIXED PROTECTION [C]": np.sort(results[:, 11]) - 273.15,
        "INDEX": results[:, 12]
    })
    df_outputs = df_outputs[[
        "TIME EQUIVALENCE [min]", "PEAK STEEL TEMPERATURE TO GOAL SEEK [C]",
        "PROTECTION THICKNESS [m]", "SEEK STATUS [bool]", "SEEK ITERATIONS [-]",
        "WINDOW OPEN FRACTION [%]", "FIRE LOAD DENSITY [MJ/m2]",
        "FIRE SPREAD SPEED [m/s]", "BEAM POSITION [m]",
        "MAX. NEAR FIELD TEMPERATURE [C]", "FIRE TYPE [0:P., 1:T.]",
        "PEAK STEEL TEMPERATURE TO FIXED PROTECTION [C]", "INDEX"
    ]]
    df_outputs.set_index("INDEX", inplace=True)
    # df_outputs.sort_values(by=["TIME EQUIVALENCE [min]"], inplace=True)
    # df_outputs.reset_index(drop=True, inplace=True)

    path_results_file = os.path.join(dir_work, "{} - {}".format(id_, "res_df.p"))
    pdump(df_outputs, open(path_results_file, "wb"))
    saveprint(os.path.basename(path_results_file))
# Check the install directory for the install serialisation files.
# Redefine this to include a check for where the eqparse package is installed.
from eqparse import __eqp_dir, __eqp_memory_file, __eqp_memory_file_comb, __eqp_mac_address as mac_address
from pickle import dump as pdump
from pickle import load as pload

# Reset both memory files to an empty dictionary.
for file_name in [__eqp_memory_file, __eqp_memory_file_comb]:
    file = open(file_name, 'wb')
    file.truncate()
    file.close()
    fprop = {}
    pdump(fprop, open(file_name, 'wb'))

# file = open(__eqp_memory_file,'wb')
# file.truncate()
# file.close()
# fprop = {}
# pdump(fprop,open(__eqp_memory_file,'wb'))
# file = open(__eqp_memory_file_comb,'wb')
# file.truncate()
# fcomb = {}
# file.close()
# pdump(fcomb,open(__eqp_memory_file_comb,'wb'))
def __init__(self):
    super(DistributedTrainer, self).__init__()
    # binary mode for the binary (highest) pickle protocol
    pdump(self.haar_locs, open("haar_locs.pyz", 'wb'), -1)
    else:
        phrase = ligne
    arbre = evaluation.readtree(evaluation.tokenize(phrase))[0]
    n, t = evaluation.nodesandleaves(arbre)
    nonterminaux.update(n)
    terminaux.update(t)
    productions = evaluation.getchildren(arbre)
    for elem in productions:
        leftside[elem] += len(productions[elem])
        for prod in productions[elem]:
            rightside[elem][prod] += 1

for nt in rightside:
    sumproba = 0
    for prod in rightside[nt]:
        prodproba = fractions.Fraction(rightside[nt][prod], leftside[nt])
        rightside[nt][prod] = prodproba
        sumproba += prodproba
    assert sumproba == 1

grammaire = CNF(terminaux, nonterminaux, rightside)
with open(args.output, "wb") as f:
    pdump(grammaire, f)
sink.glob_meanRho = interp_coarse_to_fine(glob.time, glob.meanRho, sink.time)
# get local rmsV, rmsVa, meanRho
data = np.loadtxt(args.rms_local_global + 'local_rmsV_rmsVa.txt', dtype=np.string_)
sort_ind = np.argsort(data[:, 1])  # sorted index for time to increase monotonically
sid_ind = np.arange(2, data.shape[1] - 1, 4)  # index of each sink id, starts at 2, then every 4th index after that
# get object to hold local rmsV, etc.
n_hdf5_files = data.shape[0]
local = LocalData(args.NSinks, n_hdf5_files, npts + 1)
# get LocalData from data
local.time = data[:, 1][sort_ind].astype(np.float128)
# local data from each sink
for cnt, i in enumerate(sid_ind):
    print "cnt,i= ", cnt, i
    local.sid[cnt] = data[:, i][0]  # redundant, but keeps writing in the correct sid
    print "sid[cnt],data[:,i][0]=", local.sid[cnt], data[:, i][0]
    local.rmsV[:, cnt] = data[:, i + 1][sort_ind].astype(np.float128)  # i+1 is the ith sink's rmsV at all times
    local.rmsVa[:, cnt] = data[:, i + 2][sort_ind].astype(np.float128)
    local.meanRho[:, cnt] = data[:, i + 3][sort_ind].astype(np.float128)
# interpolate
for i in range(args.NSinks):
    sink.rmsV[:, i] = interp_coarse_to_fine(local.time, local.rmsV[:, i], sink.time)
    sink.rmsVa[:, i] = interp_coarse_to_fine(local.time, local.rmsVa[:, i], sink.time)
    sink.meanRho[:, i] = interp_coarse_to_fine(local.time, local.meanRho[:, i], sink.time)
# save data
print "saving data"
if args.rms_local_global:
    name = "sinks_yesRms.pickle"
else:
    name = "sinks_noRms.pickle"
fout = open(os.path.join(args.path, name), "w")
pdump(sink, fout)
fout.close()
def main():
    args = parse_args()

    texts_mapping = jload(open(args.mapping_path))

    # For cross-lingual vectorization, the direction and the path to the
    # common vector matrix must be specified.
    lang_required = {'cross': ['direction', 'common_output_vectors_path']}
    check_args(args, 'lang', lang_required)

    # Cross-lingual vectorization
    if args.lang == 'cross':
        model_required = {'model': ['src_embeddings_path', 'tar_embeddings_path'],
                          'translation': ['tar_embeddings_path', 'bidict_path'],
                          'projection': ['src_embeddings_path', 'tar_embeddings_path', 'projection_path']
                          }
        check_args(args, 'method', model_required)
        if args.method == 'translation':
            args.src_embeddings_path = args.tar_embeddings_path

        directions = {d: lang for d, lang in zip(['src', 'tar'], args.direction.split('-'))}
        print(directions)

        print('Vectorizing src')
        src_vectorized = main_onelang('src', directions['src'], texts_mapping,
                                      args.src_lemmatized_path, args.src_embeddings_path,
                                      args.src_output_vectors_path, args.method,
                                      args.no_duplicates, args.projection_path,
                                      args.bidict_path, args.forced)
        # print(src_vectorized)

        print('Vectorizing tar')
        tar_vectorized = main_onelang('tar', directions['tar'], texts_mapping,
                                      args.tar_lemmatized_path, args.tar_embeddings_path,
                                      args.tar_output_vectors_path, args.method,
                                      args.no_duplicates, args.projection_path,
                                      args.bidict_path, args.forced)
        # print(tar_vectorized)

        # Assemble the common matrix and mapping
        common_len = len(src_vectorized) + len(tar_vectorized)
        emb_dim = src_vectorized.shape[1]
        common_vectorized = np.zeros((common_len, emb_dim))
        print(common_vectorized.shape)

        common2i = {}
        i2common = {}

        common_vectorized, common2i, i2common = to_common(
            texts_mapping, common2i, i2common, common_vectorized,
            tar_vectorized, directions['tar'], start_from=0)
        common_vectorized, common2i, i2common = to_common(
            texts_mapping, common2i, i2common, common_vectorized,
            src_vectorized, directions['src'], start_from=len(tar_vectorized))

        pdump(common_vectorized, open(args.common_output_vectors_path, 'wb'))

        texts_mapping['cross2i'] = common2i
        texts_mapping['i2cross'] = i2common
        jdump(texts_mapping, open(args.mapping_path, 'w', encoding='utf-8'))

        # print(i2common)
        # print(common2i)

    # Vectorization of a monolingual corpus (without assembling the common
    # vector matrix and common mapping)
    else:
        model_required = {'model': ['src_embeddings_path'],
                          'translation': ['tar_embeddings_path', 'bidict_path'],
                          'projection': ['src_embeddings_path', 'tar_embeddings_path', 'projection_path']
                          }
        check_args(args, 'method', model_required)
        if args.method == 'translation':
            args.src_embeddings_path = args.tar_embeddings_path

        print('Vectorizing the corpus')
        src_vectorized = main_onelang('src', args.lang, texts_mapping,
                                      args.src_lemmatized_path, args.src_embeddings_path,
                                      args.src_output_vectors_path, args.method,
                                      args.no_duplicates, args.projection_path,
                                      args.bidict_path, args.forced)
def serialize(self, file):
    r'''
    Method to dump an object into a file.

    This method writes an object to a file (opening the file first if it is
    given as a string). It relies on the package ``pickle`` for those objects
    that are not serializable through this interface; otherwise it uses a
    recursive serialization procedure to write the whole object.
    '''
    from pickle import dump as pdump
    from sage.all import Expression

    ## Auxiliary method for the recursive dumping procedure
    def rec_dump(obj, file):
        if isinstance(obj, SerializableObject):
            obj.serialize(file)
        elif isinstance(obj, Expression):
            pdump(str(obj), file)
        elif isinstance(obj, (list, tuple)):
            pdump(InitSequenceSerializable, file)
            for el in obj:
                pdump(get_class(el), file)
                rec_dump(el, file)
            pdump(FinishSequenceSerializable, file)
        elif isinstance(obj, dict):
            pdump(InitSequenceSerializable, file)
            for key in obj:
                pdump(get_class(key), file)
                rec_dump(key, file)
                pdump(get_class(obj[key]), file)
                rec_dump(obj[key], file)
            pdump(FinishSequenceSerializable, file)
        else:
            pdump(obj, file)

    def get_class(obj):
        if obj is None:
            return None
        return obj.__class__

    # Checking whether the file is opened
    is_str = (type(file) == str)
    if is_str:
        file = open(file, "wb+")

    # Serializing the list of types for args
    pdump([get_class(obj) for obj in self.sargs()], file)
    # Serializing the arguments
    for obj in self.sargs():
        rec_dump(obj, file)

    # Serializing the list of named arguments
    pdump([(key, get_class(self.skwds()[key])) for key in self.skwds()], file)
    # Serializing the named arguments
    for key in self.skwds():
        rec_dump(self.skwds()[key], file)

    # Closing the file if we opened it
    if is_str:
        file.close()
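# A minimal usage sketch for serialize() above (not part of the original snippet):
# MyModel is a hypothetical SerializableObject subclass exposing sargs()/skwds().
model = MyModel(3, name="demo")   # hypothetical constructor arguments
model.serialize("model.srl")      # writes argument classes, then arguments, recursively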
i += size

times = comm.gather(times, root=0)
if rank == 0:
    all_times = {}
    for d in times:
        for k, v in d.iteritems():
            all_times.setdefault(k, []).append(v)
phi_mean = comm.gather(phi_mean, root=0)
if rank == 0:
    all_phi_mean = {}
    for d in phi_mean:
        for k, v in d.iteritems():
            all_phi_mean.setdefault(k, []).append(v)
phi_median = comm.gather(phi_median, root=0)
if rank == 0:
    all_phi_median = {}
    for d in phi_median:
        for k, v in d.iteritems():
            all_phi_median.setdefault(k, []).append(v)
# save data
if rank == 0:
    name = spath + "K06_Table1_logrBL_%.1f_M0_%.1f.pickle" % (log_rB_L, M0i)
    fout = open(name, "w")
    pdump((times, phi_mean, phi_median), fout)
    fout.close()
    print "gathered and pickle dumped data"
def main_onelang(direction, lang, texts_mapping, lemmatized_path, embeddings_path,
                 output_vectors_path, method, no_duplicates, projection_path,
                 bidict_path, forced):
    i2lang = 'i2{}'.format(lang)

    # Collect the lemmatized texts from lemmatized
    if not os.path.isfile(lemmatized_path):  # nothing from this corpus has been parsed yet
        raise NotLemmatizedError()
    else:  # some files have already been parsed
        lemmatized_dict = jload(open(lemmatized_path, encoding='utf-8'))
        print('Got it, vectorizing now.')

        # Load the previous vectors, if there were any
        old_vectorized = load_vectorized(output_vectors_path, forced)

        # Have new ids appeared in the mapping?
        n_new_texts = len(texts_mapping[i2lang]) - len(old_vectorized)
        print('New texts: {}'.format(n_new_texts))

        if not n_new_texts:  # no new texts found
            return old_vectorized

        else:
            # Collect everything that has been lemmatized but not yet vectorized
            lemmatized_corpus = get_lemmatized_corpus(texts_mapping, i2lang,
                                                      lemmatized_dict, n_new_texts)
            # for i in lemmatized_corpus:
            #     print(i)

            # For tar we always load the model version
            vectorizer = build_vectorizer(direction, method, embeddings_path,
                                          no_duplicates, projection_path, bidict_path)

            # The size of the new corpus is taken to be the length of the mapping
            new_vectorized = np.zeros((len(texts_mapping[i2lang]), vectorizer.dim))

            # Fill in the old rows, if there were any
            for nr, line in enumerate(old_vectorized):
                new_vectorized[nr, :] = line
            # print(new_vectorized)
            # print(new_vectorized.shape)

            new_vectorized, not_vectorized = vectorize_corpus(
                lemmatized_corpus, new_vectorized, vectorizer,
                starts_from=len(old_vectorized))

            if output_vectors_path:
                pdump(new_vectorized, open(output_vectors_path, 'wb'))

            if not_vectorized:
                print('The following texts could not be vectorized:\n{}'.format(
                    '\t'.join(not_vectorized)))

            return new_vectorized