def optimize_people_model(host, port):
    ppl_model_data = 'ppl_model_data.pickle'
    result_data = 'estimator_results.pickle'

    print("Loading model data")
    try:
        # Prefer the local pickle cache if it exists.
        with open(ppl_model_data, "rb") as pickle_in:
            model = pload(pickle_in)
        print('Loaded from Pickle')
    except Exception as e:
        print("Loading from database...", e)
        # Fall back to the most recently uploaded model in the database.
        x = MODEL_DB.read_file('ml_product_data').sort("uploadDate", -1).limit(1)
        model = pload(x[0])
        print("Saving model data to disk for next time")
        with open(ppl_model_data, "wb") as pickle_out:
            pdump(model, pickle_out)

    print("Running gridsearchCV")
    estimator_results = test_estimators(est_dicts, model)
    plot_best_estimator(estimator_results)

    print("Saving gridsearchCV results, explore by un-pickling", result_data,
          "with an IPython shell or python program.")
    with open(result_data, "wb") as pickle_out:
        pdump(estimator_results, pickle_out)
def step4_results_visulisation(path_input_file):
    # Make file and directory names
    dir_work = os.path.dirname(path_input_file)
    obj_file_name = os.path.basename(path_input_file)
    id_ = obj_file_name.split(" - ")[0]  # Obtain ID string

    # Load settings
    path_settings_file = os.path.join(dir_work, "{} - {}".format(id_, "prefs.p"))
    dict_settings = pload(open(path_settings_file, "rb"))
    height_building = dict_settings["building_height"]

    # Load the dataframe obj file
    df_results = pload(open(path_input_file, "rb"))

    # Define horizontal line(s) to plot
    if height_building > 0:
        y__ = 1 - 64.8 / height_building ** 2
    else:
        y__ = 0.5

    # Obtain time equivalence, in minutes, as x-axis values
    x = np.sort(df_results["TIME EQUIVALENCE [min]"].values * 60.)
    y = np.arange(1, len(x) + 1) / len(x)
    f_interp = interp1d(y, x)
    if height_building > 0:
        y_line = 1 - 64.8 / height_building ** 2
        x_line = f_interp(y_line)
    else:
        x_line = y_line = 0

    plt = Scatter2D()
    plt_format = {"figure_size_scale": 0.7,
                  "axis_lim_y1": (0, 1),
                  "axis_lim_x": (0, 120),
                  "legend_is_shown": False,
                  "axis_label_x": "Time Equivalence [min]",
                  "axis_label_y1": "Fractile",
                  "marker_size": 0}
    plt.plot2(x / 60, y, "Simulation results")
    plt.format(**plt_format)

    if height_building > 0:
        x_end = plt.axes_primary.get_xlim()[1]
        y_end = plt.axes_primary.get_ylim()[1]

        x_line_ = x_line / 60.
        y_line_ = y_line

        plt.plot_vertical_line(x=x_line_)
        plt.axes_primary.text(x=x_line_, y=y_end, s="{:.0f}".format(x_line_),
                              va="bottom", ha="center", fontsize=6)
        plt.plot_horizontal_line(y=y_line_)
        plt.axes_primary.text(x=x_end, y=y_line_, s="{:.4f}".format(y_line_),
                              va="center", ha="left", fontsize=4)

    file_name = "{} - {}{}".format(id_, "res_plot_teq", ".png")
    file_path = os.path.join(dir_work, file_name)
    plt.save_figure2(file_path)

    saveprint(file_name)
def load_data():
    input_sentences, tags = None, None
    with open("./dumps/naver_movies_sentences.bin", 'rb') as fp:
        input_sentences = pload(fp)
    with open("./dumps/tags.bin", 'rb') as fp:
        tags = pload(fp)
    if not input_sentences or not tags:
        raise TypeError
    return input_sentences, tags
def showSettings(self, where):
    if self.showsettings.get() == 0:
        self.app.grid_forget()
        self.update()
        self.top.update()
        set_save = ""
        export = []
        if self.thetype == "IPs":
            set_save = self.a.IPdimensions
        if self.thetype == "URLs":
            set_save = self.a.URLdimensions
        if self.thetype == "SPECs":
            set_save = self.a.SPECIALdimensions
        export.append(int(self.winfo_width()))
        export.append(int(self.winfo_height()))
        pdump(export, open(set_save, 'wb'))
    if self.showsettings.get() == 1:
        self.app.grid(column=4, row=where, sticky="NW")
        self.app.update()
        self.update()
        self.top.update()
        set_save = ""
        export = []
        if self.thetype == "IPs":
            set_save = self.a.IPdimensions
        if self.thetype == "URLs":
            set_save = self.a.URLdimensions
        if self.thetype == "SPECs":
            set_save = self.a.SPECIALdimensions
        export.append(int(self.winfo_width()))
        export.append(int(self.winfo_height()))
        pdump(export, open(set_save, 'wb'))
    if (path.getsize(self.a.IPdimensions) > 0) and (path.getsize(self.a.URLdimensions) > 0) and (path.getsize(self.a.SPECIALdimensions) > 0):
        self.update()
        IPs_dim = pload(open(self.a.IPdimensions, "rb"))
        URLs_dim = pload(open(self.a.URLdimensions, "rb"))
        SPECs_dim = pload(open(self.a.SPECIALdimensions, "rb"))
        theIPx = int(IPs_dim[0])
        theIPy = int(IPs_dim[1])
        theURLx = int(URLs_dim[0])
        theURLy = int(URLs_dim[1])
        theSPECx = int(SPECs_dim[0])
        theSPECy = int(SPECs_dim[1])
        newx = 0
        newy = theIPy + theURLy + theSPECy + 15
        if (theIPx > newx):
            newx = theIPx
        if (theURLx > newx):
            newx = theURLx
        if (theSPECx > newx):
            newx = theSPECx
        self.top.geometry(str(newx) + "x" + str(newy))
        self.top.update()
def step6_results_visualization_temperature(path_input_file):
    # Make file and directory names
    dir_work = os.path.dirname(path_input_file)
    obj_file_name = os.path.basename(path_input_file)
    id_ = obj_file_name.split(" - ")[0]  # Obtain ID string

    # Load settings
    path_settings_file = os.path.join(dir_work, "{} - {}".format(id_, "prefs.p"))
    dict_settings = pload(open(path_settings_file, "rb"))
    height_building = dict_settings["building_height"]

    # Load the dataframe obj file
    df_results = pload(open(path_input_file, "rb"))

    # Define horizontal line(s) to plot
    # if height_building > 0:
    #     y__ = 1 - 64.8 / height_building ** 2
    # else:
    #     y__ = 0.5

    # Obtain peak steel temperature as x-axis values
    x = df_results["PEAK STEEL TEMPERATURE TO FIXED PROTECTION [C]"].values
    x = np.sort(x)
    y = np.arange(1, len(x) + 1) / len(x)

    # f_interp = interp1d(y, x)
    # if height_building > 0:
    #     y_line = 1 - 64.8 / height_building ** 2
    #     x_line = f_interp(y_line)
    # else:
    #     x_line = y_line = 0

    plt = Scatter2D()
    plt_format = {"figure_size_scale": 0.7,
                  "axis_lim_y1": (0, 1),
                  # "axis_lim_x": (0, 120),
                  "legend_is_shown": False,
                  "axis_label_x": "Peak Steel Temperature [$^\circ$C]",
                  "axis_label_y1": "Fractile",
                  "marker_size": 0}
    plt.plot2(x, y, "Simulation results")
    plt.format(**plt_format)

    file_name = "{} - {}{}".format(id_, "res_plot_temp", ".png")
    file_path = os.path.join(dir_work, file_name)
    plt.save_figure2(file_path)

    saveprint(os.path.basename(file_path))
def from_list_of_dicts(cls, events_list):
    """
    Returns an Events instance built using the given dicts.

    Parameters
    ----------
    events_list (list of dictionaries, or path to pickled file)
        This can either be an actual list, or the location of a pickled
        list. We'll group the events by 'name' into DataFrames, then pass
        those to from_dict. Note that only fields that appear in every
        dictionary with a given name will make it through to the
        corresponding DataFrame.
    """
    import os
    from pickle import load as pload

    if isinstance(events_list, str):
        if not os.path.isfile(events_list):
            raise ValueError("Could not find file %s" % events_list)
        # pickle.load expects an open binary file object, not a path string
        with open(events_list, "rb") as f:
            evl = pload(f)
    else:
        evl = events_list

    evd = {}
    for ev in evl:
        if not "name" in ev:
            continue
        if not ev["name"] in evd:
            evd[ev["name"]] = [ev]
        else:
            evd[ev["name"]].append(ev)
    for k in list(evd.keys()):
        evd[k] = pd.DataFrame(evd[k])
    return cls.from_dict(evd)
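# Usage sketch for from_list_of_dicts above (hypothetical data and helper name;
# assumes the surrounding Events class exposes this as a classmethod, as the
# docstring implies):
def _events_from_dicts_example():
    events = [
        {"name": "stimulus", "onset": 0.5, "duration": 1.0},
        {"name": "stimulus", "onset": 3.0, "duration": 1.0},
        {"name": "response", "onset": 1.2},
    ]
    # Events sharing a 'name' are grouped into one DataFrame each.
    return Events.from_list_of_dicts(events)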
def main(args):
    args.top_ns = arg_to_list(args.top_ns)
    lang2i = '{}2i'.format(args.lang)
    texts_mapping = jload(open(args.mapping_path))
    corpus_vecs = pload(open(args.corpus_vectors_path, 'rb'))

    golden_standard_ids = load_golden_standart(args.golden_standard_path, texts_mapping, lang2i)

    corp_sims = []  # predicted similar texts for each text
    for i in tqdm(range(len(corpus_vecs))):
        target_vec = corpus_vecs[i]
        sim_ids = predict_sim(target_vec, corpus_vecs)
        corp_sims.append(sim_ids)

    top_accuracies = [eval_acc(top_n, golden_standard_ids, corp_sims)
                      for top_n in args.top_ns]
    top_strings = ['ТОП-{}:\t{}'.format(top_n, top_acc)
                   for top_n, top_acc in zip(args.top_ns, top_accuracies)]
    print('\n'.join(top_strings))
def __authorize(self):
    # Get credentials
    credentials = None
    if not USE_SERVICE_ACCOUNTS:
        if ospath.exists(self.__G_DRIVE_TOKEN_FILE):
            with open(self.__G_DRIVE_TOKEN_FILE, 'rb') as f:
                credentials = pload(f)
        if credentials is None or not credentials.valid:
            if credentials and credentials.expired and credentials.refresh_token:
                credentials.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    'credentials.json', self.__OAUTH_SCOPE)
                LOGGER.info(flow)
                credentials = flow.run_console(port=0)
            # Save the credentials for the next run
            with open(self.__G_DRIVE_TOKEN_FILE, 'wb') as token:
                pdump(credentials, token)
    else:
        LOGGER.info(f"Authorizing with {SERVICE_ACCOUNT_INDEX}.json service account")
        credentials = service_account.Credentials.from_service_account_file(
            f'accounts/{SERVICE_ACCOUNT_INDEX}.json',
            scopes=self.__OAUTH_SCOPE)
    return build('drive', 'v3', credentials=credentials, cache_discovery=False)
def __init__(self, file_path="savefile"):
    try:
        with open(file_path, "rb") as read_file:
            self.user_text = pload(read_file)
    except (IOError, EOFError):
        self.user_text = dict()
    # Keep a writable handle on the same save file for later dumps.
    self.save_file = open(file_path, "wb")
def run(project_full_paths=list()):
    _strfmt_1_1 = "{:25}{}"
    __fn_output = "res.p"

    global __run_count
    __run_count += 1

    if len(project_full_paths) == 0 or __run_count > 1:
        from tkinter import filedialog, Tk, StringVar
        root = Tk()
        root.withdraw()
        folder_path = StringVar()
        while True:
            file_name = filedialog.askdirectory(title='Select problem definitions folder')
            if not file_name:
                break
            folder_path.set(file_name)
            project_full_paths.append(file_name)

    # MAIN BODY
    # =========
    for project_full_path in project_full_paths:
        list_path_input_file = step0_parse_input_files(dir_work=project_full_path)
        list_path_input_file.sort()
        list_input, list_output, list_pref, list_id = [], [], [], []
        print(_strfmt_1_1.format("Work directory:", project_full_path))

        for path_input_file in list_path_input_file:
            id_ = os.path.basename(path_input_file).split(".")[0]

            # Step 1: make a list of key word arguments for function inputs
            # -------------------------------------------------------------
            df_input, dict_pref = step1_inputs_maker(path_input_file)

            ff = os.path.join(project_full_path, " - ".join([id_, __fn_output]))
            if os.path.isfile(ff):
                df_output = pload(open(ff, "rb"))
            else:
                # Step 2: perform main time equivalence calculation
                # -------------------------------------------------
                df_output = step2_calc(df_input, dict_pref, path_input_file)

            list_input.append(df_input)
            list_id.append(id_)
            list_pref.append(dict_pref)
            list_output.append(df_output)

        # Step 3: select fire curves and output numerical and graphical files
        # -------------------------------------------------------------------
        step3_calc_post(list_path_input_file, list_pref, list_output)

    input("Press Enter to finish")
def step4_results_visulisation(path_input_file, height_building):
    dir_work = os.path.dirname(path_input_file)
    obj_file_name = os.path.basename(path_input_file)

    # Obtain ID string
    id_ = obj_file_name.split(" - ")[0]

    # Load the dataframe obj file
    df_results = pload(open(path_input_file, "rb"))

    # Obtain time equivalence, in minutes, as x-axis values
    x = df_results["TIME EQUIVALENCE [min]"].values * 60.

    # Filter out entries with failed seek status, i.e. peak steel temperature is not within the tolerance.
    # seek_status = df_results["SEEK STATUS [bool]"].values
    # x = x[seek_status == 1]

    # Define horizontal line(s) to plot
    y__ = 1 - 64.8 / height_building**2

    x, y, x_, y_, xy_found = mc_post_processing(x, y_find=[y__])

    plt = Scatter2D()
    plt.plot2(x / 60, y, "Simulation results")
    # plt.plot2(x_/60, y_, "Interpolated CDF")
    plt.format(**{"figure_size_scale": 0.7,
                  "axis_lim_y1": (0, 1),
                  "axis_lim_x": (0, 120),
                  "legend_is_shown": False,
                  "axis_label_x": "Time Equivalence [min]",
                  "axis_label_y1": "Fractile",
                  "marker_size": 0})
    # plt.update_line_format("Interpolated CDF", **{"line_width": 0.5, "color": "black", "line_style": ":"})

    for i in np.arange(0, len(xy_found), 1):
        x_found, y_found = xy_found[i, :]
        plt.plot_vertical_line(x=x_found / 60.)
        plt.plot_horizontal_line(y=y_found)
        plt.axes_primary.text(x=x_found / 60 + 1, y=y_found - 0.01,
                              s="({:.0f}, {:.4f})".format(x_found / 60, y_found),
                              va="top", ha="left", fontsize=6)

    # plt.update_legend(legend_loc=0)
    plt.save_figure(file_name=" - ".join([id_, "res_plot"]),
                    file_format=".pdf",
                    dir_folder=dir_work)

    print("POST PROCESSING COMPLETE")
def open_df(filename):
    '''Opens pickled DataFrame object

    Args:
        filename (str): path to pickled file

    Returns:
        DataFrame: opened DataFrame
    '''
    with open(filename, 'rb') as data_file:
        return pload(data_file)
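# Usage sketch for open_df above (hypothetical file name; assumes the pickle was
# written uncompressed, e.g. via pickle.dump or pandas.DataFrame.to_pickle):
def _open_df_example():
    import pandas as pd
    pd.DataFrame({"a": [1, 2, 3]}).to_pickle("frame.pkl")
    return open_df("frame.pkl")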
def main(target_article_path, lang, mapping_path, corpus_vectors_path,
         top=1, verbose=1, included=1,
         udpipe_path='', keep_pos=1, keep_stops=0, keep_punct=0,
         method='', embeddings_path='', bidict_path='', projection_path='',
         no_duplicates=0, with_url=0, url_mapping_path=''):
    i2lang = 'i2{}'.format(lang)
    lang2i = '{}2i'.format(lang)
    texts_mapping = jload(open(mapping_path))
    corpus_vecs = pload(open(corpus_vectors_path, 'rb'))

    article_data = None
    if with_url:
        article_data = load_article_data(url_mapping_path)

    if included:
        target_article = target_article_path
        target_article_id = texts_mapping[lang2i].get(target_article)
        if target_article_id is None:  # a plain `not` check won't do: some articles have id 0
            print(texts_mapping['en2i'])
            print(target_article)
            raise NotIncludedError
        target_article_vec = corpus_vecs[target_article_id]
    else:
        target_article, target_article_vec = prepare_new_article(
            target_article_path, udpipe_path, keep_pos, keep_punct, keep_stops,
            method, embeddings_path, no_duplicates, projection_path, bidict_path)

    similars = search_similar(target_article_vec, corpus_vecs)
    rating, verbosed_rating, missed_urls = make_rating(
        target_article, similars, verbose, top, included,
        texts_mapping, i2lang, with_url, article_data)

    return rating, verbosed_rating, missed_urls
def load_vectorized(output_vectors_path, forced):
    """Load the matrix of corpus vectors, if one already exists."""
    # Some vectorized corpus already exists on disk and we are not forcing a rebuild
    if os.path.isfile(output_vectors_path) and not forced:
        vectorized = pload(open(output_vectors_path, 'rb'))
        print('Уже что-то векторизовали!', file=sys.stderr)
    else:
        # Nothing from this corpus has been vectorized yet, or we force a full refresh
        print('Ничего ещё не разбирали, сейчас будем.', file=sys.stderr)
        vectorized = []

    return vectorized
def rec_load(ctype, file):
    if (issubclass(ctype, SerializableObject)):
        return ctype.unserialize(file)
    elif (issubclass(ctype, list) or issubclass(ctype, tuple)):
        # Reading the starting mark
        pload(file)
        # Starting the empty collection
        res = []
        # Reading each element
        current = pload(file)
        while (current != FinishSequenceSerializable):
            # If it is not over, current is the type for the next element
            res += [rec_load(get_class(current), file)]
            current = pload(file)
        # Return the appropriate type of collection
        return ctype(res)
    elif (issubclass(ctype, dict)):
        # Reading the starting mark
        pload(file)
        # Starting the empty collection
        res = {}
        # Reading each element
        current = pload(file)
        while (current != FinishSequenceSerializable):
            # If it is not over, current is the type for the next key
            key = rec_load(get_class(current), file)
            v_type = pload(file)
            value = rec_load(get_class(v_type), file)
            res[key] = value
            current = pload(file)
        return res
    else:
        return pload(file)
def load(root):
    with open(join(root, "db.pkl"), "rb") as f:
        db = pload(f)
    if not compatible(*db.version):
        raise IncompatibleVersion  # just to be safe
    db.root = root
    db.lock()
    return db
def step3_results_numerical(path_input_file):
    # Make prefix, suffix, file and directory strings
    dir_cwd = os.path.dirname(path_input_file)
    name_obj_file = os.path.basename(path_input_file)
    id_ = name_obj_file.split(" - ")[0]
    path_csv_file = os.path.join(dir_cwd, "{} - {}".format(id_, "res_num.csv"))

    # Load the dataframe obj file
    df_results = pload(open(path_input_file, "rb"))

    # Save the dataframe to csv file
    df_results.to_csv(path_csv_file, index=True, sep=",")

    saveprint(os.path.basename(path_csv_file))
def load_model(cls, path):
    """Loads a saved model from a .hdf5 file and updates its attributes.
    In case of a multi-output model, the .pkl file is loaded,
    since .hdf5 is not supported yet.

    Parameters:
        path (str): Path including the file name, from where the model
            should be loaded.

    Returns:
        GPy.models: Instantiated surrogate model.
    """
    from profit.util import load
    from .encoders import Encoder
    from GPy import models

    self = cls()
    try:
        sur_dict = load(path, as_type='dict')
        self.model = models.GPRegression.from_dict(sur_dict['model'])
        self.Xtrain = sur_dict['Xtrain']
        self.ytrain = sur_dict['ytrain']
        self.encoder = [Encoder(func, cols, out)
                        for func, cols, out in eval(sur_dict['encoder'])]
    except (OSError, FileNotFoundError):
        from pickle import load as pload
        from os.path import splitext

        # Load multi-output model from pickle file
        print("File {} not found. Trying to find a .pkl file with multi-output instead."
              .format(path))
        self.model, self.Xtrain, self.ytrain, encoder_str = pload(
            open(splitext(path)[0] + '.pkl', 'rb'))
        self.encoder = [Encoder(func, cols, out)
                        for func, cols, out in eval(encoder_str)]
        self.output_ndim = int(max(self.model.X[:, -1])) + 1
        self.multi_output = True

    # Initialize the encoder by encoding and decoding the training data once.
    self.encode_training_data()
    self.decode_training_data()

    self.kernel = self.model.kern
    self._set_hyperparameters_from_model()
    self.ndim = self.Xtrain.shape[-1]
    self.trained = True
    self.print_hyperparameters("Loaded")
    return self
def step3_results_numerical(path_input_file):
    dir_work = os.path.dirname(path_input_file)
    obj_file_name = os.path.basename(path_input_file)

    # Obtain ID string
    id_ = obj_file_name.split(" - ")[0]

    # Obtain full directory of the dataframe obj file
    dir_csv_file = os.path.join(dir_work, "{} - {}".format(id_, "res_num.csv"))

    # Load the dataframe obj file
    df_results = pload(open(path_input_file, "rb"))

    # Save the dataframe to csv file
    df_results.to_csv(dir_csv_file, index=True, sep=",")
def get_access_token(path_to_tok='./translate.tok'):
    if fexist(path_to_tok):
        ftok = open(path_to_tok, 'r+')
        tokdata = pload(ftok)
        expiretime = tokdata['expires_in']
        if (datetime.now() - expiretime) > timedelta(10, 0):
            return tokdata['token']
    else:
        ftok = open(path_to_tok, 'w')

    args = {'client_id': clientid,
            'client_secret': clientse,
            'scope': 'http://api.microsofttranslator.com/',
            'grant_type': 'client_credentials'}
    enc_args = urllib.urlencode(args)
    req = urllib2.Request(tok_url, enc_args)
    response = urllib2.urlopen(req)
    data = json.load(response)
    timeandten = datetime.now() + timedelta(minutes=10)
    pdump({'token': data['access_token'], 'expires_in': timeandten}, ftok)
    return data['access_token']
def cache(target, args, identifier=None, cache_life=3 * 24 * 3600):
    '''
    Run the target function with the given args, and store it to a pickled
    cache folder using the given identifier or the name of the function.
    The next time it is executed, the cached output is returned unless
    cache_life time expires.
    '''
    if identifier is None:
        identifier = target.__name__
    identifier = sub(r'[/\\\*;\[\]\'\":=,<>\?\|]', '_', identifier)
    path = join(PATH_RESOURCES, f'.pickled/{identifier}.pk')
    makedirs(dirname(path), exist_ok=True)
    now = time()
    if exists(path):
        with open(path, 'rb') as fp:
            save_time, value = pload(fp)
            if now - save_time <= cache_life:
                return value
    res = target(*args)
    with open(path, 'wb') as fp:
        pdump((now, res), fp, protocol=3)
    return res
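# Usage sketch for cache() above (hypothetical helper; assumes PATH_RESOURCES is a
# writable directory). Note the cache entry is keyed by the identifier, not by args.
def _cache_usage_example():
    def slow_square(x):
        return x * x

    first = cache(slow_square, (12,))   # computed, then pickled to the cache folder
    second = cache(slow_square, (12,))  # served from the cache until cache_life expires
    return first, second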
def open_mc10(fpath):
    """
    Open saved (serialized) data previously imported with load_mc10

    Parameters
    ----------
    fpath : str
        Path to the file to be opened and loaded

    Returns
    -------
    data : dict
        Dictionary of saved data.  See load_mc10 for data structure
    """
    fid = open(fpath, 'rb')  # open the file
    data = pload(fid)  # import data
    fid.close()  # close the file

    return data
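# Usage sketch for open_mc10 above (hypothetical file name; the file is expected to
# hold the dict structure produced by load_mc10 and saved with pickle.dump):
def _open_mc10_example():
    data = open_mc10('subject_01_mc10.pkl')
    return sorted(data.keys())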
def rescue(identifier, function, arguments, path_data="data",
           path_cache=".pickled/%s.pk", cache_life=259200,
           sr=22500, window=45000, invalid=r"[/\\\*;\[\]\":=,<>]"):
    """ Caches the output of a function. """
    path = path_cache % sub(invalid, "_", identifier)
    makedirs(dirname(path), exist_ok=True)
    if exists(path):
        with open(path, "rb") as fp:
            save_time, rate, value, saved_window = pload(fp)
        # Reuse the cached value only if it is fresh and was produced with the
        # same sample rate and window size as requested.
        if NOW - save_time <= cache_life and rate == sr and saved_window == window:
            return value
    res = function(*arguments)
    with open(path, "wb") as fp:
        pdump((NOW, sr, res, window), fp, protocol=3)
    return res
def test_delay_buf_sink(self):
    """
    make sure that delay of 0 and heat sink of 100% leak rate
    both result in identical results as old thingy.
    @return:
    @rtype:
    """
    ref = self.regression_folder + '\\ref_delay1.pickle'
    from pickle import load as pload
    with open(ref, 'rb') as f:
        exp = pload(f)

    sim = TempSim(28, 19, 0, delay=0)
    pid = PIDController(37, 40, 30, ideal=False)

    result = self.run_process(sim, pid, 9500)

    for exp_pt, res_pt in zip(exp, result):
        self.assertEqual(exp_pt, res_pt)
async def load(self, ctx, *, name):
    player = self.lavalink.player_manager.get(ctx.guild.id)
    playlists = listdir(path.join('resources', 'playlists'))
    playlist_name = f'{ctx.author.id}_{name.lower()}'
    try:
        validate_filename(playlist_name)
    except ValidationError:
        # "Forbidden characters in the playlist name"
        return await ctx.send('Запрещенные символы в названии плейлиста')
    if playlist_name not in playlists:
        # "No playlist with that name; use {prefix}playlists to list your playlists"
        return await ctx.send(
            f'Нет плейлиста с таким названием\n'
            f'Для просмотра своих плейлистов используйте {ctx.prefix}playlists')

    with open(path.join('resources', 'playlists', playlist_name), 'rb') as queue_file:
        queue = pload(queue_file)
    for track in queue:
        player.add(requester=ctx.author.id, track=track)

    ln = len(queue)
    # "Playlist {name} [{ln} tracks] added to the queue"
    await ctx.send(f'Плейлист {name} [{ln} {sform(ln, "трек")}] добавлен в очередь')
    if not player.is_playing:
        await player.play()
def test_tweet_partial():
    '''
    Tests the offline parts of the tweet pipeline (output folder, summary
    file, word cloud). Useful if no network connection is available.
    '''
    # tweetClass.classify_images() requires a Google Vision connection
    # Run offline tests
    homedir = Path(join(Path(__file__).parent, '..')).resolve(strict=True)
    fname = join(homedir, 'twitter_test', 'brabbott42_200tweets.p')
    outdir = join(homedir, 'output')
    if not isfile(fname):
        print(f'Could not find file: {fname}')
        assert False, 'Expected a twitter object to be saved for testing'
    try:
        twit_obj = pload(open(fname, 'rb'))
    except:
        assert False, 'could not load object'
    try:
        twit_obj.iteration = 0  # Make it build the dir for the first time
        if isdir(outdir):
            print(f'Detected output directory {outdir}')
            rmtree(outdir)
        twit_obj.makeoutputfolder()
    except:
        assert False, 'Could not make (or remove old) output directory'
    try:
        sumfile = twit_obj.write_summaryfile()
    except:
        assert False, 'Could not make summary file'
    try:
        word_cloud_from_txt(sumfile)
    except:
        assert False, 'Could not make word cloud'
    assert True
def get_pickle(self, run, tag, form='', warn=True):
    dic = {'PH': join('Ph_fit', '{tc}_{run}_{ch}_10000_eventwise_b2'),
           'Ped': join('Pedestal', '{tc}_{run}_{ch}_ab2_fwhm_AllCuts'),
           'Pul': join('Pulser', 'HistoFit_{tc}_{run}_{ch}_ped_corr_BeamOn'),
           'Cur': join('Currents', '{tc}_{run}_{ch}'),
           'PulPed': join('Pedestal', '{tc}_{run}_{ch}_ac2_fwhm_PulserBeamOn')}
    path = join(self.Dir, 'Pickles', '{}.pickle'.format(dic[tag])).format(tc=self.TestCampaign, run=run, ch=self.Channel)
    if not file_exists(path):
        if 'pixel' not in self.DetectorType.lower() and warn:
            warning('did not find {p}'.format(p=path))
        return
    with open(path) as f:
        try:
            value = pload(f)
            if type(value) is Variable:
                return value
            fit = FitRes(value, form)
            if fit.Parameter(0) is None:
                warning('empty fitparameter pickle for: {}'.format(basename(path)))
                return
            return fit
        except ImportError as err:
            warning(err)
def get(self, name, call_seq):
    if self.counter[name] < call_seq:
        raise CallNotLogged
    dest = join(self.root, self.calls[name][call_seq - 1]["loc"])
    if not exists(dest):
        raise CannotFindRecord
    with open(join(dest, "input_descriptor.pkl"), "rb") as f:
        input_descriptor = pload(f)
    args = [None] * len(input_descriptor["args"])
    for i, elt in enumerate(input_descriptor["args"]):
        elt.load(dest)
        args[i] = elt.data
    kwargs = dict()
    for k, elt in input_descriptor["kwargs"].items():
        elt.load(dest)
        kwargs[k] = elt.data
    return args, kwargs
def loadAndCacheDbHTMLPages(databaseSiteBaseURL, databaseSiteHTMLDumpPath, numPages):
    if path.exists(databaseSiteHTMLDumpPath):
        print("## Status: Found cached copy of Music 4 Dance website.")
        databaseSiteHTMLPages = pload(open(databaseSiteHTMLDumpPath, "rb"))
    else:
        # Make HTTP request
        try:
            databaseSiteHTMLPages = []
            for i in range(numPages):
                databaseSiteHTMLPages.append(
                    getHTML(databaseSiteBaseURL + "?page=%d" % (i + 1)).text)
        except:
            print("!! Error: Retrieving Music 4 Dance website unsuccessful.")
            exit(0)
        else:
            print("## Status: Retrieved Music 4 Dance website.")

        # Save for later
        pdump(databaseSiteHTMLPages, open(databaseSiteHTMLDumpPath, "wb"))
        print("## Status: Cached copy of Music 4 Dance website for later.")

    return databaseSiteHTMLPages
def read_pickle(filename):
    """Read in pickle file.
    See more at `pickle <https://docs.python.org/3/library/pickle.html>`_.

    Parameters
    ----------
    filename : str
        Full filename with path of file.

    Returns
    -------
    dict
        Data object.
    """
    assert isinstance(filename, str)
    with open(filename, "rb") as f:
        data = pload(f)
    assert isinstance(data, dict)
    return data
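# Usage sketch for read_pickle above (hypothetical file name): the final assert
# means only pickles whose top-level object is a dict will load successfully.
def _read_pickle_example():
    from pickle import dump
    with open("params.pkl", "wb") as f:
        dump({"lr": 1e-3, "epochs": 10}, f)
    return read_pickle("params.pkl")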
def loadGame():
    i = 0
    while not i == 1:
        system(clear)
        print('CURRENT SAVED GAMES:\n')
        path = f'{getcwd()}{slash}saves'
        system(f'{viewDir} {path}')
        loadGameName = input('\nWHICH SAVE WOULD YOU LIKE TO LOAD?\n\nENTER HERE (0 TO GO BACK): ')
        if str(loadGameName) == '0':
            return False
        try:
            loadThisGame = open(f'{path}{slash}{loadGameName.lower()}', 'rb')
            i = 1
        except FileNotFoundError:
            system(clear)
            print('NAME OF SAVE GAME NOT FOUND\nTRY AGAIN')
            sleep(1)
    playerDict = pload(loadThisGame)
    PLAYER_LIST.clear()
    for player in playerDict:
        PLAYER_LIST.append(playerDict[player])
    generateCityStartSupplies()  # todo: save city information instead and then load that dict
    return
def __alt_authorize(self):
    credentials = None
    if USE_SERVICE_ACCOUNTS and not self.alt_auth:
        self.alt_auth = True
        if ospath.exists(self.__G_DRIVE_TOKEN_FILE):
            LOGGER.info("Authorize with token.pickle")
            with open(self.__G_DRIVE_TOKEN_FILE, 'rb') as f:
                credentials = pload(f)
            if credentials is None or not credentials.valid:
                if credentials and credentials.expired and credentials.refresh_token:
                    credentials.refresh(Request())
                else:
                    flow = InstalledAppFlow.from_client_secrets_file(
                        'credentials.json', self.__OAUTH_SCOPE)
                    LOGGER.info(flow)
                    credentials = flow.run_console(port=0)
                # Save the credentials for the next run
                with open(self.__G_DRIVE_TOKEN_FILE, 'wb') as token:
                    pdump(credentials, token)
            return build('drive', 'v3', credentials=credentials, cache_discovery=False)
    return None
def load_model(cls, path):
    """Loads a saved model from a .hdf5 file and updates its attributes.
    In case of a multi-output model, the .pkl file is loaded,
    since .hdf5 is not supported yet.

    Parameters:
        path (str): Path including the file name, from where the model
            should be loaded.

    Returns:
        GPy.models: Instantiated surrogate model.
    """
    from profit.util import load
    from GPy import models

    self = cls()
    try:
        model_dict = load(path, as_type='dict')
        self.model = models.GPRegression.from_dict(model_dict)
        self.Xtrain = self.model.X
        self.ytrain = self.model.Y
    except (OSError, FileNotFoundError):
        from pickle import load as pload
        from os.path import splitext

        # Load multi-output model from pickle file
        self.model = pload(open(splitext(path)[0] + '.pkl', 'rb'))
        self.output_ndim = int(max(self.model.X[:, -1])) + 1
        self.Xtrain = self.model.X[:len(self.model.X) // self.output_ndim, :-1]
        self.ytrain = self.model.Y.reshape(-1, self.output_ndim, order='F')
        self.multi_output = True

    self.kernel = self.model.kern
    self._set_hyperparameters_from_model()
    self.ndim = self.Xtrain.shape[-1]
    self.trained = True
    self.print_hyperparameters("Loaded")
    return self
def test_get_access_token(self):
    token = t.get_access_token()
    ftok = open('./translate.tok', 'r')
    self.assertEqual(unicode, type(token))
    self.assertEqual(dict, type(pload(ftok)))
def main(argv, stop_after_init=False, preset_set_of_users=None):
    pickle_path_lptq = '/tmp/process_log_times_pq.bin.gz'
    pickle_path_clicks = '/tmp/process_log_clicks.bin.gz'
    pickle_path_clusters = '/tmp/process_log_clusters.dict.txt.gz'
    pickle_path_removed_queries = '/tmp/process_log_removed_queries.lst.txt.gz'
    pickle_path_big_queries_set = '/tmp/process_log_big_queries_set.lst.txt.gz'
    pickle_path_users = '/tmp/process_log_usets_set.lst.txt.gz'

    argc = len(argv)
    if argc <= len(CLI_ARGS):
        print "Usage: %s" % argv[0], ' '.join(CLI_ARGS)
        print "Currently missing parameters arguments:", ' '.join(CLI_ARGS[len(argv)-1:])
        exit()

    global mdb_host
    mdb_host = argv[1].strip()
    filter_queries_file = argv[2].strip()
    allowed_users_file = argv[3].strip()
    log_filepath = argv[4].strip()
    clusters_file = argv[5].strip()
    queries_to_ids_file = argv[6].strip()

    t_init = time()

    # print "Starting... compute_everything()"
    # t0 = time()
    # everything = compute_everything(mdb_host, filter_queries_file, allowed_users_file)
    # removed_queries = everything['removed_queries']
    # print "Done ", time()-t0

    ####################################################################################################################
    # We are going to do a lot of "is in allowed users?" so we need a set, not a list
    print "Loading users..."
    # users_set = set([int(line.strip()) for line in univ_open(allowed_users_file, mode='r')])
    # We use compute_everything because it gets rid of the null-clusters queries before retrieving the list
    # of users, thus reducing the dataset overall, as queries are then retrieved from the users set
    t0 = time()
    global users_set
    if preset_set_of_users is not None:
        users_set = preset_set_of_users
    else:
        try:
            print "Trying to pickle from disk...", pickle_path_users
            with gzopen(pickle_path_users, 'r') as f:
                print "File", pickle_path_users, "was found!"
                users_set = set(load_pickled_list(f))
                pickled = True
        except Exception as err:
            print "Error for", pickle_path_users, "was:", err
            # if not isinstance(err, IOError):
            print "No pickled files or error loading it, recomputing..."
            pickled = False
            # Note: here we use compute_everything because it will load the queries clusters OF THE INITIAL QUERIES only,
            # remove the ones that have null clusterings,
            # and then generate the list of users who queried the pruned list of queries.
            # We do not directly use the clusters from it, nor the queries, because we still have to remove the other
            # queries that have null clustering vectors. By "other queries" we mean queries that are not the seed used
            # to select users/data, i.e. any query that is part of a user profile of one of the allowed users (the ones
            # who queried the query list seed). This bigger queries set is generated by load_big_query_set() in this file.
            users_set = set(compute_everything(mdb_host, filter_queries_file, allowed_users_file)['users'])
            print "Done ", time()-t0
        print "Total number of users that will be analyzed:", len(users_set)
        pickle_ask(pickled, pickle_path_users, users_set, dump_f=pickle_list)
    print "Done ", time()-t0
    # everything = None  # We are not using it afterwards, so, this should help the GC

    ####################################################################################################################
    # import itertoolsmodule as iter
    print "Computing the set of allowed queries..."
    t0 = time()
    try:
        print "Trying to pickle from disk...", pickle_path_big_queries_set
        with gzopen(pickle_path_big_queries_set, 'r') as f:
            big_queries_set = set(load_pickled_list(f))
            pickled = True
    except Exception as err:
        if not isinstance(err, IOError):
            print "Error for", pickle_path_big_queries_set, "was:", err
        print "No pickled files or error loading it, recomputing..."
        pickled = False
        big_queries_set = load_big_query_set(log_filepath, users_set)
    print "Done ", time()-t0
    print "Total number of queries that will be analyzed:", len(big_queries_set)
    pickle_ask(pickled, pickle_path_big_queries_set, big_queries_set, dump_f=pickle_list)

    ####################################################################################################################
    global clusters
    print "Pre-initializing clusters dict..."
    t0 = time()
    clusters = dict.fromkeys(big_queries_set)
    print "clusters now has", len(clusters), "keys"
    print "Done ", time()-t0

    print "Retrieving big list of clusters for the", len(big_queries_set), "queries..."
    t0 = time()
    global clusters_loaded
    clusters_loaded = False
    p_clusters, mapres_clusters = run_in_bg_process(do_process_clusters_pickle, (pickle_path_clusters,))

    def join_clusters():
        p_clusters.join()
        global clusters, clusters_loaded
        if clusters_loaded:
            return clusters
        result = mapres_clusters.get()[0]
        if result is False:
            # The pickling from disk did not work, recompute it in place (join_clusters() is called when clusters are
            # NEEDED so we cannot wait/async this)
            print "Error while pickling clusters from disk", pickle_path_clusters, ", recomputing..."
            t0 = time()
            result = do_process_clusters_recompute(big_queries_set, clusters_file, queries_to_ids_file, clusters)
            print "Done do_process_clusters_recompute()", time()-t0
            # Any user input needs to be on the main thread, pickle ask will by itself send the pickling task to a bg
            # worker process if the user answers yes
            pickle_ask(False, pickle_path_clusters, result, dump_f=picke_dict)
        clusters_loaded = True
        clusters = result
        return clusters

    ####################################################################################################################
    removed_queries = compute_removed_queries_because_of_null_clustering(pickle_path_removed_queries, clusters, join_clusters)
    print "Removed", len(removed_queries), "out of", len(big_queries_set)

    ####################################################################################################################
    t1 = time()
    print "Launching process_log_clicks computation in a separated process"
    p_lpc, lpc_mapres = run_in_bg_process(process_log_clicks, (log_filepath, users_set, removed_queries))
    p_lpc.close()

    ####################################################################################################################
    print "Starting... process_log_times_pq()"
    t0 = time()
    try:
        print "Trying to pickle from disk...", pickle_path_lptq
        lptpq = pload(gzopen(pickle_path_lptq, 'rb'))
        pickled = True
    except Exception as err:
        if not isinstance(err, IOError):
            print "Error for", pickle_path_lptq, "was:", err
        print "No pickled files or error loading it, recomputing..."
        pickled = False
        lptpq = process_log_times_pq(log_filepath, users_set, removed_queries)
    print "Done process_log_times_pq() in", time()-t0
    pickle_ask(pickled, pickle_path_lptq, lptpq)

    print "Starting... process_log_clicks()"
    t0 = time()
    # Note: Disabled the pickling as, for some reason, it does not work
    # and there is only ~15s difference between recomputation and pickling from disk anyway...
    # try:
    #     print "Trying to pickle from disk..."
    #     lpc = pload(open(pickle_path_clicks, 'rb'))
    #     pickled = True
    # except Exception as err:
    #     if not isinstance(err, IOError):
    #         print "Error was:", err
    #     print "No pickled files or error loading it, recomputing..."
    #     pickled = False
    #     lpc = process_log_clicks(log_filepath, users_set, removed_queries)

    ####################################################################################################################
    print "waiting for the pool to finish, if not finished yet..."
    p_lpc.join()
    lpc = lpc_mapres.get()[0]
    print "Took a total time of", time()-t1, "or less"

    ####################################################################################################################
    print "Done ", time()-t0
    # pickle_ask(pickled, pickle_path_clicks, lpc)

    print "Some reprocessing..."
    # We need the clusters from now on, so let us wait for the children process to be finished and the data
    # transferred back to us
    join_clusters()

    print "Removing null-vectors clusters queries from `clusters`..."
    t0 = time()
    for qid in removed_queries:
        try:
            del clusters[qid]
        except KeyError:
            pass  # If it was already not there, that's perfect
    print "Done ", time()-t0

    t0 = time()
    for user_queries_dic in lpc.user_clicks_number:
        if user_queries_dic is None:
            continue
        del user_queries_dic['_id']
    for user_queries_dic in lptpq.user_queries_number:
        if user_queries_dic is None:
            continue
        del user_queries_dic['_id']
    print "Done ", time()-t0

    # Deprecated, for now, but we might switch back to it so, keep it for now
    print "Computing number of users who issued the query, per query..."
    t0 = time()
    number_of_users_who_queried = dict.fromkeys(big_queries_set - removed_queries, 0)
    for query_dict in lptpq.user_queries_number:
        if query_dict is None:
            continue
        for qid in query_dict:
            number_of_users_who_queried[qid] += 1
    print "Done ", time()-t0

    print "Computing number of users who clicked, per query..."
    t0 = time()
    number_of_users_who_clicked = dict.fromkeys(big_queries_set - removed_queries, 0)
    for query_dict in lpc.user_clicks_number:
        if query_dict is None:
            continue
        for qid in query_dict:
            number_of_users_who_clicked[qid] += 1
    print "Done ", time()-t0

    # GC
    big_queries_set = None
    removed_queries = None

    # print "Some reprocessing..."
    # t0 = time()
    # for user_queries_dic in lpc.user_clicks_number:
    #     if user_queries_dic is None:
    #         continue
    #     del user_queries_dic['_id']
    #     for q in removed_queries:
    #         try:
    #             del user_queries_dic[q]
    #         except KeyError:
    #             # key was not there? fine, we did not need to delete it then
    #             pass
    # for user_queries_dic in lptpq.user_queries_number:
    #     if user_queries_dic is None:
    #         continue
    #     del user_queries_dic['_id']
    #     for q in removed_queries:
    #         try:
    #             del user_queries_dic[q]
    #         except KeyError:
    #             # key was not there? fine, we did not need to delete it then
    #             pass
    print "Done ", time()-t0

    print "Starting..."
    t0 = time()
    us.init(
        lpc.user_clicks_number,
        lptpq.user_queries_number,
        clusters,
        users_set,
        number_of_users_who_queried,
        number_of_users_who_clicked
    )
    print "Done ", time()-t0

    # Note: At this point in the main() execution, the script takes ~2.5G of RAM.
    print "Total initialization phase time:", time()-t_init

    if stop_after_init:
        return

    print "Initializing users similarity computation phase..."
    # Similarity computation benchmark:
    t0 = time()
    i = 0
    global DATA_SET_SIZE
    DATA_SET_SIZE = len(users_set)
    # Note: a too small batch size will waste time respawning processes and re-generating the user_sim module cache
    # but a too high batch size will kill mongodb and the computer's RAM (as 1 batch size unit is 1 user computed by
    # the process and the process commits everything at once)
    print "Generating sorted users set..."
    print "Generating workers pool..."
    p = Pool(processes=POOL_SIZE)
    start_values = range(0, DATA_SET_SIZE, BATCH_SIZE)
    print "Mapping (launching) pool to", len(start_values), "different start_values", start_values
    t0 = time()
    p.map(compute_user_sim_batch, start_values)
    p.close()
    p.join()
    print "Workers finished in %.3f." % (time()-t0)

    # for u in users_set:
    #     for u2 in users_set:
    #         i += 1
    #         try:
    #             us.sim(u, u2)
    #         except KeyError as err:
    #             print err
    #             key = err.args[0]
    #             print key, "in big_queries_set?", key in big_queries_set
    #             print key, "in removed_queries?", key in removed_queries
    #             print key, "in clusters?", key in clusters
    #             res = False
    #             for u_dict in lpc.user_clicks_number:
    #                 if u_dict is not None:
    #                     res |= (key in u_dict)
    #             print key, "in clicks?", res
    #             res = False
    #             for u_dict in lptpq.user_queries_number:
    #                 if u_dict is not None:
    #                     res |= (key in u_dict)
    #             print key, "in user_queries_number?", res
    #         if i % 10000 is 0:
    #             print i+1, "\t\tsim() calls in\t\t", time()-t0, "\t\taverage\t\t", (time()-t0)/float(i+1)

    raw_input("Now what?")
def __init__(self, master=None, thetype="IPs"):
    TK.Frame.__init__(self, master, bd="1p", relief="groove")
    self.top = self.winfo_toplevel()
    self.grid(pady=3, padx=0)
    self.app = TK.Frame(self)
    self.showsettings = TK.IntVar()
    row = 0
    width = 0
    row2 = 0
    if thetype == "IPs":
        row = 0
        row2 = 0
        width = 31
    if thetype == "URLs":
        row = 6
        row2 = 6
        width = 30
    if thetype == "SPECs":
        row = 12
        row2 = 12
        width = 34
    self.app.grid(column=4, row=row, sticky="NW")
    self.thetype = thetype
    self.etext = TK.StringVar()
    self.label = TK.Label(self, text=thetype)
    self.label.grid(column=0, row=row, sticky='NW')
    self.tbox = TK.Entry(self, textvariable=self.etext, width=width)
    self.tbox.grid(column=1, row=row, sticky='NW')
    self.tbox.bind('<Button-3>', self.eText)
    self.tbox.bind("<Return>", self.RetKey)
    self.tbox.bind('<Double-Button-1>', self.eTextclear)
    self.tbox["background"] = "White"
    self.tbox["foreground"] = "Black"
    self.button1 = TK.Button(self, text="Spawn",
                             command=lambda: self.fire(self.sources, self.values, self.trigger, self.etext))
    self.button1.grid(column=2, row=row, sticky='NW')
    if (thetype != "SPECs"):
        self.button2 = TK.Button(self, text="BL", command=lambda: self.processIP())
        self.button2.grid(column=3, row=row, sticky='NW')
    self.cb_settings = TK.Checkbutton(self, variable=self.showsettings, onvalue=1, offvalue=0,
                                      command=lambda: self.showSettings(row))
    self.cb_settings.grid(column=0, row=row2, pady=20, sticky='NW')
    self.a = sources.sources()
    self.IPdims = self.a.IPdimensions
    self.URLdims = self.a.URLdimensions
    self.SPECdims = self.a.SPECIALdimensions
    self.showSettings(row)
    if (thetype == "IPs"):
        self.sources = self.a.ip.sources
        self.trigger = self.a.ip.trigger
        self.links = self.a.IPLinks
        self.values = [[TK.IntVar() for x in xrange(len(self.sources[:]))]]
        if path.isfile(self.a.IPSettingsfile) and path.getsize(self.a.IPSettingsfile) > 0:
            persist = pload(open(self.a.IPSettingsfile, "rb"))
            for x in xrange(len(persist)):
                self.values[0][x].set(persist[x])
        for x in xrange(0, len(self.sources[:])):
            self.addCheckBox(self.sources[x][0], x)
        self.ip_chb_define(self.sources, self.values, self.trigger, self.etext, self.links)
        self.read = self.a.IPpath
        self.blread = self.a.IPbl
    if (thetype == "URLs"):
        self.sources = self.a.url.sources
        self.trigger = self.a.url.trigger
        self.links = self.a.URLLinks
        self.values = [[TK.IntVar() for x in xrange(len(self.sources[:]))]]
        if path.isfile(self.a.URLSettingsfile) and path.getsize(self.a.URLSettingsfile) > 0:
            persist = pload(open(self.a.URLSettingsfile, "rb"))
            for x in xrange(len(persist)):
                self.values[0][x].set(persist[x])
        for x in xrange(0, len(self.sources[:])):
            self.addCheckBox(self.sources[x][0], x)
        self.ip_chb_define(self.sources, self.values, self.trigger, self.etext, self.links)
        self.read = self.a.URLpath
        self.blread = self.a.URLbl
    if (thetype == "SPECs"):
        self.sources = self.a.special.sources
        self.trigger = self.a.special.trigger
        self.links = self.a.SPECIALLinks
        self.values = TK.IntVar()
        for x in xrange(0, len(self.sources[:])):
            self.addRadio(self.sources[x][0], x, x + 1)
        self.ip_chb_define(self.sources, self.values, self.trigger, self.etext, self.links)
    self.update()