def main(workdir, identifier, numtopics):
    print("\n== evaluation ==")
    listcorpus = helpers.load_pickle(workdir, identifier, "allprepared.pickle")
    vectorcorpus = helpers.load_pickle(workdir, identifier, "vectorcorpus.pickle")
    model = helpers.load_model(workdir, identifier)
    resultsfolder = join(workdir, "results", identifier)
    model_coherence(listcorpus, vectorcorpus, model, numtopics, resultsfolder)
    topic_coherence(listcorpus, vectorcorpus, model, numtopics, resultsfolder)
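# The model_coherence / topic_coherence helpers called above are not included
# in this collection. A minimal sketch of a comparable evaluation with gensim's
# CoherenceModel (an assumption, not necessarily what the project uses), where
# `model` is a gensim LdaModel and `listcorpus` is a list of token lists:
from gensim.models import CoherenceModel

coherence_model = CoherenceModel(model=model, texts=listcorpus, coherence="c_v")
print("overall c_v coherence:", coherence_model.get_coherence())
print("per-topic c_v coherence:", coherence_model.get_coherence_per_topic())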
def main(workdir, identifier): print("\n== visualize_model ==") model = helpers.load_model(workdir, identifier) vizfile = join(workdir, "results", identifier, "visualization.html") dictcorpus = helpers.load_pickle(workdir, identifier, "dictcorpus.pickle") vectorcorpus = helpers.load_pickle(workdir, identifier, "vectorcorpus.pickle") visualize_model(model, dictcorpus, vectorcorpus, vizfile) print("==", helpers.get_time(), "done visualizing", "==")
def __init__(self):
    """
    unresolved_df (pandas dataframe): current records that have not exited the environment
    dead_set (set): apps that exited the environment past the 33 days cutoff
    buffer_df (pandas dataframe): records in experience replay buffer
    """
    self.unresolved_df = helpers.load_pickle(c.unresolved_path, default=helpers.empty_df())
    self.dead_set = helpers.load_pickle(c.dead_path, default=set())
    self.buffer_df = helpers.load_pickle(c.buffer_path, default=helpers.empty_df())
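# The default= keyword used above implies a load_pickle helper that falls back
# to a given value when no pickle exists yet. The real helpers module is not
# shown; a hypothetical minimal version could look like this:
import os
import pickle

def load_pickle(path, default=None):
    """Return the unpickled object at path, or default if the file is missing."""
    if not os.path.exists(path):
        return default
    with open(path, "rb") as f:
        return pickle.load(f)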
def main(workdir, identifier): print("\n== text2corpus ==") allprepared = helpers.load_pickle(workdir, identifier, "allprepared.pickle") dictcorpus, vectorcorpus = build_vectorcorpus(allprepared) helpers.save_pickle(dictcorpus, workdir, identifier, "dictcorpus.pickle") helpers.save_pickle(vectorcorpus, workdir, identifier, "vectorcorpus.pickle") print("==", helpers.get_time(), "done building corpus", "==")
def main(workdir, dataset, identifier, numtopics):
    print("\n== postprocessing ==")
    model = helpers.load_model(workdir, identifier)
    vectorcorpus = helpers.load_pickle(workdir, identifier, "vectorcorpus.pickle")
    resultsfolder = join(workdir, "results", identifier)
    get_topics(model, numtopics, resultsfolder)
    get_topicwords(model, numtopics, resultsfolder)
    get_doc_topic_matrix(vectorcorpus, model, resultsfolder)
    make_mastermatrix(workdir, dataset, identifier)
    print("==", helpers.get_time(), "done postprocessing", "==")
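# get_doc_topic_matrix is another helper that is not shown. With a gensim
# LdaModel, a document-topic matrix can be assembled from get_document_topics;
# this hypothetical variant returns the matrix instead of writing it to
# resultsfolder:
import numpy as np

def doc_topic_matrix(vectorcorpus, model, numtopics):
    matrix = np.zeros((len(vectorcorpus), numtopics))
    for i, bow in enumerate(vectorcorpus):
        # minimum_probability=0.0 keeps every topic, not just the dominant ones
        for topic_id, prob in model.get_document_topics(bow, minimum_probability=0.0):
            matrix[i, topic_id] = prob
    return matrix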
def prepare(self, unique_texts):
    if self.vocabulary is None:
        self.vocabulary = helpers.load_pickle(self.model_path)
    model = te.TfidfWeighted()
    tokenlists = self.preprocess_all(unique_texts)
    tokens_clean = clean(tokenlists, self.vocabulary)
    assert len(tokens_clean) == len(unique_texts)
    # Distractors
    distractor_sentences = helpers.load_pickle(
        "./sts/data/random_bio_sentences.p")
    print("Num Distractor Sentences:", len(distractor_sentences))
    distractor_tokenized = self.preprocess_all(distractor_sentences)
    distractors_clean = clean(distractor_tokenized, self.vocabulary)
    model.fit(tokenlists=distractors_clean + tokens_clean)
    vecs = model.transform(word_embedding_vocab=self.vocabulary,
                           tokenlists=tokens_clean)
    self.set_sen2vec(unique_texts, vecs)
def prepare(self, unique_texts):
    if self.vocabulary is None:
        self.vocabulary = helpers.load_pickle(self.model_path)
    model = te.AverageEmbedding()
    tokenlists = self.preprocess_all(unique_texts)
    tokens_clean = clean(tokenlists, self.vocabulary)
    assert len(tokens_clean) == len(unique_texts)
    vecs = model.transform(word_embedding_vocab=self.vocabulary,
                           tokenlists=tokens_clean)
    self.set_sen2vec(unique_texts, vecs)
def get_right_columns(df, saved_path):
    """
    this ensures the right columns are passed into the model:
    columns not found at saved_path are added with value 0,
    extra columns are dropped

    df (pandas dataframe): the df in question
    saved_path (str): location of saved file
    """
    prev_cols = helpers.load_pickle(saved_path, default=None)
    if prev_cols is None:
        prev_cols = df.columns
        helpers.write_pickle(saved_path, prev_cols)
    return df.reindex(columns=prev_cols, fill_value=0)
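# Standalone usage sketch of the reindex pattern above (plain pandas, no
# helpers): missing columns come back filled with 0 and extra columns are
# dropped, so the frame always matches the saved column order.
import pandas as pd

prev_cols = ["a", "b", "c"]                    # columns the model expects (example values)
df = pd.DataFrame({"b": [1, 2], "d": [3, 4]})  # "a" and "c" missing, "d" is extra
aligned = df.reindex(columns=prev_cols, fill_value=0)
print(aligned)  # columns a, b, c; a and c filled with 0, d dropped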
def main():
    f_article = "articles"
    f_filtered = "filtered"
    f_final = "final.json"
    args = parse_arguments()

    # scrape and store NBA recaps
    if args.scrape:
        articles = get_site_text(args.date, args.days)
        save_pickle(articles, f_article)

    # load scraped articles
    articles = load_pickle(f_article)
    filtered = []
    for art in articles:
        starts = []
        print(art[0])
        records = get_records(art[1], starts)
        streaks = get_streaks(art[1], starts)
        # clean out headers like TIP-INS
        records = filter_out_upper(records)
        streaks = filter_out_upper(streaks)
        filtered.append((art[0], streaks, records))

    # save filtered text to a pickle file
    save_pickle(filtered, f_filtered)
    filtered = load_pickle(f_filtered)
    extracted = get_extracted_articles(filtered)

    finals = []
    for art in extracted:
        final = dict()
        final['score'] = art[0]
        final['rec'] = art[2]
        final['streaks'] = art[1]
        finals.append(final)
    dump_json(finals, f_final)
def initialize_diagnostics(self):
    """ initializes or loads model diagnostics """
    self.recordings = helpers.load_pickle(self.mod_rec_path, default={})
    if len(self.recordings) == 0 or self.new_mod:
        # dtype=float replaces the original np.float, an alias removed in NumPy 1.24+
        self.recordings['iter_array'] = np.array([], dtype=np.int32)
        self.recordings['loss_array'] = np.array([], dtype=float)
        self.recordings['grad_array'] = np.array([], dtype=float)
        self.recordings['grad_std_array'] = np.array([], dtype=float)
        self.recordings['action_list'] = []
        self.recordings['reward_array'] = np.array([], dtype=float)
        self.recordings['target_array'] = np.array([], dtype=float)
        self.recordings['valid_array'] = np.array([], dtype=float)
    else:
        for key in self.recordings:
            self.recordings[key] = self.recordings[key][-8000:]
aopt['constraints']['tower']['stress']['flag'] = True
aopt['constraints']['tower']['global_buckling']['flag'] = True
aopt['constraints']['tower']['shell_buckling']['flag'] = True
aopt['merit_figure'] = 'tower_mass'
save_yaml(fname_analysis_options, aopt)

## Update modeling options
mopt = load_yaml(
    os.path.join(run_dir, f'outputs.{istep-1}',
                 f'NREL-2p5-116-step{istep-1}-modeling.yaml'))
mopt['WISDEM']['TowerSE']['nLC'] = 1

# - apply loading so we can skip RotorSE
pklfile = os.path.join(run_dir, f'outputs.{istep-1}',
                       f'NREL-2p5-116-step{istep-1}.pkl')
lastoutput = load_pickle(pklfile)
loading = {
    # need to explicitly cast to float, as a workaround to what appears to be this issue:
    # https://github.com/SimplyKnownAsG/yamlize/issues/3
    'mass': float(lastoutput['wt.towerse.tower.mass']['value'][0]),
    'center_of_mass': [
        float(val)
        for val in lastoutput['wt.towerse.geom.turb.rna_cg']['value']
    ],
    'moment_of_inertia': [
        float(lastoutput['wt.towerse.pre.mIxx']['value'][0]),
        float(lastoutput['wt.towerse.pre.mIyy']['value'][0]),
        float(lastoutput['wt.towerse.pre.mIzz']['value'][0]),
        float(lastoutput['wt.towerse.pre.mIxy']['value'][0]),
        float(lastoutput['wt.towerse.pre.mIxz']['value'][0]),
#keys = scrub.gtExp_keys[task]
keys = ['pegtransfer_18']

try:
    features
except NameError:
    features = None

if features is None:
    print 'Loading data...'
    features = getFeatures(dataset, sensors=feats, task=task, hand=hand,
                           keys=keys, asdict=True)
    print 'returned features of size', len(features)

# <codecell>

# Load model
trained = helpers.load_pickle('.ipython\\HMM-Train\\models\\' +
                              '_'.join(['model', 'timdata', task, hand]))

# Load codebook
fh = open('.ipython\\HMM-Train\\codebooks\\' +
          '_'.join(['cdbk', 'timdata', dataset, task, hand, 'v1']), 'r')
codebook = json.loads(fh.read())
fh.close()

# <codecell>

# segment, normalize data

# <codecell>

'''Compute log probability of each grasp segment for each hand'''
# For each segment in the Left Hand
logprobNovL = 0; logprobExpL = 0; i = 0
start = datetime.now()