Example #1
def main(workdir, identifier, numtopics): 
    print("\n== evaluation ==")
    listcorpus = helpers.load_pickle(workdir, identifier, "allprepared.pickle")
    vectorcorpus = helpers.load_pickle(workdir, identifier, "vectorcorpus.pickle")
    model = helpers.load_model(workdir, identifier)
    resultsfolder = join(workdir, "results", identifier)
    model_coherence(listcorpus, vectorcorpus, model, numtopics, resultsfolder)
    topic_coherence(listcorpus, vectorcorpus, model, numtopics, resultsfolder)
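The model_coherence and topic_coherence functions called in Example #1 are defined elsewhere in that project; a minimal sketch of model_coherence, assuming a gensim LDA model and gensim's CoherenceModel (the body and the output path are assumptions, not the project's actual code):

from os.path import join
from gensim.models import CoherenceModel

def model_coherence(listcorpus, vectorcorpus, model, numtopics, resultsfolder):
    # hypothetical sketch: c_v coherence uses the tokenized texts,
    # and the dictionary is taken from model.id2word
    cm = CoherenceModel(model=model, texts=listcorpus, corpus=vectorcorpus, coherence="c_v")
    with open(join(resultsfolder, "model_coherence.txt"), "w", encoding="utf8") as outfile:
        outfile.write(str(cm.get_coherence()))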
Example #2
def main(workdir, identifier):
    print("\n== visualize_model ==")
    model = helpers.load_model(workdir, identifier)
    vizfile = join(workdir, "results", identifier, "visualization.html")
    dictcorpus = helpers.load_pickle(workdir, identifier, "dictcorpus.pickle")
    vectorcorpus = helpers.load_pickle(workdir, identifier, "vectorcorpus.pickle")
    visualize_model(model, dictcorpus, vectorcorpus, vizfile)
    print("==", helpers.get_time(), "done visualizing", "==")   
Example #3
    def __init__(self):
        """
        unresolved_df (pandas dataframe): current records that have not exited the environment
        dead_set (set): apps that exited the environment past the 33 days cutoff
        buffer_df (pandas dataframe): records in experience replay buffer
        """
        self.unresolved_df = helpers.load_pickle(c.unresolved_path, default=helpers.empty_df())
        self.dead_set = helpers.load_pickle(c.dead_path, default=set())
        self.buffer_df = helpers.load_pickle(c.buffer_path, default=helpers.empty_df())
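Example #3 and several later examples pass a default= fallback to helpers.load_pickle; a minimal sketch of such a helper (hypothetical; the topic-modeling examples above use a (workdir, identifier, filename) variant instead):

import os
import pickle

def load_pickle(path, default=None):
    # hypothetical sketch: return the unpickled object,
    # or the default if the file does not exist yet
    if not os.path.exists(path):
        return default
    with open(path, "rb") as infile:
        return pickle.load(infile)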
Example #4
def main(workdir, identifier):
    print("\n== text2corpus ==")
    allprepared = helpers.load_pickle(workdir, identifier,
                                      "allprepared.pickle")
    dictcorpus, vectorcorpus = build_vectorcorpus(allprepared)
    helpers.save_pickle(dictcorpus, workdir, identifier, "dictcorpus.pickle")
    helpers.save_pickle(vectorcorpus, workdir, identifier,
                        "vectorcorpus.pickle")
    print("==", helpers.get_time(), "done building corpus", "==")
Example #5
def main(workdir, dataset, identifier, numtopics):
    print("\n== postprocessing ==")
    model = helpers.load_model(workdir, identifier)
    vectorcorpus = helpers.load_pickle(workdir, identifier, "vectorcorpus.pickle")
    resultsfolder = join(workdir, "results", identifier)
    get_topics(model, numtopics, resultsfolder)
    get_topicwords(model, numtopics, resultsfolder)
    get_doc_topic_matrix(vectorcorpus, model, resultsfolder)
    make_mastermatrix(workdir, dataset, identifier)
    print("==", helpers.get_time(), "done postprocessing", "==")   
Example #6
    def prepare(self, unique_texts):
        if self.vocabulary is None:
            self.vocabulary = helpers.load_pickle(self.model_path)

        model = te.TfidfWeighted()
        tokenlists = self.preprocess_all(unique_texts)
        tokens_clean = clean(tokenlists, self.vocabulary)
        assert len(tokens_clean) == len(unique_texts)

        # Distractors
        distractor_sentences = helpers.load_pickle(
            "./sts/data/random_bio_sentences.p")
        print("Num Distractor Sentences:", len(distractor_sentences))
        distractor_tokenized = self.preprocess_all(distractor_sentences)
        distractors_clean = clean(distractor_tokenized, self.vocabulary)

        model.fit(tokenlists=distractors_clean + tokens_clean)

        vecs = model.transform(word_embedding_vocab=self.vocabulary,
                               tokenlists=tokens_clean)
        self.set_sen2vec(unique_texts, vecs)
Example #7
    def prepare(self, unique_texts):
        if self.vocabulary is None:
            self.vocabulary = helpers.load_pickle(self.model_path)

        model = te.AverageEmbedding()
        tokenlists = self.preprocess_all(unique_texts)
        tokens_clean = clean(tokenlists, self.vocabulary)
        assert len(tokens_clean) == len(unique_texts)

        vecs = model.transform(word_embedding_vocab=self.vocabulary,
                               tokenlists=tokens_clean)
        self.set_sen2vec(unique_texts, vecs)
Example #8
def get_right_columns(df, saved_path):
    """
    this ensures the right columns are passed into the model
        columns not found at saved_path are added with value 0
        extra columns are dropped
    df (pandas dataframe): the df in question
    saved_path (str): location of saved file
    """
    prev_cols = helpers.load_pickle(saved_path, default=None)
    if prev_cols is None:
        prev_cols = df.columns
        helpers.write_pickle(saved_path, prev_cols)
    return df.reindex(columns=prev_cols, fill_value=0)
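A small hypothetical usage of get_right_columns, showing the reindexing described in the docstring (the file name is made up):

import pandas as pd

# assume "columns.pkl" was saved on an earlier run with columns ["a", "b", "c"]
df = pd.DataFrame({"a": [1, 2], "c": [3, 4], "extra": [5, 6]})
aligned = get_right_columns(df, "columns.pkl")
print(list(aligned.columns))  # ['a', 'b', 'c']: 'b' is filled with 0, 'extra' is dropped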
Example #9
def main():
    f_article = "articles"
    f_filtered= "filtered"
    f_final = "final.json"
    args = parse_arguments()

    # scrape and store NBA recaps
    if args.scrape:
        articles = get_site_text(args.date, args.days)
        save_pickle(articles, f_article)

    # load scraped articles
    articles = load_pickle(f_article)
    filtered = []
    for art in articles:
        starts = []
        print(art[0])
        records = get_records(art[1], starts)
        streaks = get_streaks(art[1], starts)
        # cleaning headers like TIP-INS
        records = filter_out_upper(records)
        streaks = filter_out_upper(streaks)
        filtered.append((art[0], streaks, records))
    # save filtered text to a pickle file
    save_pickle(filtered, f_filtered)

    filtered = load_pickle(f_filtered)

    extracted = get_extracted_articles(filtered)
    finals = []
    for art in extracted:
        final = dict()
        final['score'] = art[0]
        final['rec'] = art[2]
        final['streaks'] = art[1]
        finals.append(final)

    dump_json(finals, f_final)
Example #10
    def initialize_diagnostics(self):
        """
        initializes or loads model diagnostics
        """
        self.recordings = helpers.load_pickle(self.mod_rec_path, default={})
        if len(self.recordings) == 0 or self.new_mod:
            self.recordings['iter_array'] = np.array([], dtype=np.int32)
            self.recordings['loss_array'] = np.array([], dtype=np.float64)
            self.recordings['grad_array'] = np.array([], dtype=np.float64)
            self.recordings['grad_std_array'] = np.array([], dtype=np.float64)
            self.recordings['action_list'] = []
            self.recordings['reward_array'] = np.array([], dtype=np.float64)
            self.recordings['target_array'] = np.array([], dtype=np.float64)
            self.recordings['valid_array'] = np.array([], dtype=np.float64)
        else:
            for key in self.recordings:
                self.recordings[key] = self.recordings[key][-8000:]
Example #11
    aopt['constraints']['tower']['stress']['flag'] = True
    aopt['constraints']['tower']['global_buckling']['flag'] = True
    aopt['constraints']['tower']['shell_buckling']['flag'] = True
    aopt['merit_figure'] = 'tower_mass'
    save_yaml(fname_analysis_options, aopt)

    ## Update modeling options
    mopt = load_yaml(
        os.path.join(run_dir, f'outputs.{istep-1}',
                     f'NREL-2p5-116-step{istep-1}-modeling.yaml'))
    mopt['WISDEM']['TowerSE']['nLC'] = 1

    # - apply loading so we can skip RotorSE
    pklfile = os.path.join(run_dir, f'outputs.{istep-1}',
                           f'NREL-2p5-116-step{istep-1}.pkl')
    lastoutput = load_pickle(pklfile)
    loading = {
        # need to explicitly cast to float, as workaround to what appears to be this issue:
        # https://github.com/SimplyKnownAsG/yamlize/issues/3
        'mass':
        float(lastoutput['wt.towerse.tower.mass']['value'][0]),
        'center_of_mass': [
            float(val)
            for val in lastoutput['wt.towerse.geom.turb.rna_cg']['value']
        ],
        'moment_of_inertia': [
            float(lastoutput['wt.towerse.pre.mIxx']['value'][0]),
            float(lastoutput['wt.towerse.pre.mIyy']['value'][0]),
            float(lastoutput['wt.towerse.pre.mIzz']['value'][0]),
            float(lastoutput['wt.towerse.pre.mIxy']['value'][0]),
            float(lastoutput['wt.towerse.pre.mIxz']['value'][0]),
Example #12
#keys = scrub.gtExp_keys[task]
keys = ['pegtransfer_18']

try:
    features
except NameError:
    features = None
if features is None:
    print('Loading data...')
    features = getFeatures(dataset, sensors=feats, task=task, hand=hand, keys=keys, asdict=True)
    print('returned features of size', len(features))

# <codecell>

#Load model
trained = helpers.load_pickle('.ipython\\HMM-Train\\models\\'+'_'.join(['model','timdata',task, hand]))
#Load codebook
with open('.ipython\\HMM-Train\\codebooks\\'+'_'.join(['cdbk','timdata',dataset,task, hand,'v1']), 'r') as fh:
    codebook = json.loads(fh.read())

# <codecell>

#segment, normalize data

# <codecell>

'''Compute log probability of each grasp segment for each hand'''
#For each segment in the Left Hand
logprobNovL = 0; logprobExpL = 0; i=0
start = datetime.now()