Example no. 1
def decrypt(key, ct_path="ciphertext.enc", savePT="plaintext.dec"):
    with open(ct_path, 'rb') as input:
        u = dill.load(input)
        ciphertext = dill.load(input)
    v = modexp(u, key.x, key.p)

    uv = str(u)+str(v)
    k = SHA224.new(uv.encode('utf-8')).hexdigest().encode('utf-8')  # symmetric key used to decrypt the ciphertext with Blowfish
    print("K: "+str(k))

    bs = Blowfish.block_size
    iv = ciphertext[:bs]
    # Remove IV
    ciphertext = ciphertext[bs:]
    print("CT-LEN:"+str(len(ciphertext)))
    cipher = Blowfish.new(k, Blowfish.MODE_CBC, iv)
    plaintext = cipher.decrypt(ciphertext)
    # Remove padding
    last_byte = plaintext[-1]
    plaintext = plaintext[:- (last_byte if type(last_byte) is int else ord(last_byte))]

    # Write to file the plaintext decrypted
    #plaintext = plaintext.decode(plaintext, key.iNumBits)
    io.open(savePT,"wb").write(plaintext)

    return plaintext
Example no. 2
def decrypt(sk, ct_path="ciphertext.enc", savePT="plaintext.dec"):
    # Load Y and ciphertext
    with open(ct_path, 'rb') as input:
        y = dill.load(input)
        ciphertext = dill.load(input)

    # Compute x = RSA^(-1)(y) as y^d mod N
    x = pow(y, sk.d, sk.n)
    # Compute symmetric key with Hash function (SHA224)
    k = SHA224.new(repr(x).encode('utf-8')).hexdigest().encode('utf-8')  # symmetric key used to decrypt the ciphertext
    print("K: "+str(k))

    # Get the Blowfish block size
    bs = Blowfish.block_size
    # Retrieve the IV
    iv = ciphertext[:bs]
    # Remove IV
    ciphertext = ciphertext[bs:]
    print("CT-LEN:"+str(len(ciphertext)))
    # Initialize Blowfish cipher
    cipher = Blowfish.new(k, Blowfish.MODE_CBC, iv)
    # Decrypt
    plaintext = cipher.decrypt(ciphertext)
    # Remove padding
    last_byte = plaintext[-1]
    plaintext = plaintext[:- (last_byte if type(last_byte) is int else ord(last_byte))]

    # Write to file the plaintext decrypted
    io.open(savePT,"wb").write(plaintext)

    return plaintext
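Note (added sketch, not part of the original snippet): Example no. 2 only shows decryption. A minimal encryption counterpart under the same assumptions would derive the Blowfish key from the RSA-encapsulated value, prepend the IV to the ciphertext, and store the pad length in the last byte. The attributes pk.e / pk.n are hypothetical public-key counterparts of sk.d / sk.n.

import os
import dill
from Crypto.Cipher import Blowfish
from Crypto.Hash import SHA224

def encrypt(pk, plaintext, ct_path="ciphertext.enc"):
    # Pick a random value x and encapsulate it with the RSA public key
    x = int.from_bytes(os.urandom(16), "big") % pk.n
    y = pow(x, pk.e, pk.n)
    # Derive the symmetric key exactly as decrypt() does
    k = SHA224.new(repr(x).encode('utf-8')).hexdigest().encode('utf-8')

    bs = Blowfish.block_size
    iv = os.urandom(bs)
    # Length-byte padding, matching the removal logic in decrypt()
    pad_len = bs - (len(plaintext) % bs)
    padded = plaintext + bytes([pad_len]) * pad_len
    ciphertext = iv + Blowfish.new(k, Blowfish.MODE_CBC, iv).encrypt(padded)

    with open(ct_path, 'wb') as output:
        dill.dump(y, output)
        dill.dump(ciphertext, output)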
Example no. 3
 def test_run_advanced(self, tmpdir):
     temp_path = os.path.join(str(tmpdir), 'temp')
     log_path = os.path.join(str(tmpdir), 'log')
     paths = {
         'temp': temp_path,
         'log': log_path
     }
     pipe = Pipeline(paths=paths, create_paths=True)
     pipe.add_step(test_func1, ['func1'], var1=3, var2=4)
     pipe.add_step(test_func2, ['func2'], var1=25, var2=10)
     pipe.add_step(test_func2, ['func2'], var1=1, var2=0)
     pipe.add_step(test_func3, var1=1, var2=0)
     
     with pytest.raises(PipelineError):
         pipe.run()
     pipe = dill.load(open(os.path.join(paths['log'], 'pipeline.p'), 'rb'))
     assert pipe.run_step_idx==2
     assert pipe.steps[0].results=={'next_id': 4, 'status': 'success', 'step_id': 0, 'sum': 7}
     
     with pytest.raises(ZeroDivisionError):
         for step in pipe.steps:
             step.results = None
         pipe.run(resume=True, ignore_errors=True)
     
     new_pipe = dill.load(open(os.path.join(paths['log'], 'pipeline.p'), 'rb'))
     result = new_pipe.run(start_idx=1, ignore_errors=True, ignore_exceptions=True)
     assert result['status']=='success'
     assert new_pipe.steps[0].results==None
     assert new_pipe.steps[1].results=={'diff': 2.5, 'status': 'success'}
     assert new_pipe.steps[2].results=={'error': 'Division by 0', 'status': 'error'}
     assert new_pipe.steps[3].results['status']=='error'
Example no. 4
def classify_patch_group(fn_rf, fn_kern, fn_patch_info):
    t0 = time()
    RF = dill.load(open(fn_rf, "rb"))  # unpack the RF classifier
    kernels = dill.load(open(fn_kern, "rb"))  # unpack the kernels
    patch_info = dill.load(open(fn_patch_info, "rb"))  # unpack the patch info

    a, b, c = patch_info[0]  # get the bounds of the set
    patches_a = patch_info[1]  # grab the set to classify
    patches_r = patch_info[2]  # grab the set to compare with (atlas mask)

    results = []

    for i, patch in enumerate(patches_a):  # go through each patch
        if np.all(patches_r[i]):  # if the patch is entirely masked
            feat = compute_feats(patch, kernels).flatten().reshape(1, -1)
            intens = np.array(compute_intens(patch)).flatten().reshape(1, -1)
            feat = np.concatenate((feat, intens), axis=1)
            prediction = RF.predict(feat)
            # print("Classifying patch {}/{}: {}".format(i, len(patches), prediction))
            results.append(np.full(patch.shape, prediction))
        else:  # the associated ROI patch is totally zero
            results.append(np.zeros(patch.shape))
    dt = time() - t0
    print("Classified group {}-{}/{} in {:.2f} time".format(a, b, c, dt))
    return results
Example no. 5
def get_SALICON_train(location=None):
    """
    Loads or downloads and caches the SALICON training dataset. For memory reasons no fixation trains
    are provided.
    @type  location: string, defaults to `None`
    @param location: If and where to cache the dataset. The dataset
                     will be stored in the subdirectory `SALICON_train` of
                     location and read from there, if already present.
    @return: Training stimuli and fixations

    .. seealso::
        Ming Jiang, Shengsheng Huang, Juanyong Duan*, Qi Zhao: SALICON: Saliency in Context, CVPR 2015

        http://salicon.net/
    """
    if location:
        location = os.path.join(location, 'SALICON_train')
        if os.path.exists(location):
            stimuli = dill.load(open(os.path.join(location, 'stimuli.pydat'), 'rb'))
            fixations = dill.load(open(os.path.join(location, 'fixations.pydat'), 'rb'))
            return stimuli, fixations
        os.makedirs(location)
    stimuli, fixations = _get_SALICON('train',
                                      'https://s3.amazonaws.com/salicon-dataset/2015r1/train.zip',
                                      'd549761c16e59b80cd5981373ada5e98',
                                      'https://s3.amazonaws.com/salicon-dataset/2015r1/fixations_train2014.json',
                                      'ab60a090ee31642fbb4aa41f4953b8bd',
                                      location)
    return stimuli, fixations
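Usage sketch (added; the 'datasets' cache directory is an assumption): on the first call the stimuli and fixations are downloaded, on later calls they are read back from the cached .pydat files via dill.load.

stimuli, fixations = get_SALICON_train(location='datasets')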
Example no. 6
def get_SALICON_val(location=None):
    """
    Loads or downloads and caches the SALICON validation dataset. For memory reasons no fixation trains
    are provided.
    @type  location: string, defaults to `None`
    @param location: If and where to cache the dataset. The dataset
                     will be stored in the subdirectory `SALICON_val` of
                     location and read from there, if already present.
    @return: Validation stimuli and fixations

    .. seealso::
        Ming Jiang, Shengsheng Huang, Juanyong Duan*, Qi Zhao: SALICON: Saliency in Context, CVPR 2015

        http://salicon.net/
    """
    if location:
        location = os.path.join(location, 'SALICON_val')
        if os.path.exists(location):
            stimuli = dill.load(open(os.path.join(location, 'stimuli.pydat'), 'rb'))
            fixations = dill.load(open(os.path.join(location, 'fixations.pydat'), 'rb'))
            return stimuli, fixations
        os.makedirs(location)
    stimuli, fixations = _get_SALICON('val',
                                      'https://s3.amazonaws.com/salicon-dataset/2015r1/val.zip',
                                      '62cd6641a5354d3099a693ff90cb6dab',
                                      'https://s3.amazonaws.com/salicon-dataset/2015r1/fixations_val2014.json',
                                      '3224f8cf86ea8d248d93583866b60c5f',
                                      location)
    return stimuli, fixations
Example no. 7
def main():

    with open('./word2index.dict', 'rb') as f:
        word2index = dill.load(f)
    with open('./index2word.dict', 'rb') as f:
        index2word = dill.load(f)

    model = Seq2Seq(
        vocab_size=len(word2index),
        embed_size=300,
        hidden_size=300,
    )
    serializers.load_npz('seq2seq.npz', model)

    while True:
        s = input()
        test_input = Variable(
            np.array([word2index.get(word, word2index['UNK']) for word in mecab_wakati(s)], dtype='int32')
        )

        print('入力-> {}'.format(s))
        print('出力-> ', end="")
        for index in model.predict(test_input):
            print(index2word[index], end='')
        print()
Example no. 8
    def _remove_nresults(self, traj, nresults, continue_folder):

        result_tuple_list = []

        n = 0
        for filename in os.listdir(continue_folder):
            _, ext = os.path.splitext(filename)

            if ext != '.rcnt':
                continue

            n += 1

            cnt_file = open(os.path.join(continue_folder, filename), 'rb')
            try:
                result = dill.load(cnt_file)
                cnt_file.close()
                result_tuple_list.append((result))
            except Exception:
                # delete broken files
                logging.getLogger().exception('Could not open continue snapshot '
                                              'file `%s`.' % filename)
                cnt_file.close()
                os.remove(filename)

        self.assertGreaterEqual(n, nresults)

        result_tuple_list = sorted(result_tuple_list, key=lambda x: x[0])
        timestamp_list = [x[1]['finish_timestamp'] for x in result_tuple_list]
        timestamp_list = timestamp_list[-nresults:]

        for timestamp in timestamp_list:
            filename = os.path.join(continue_folder, 'result_%s.rcnt' % repr(timestamp).replace('.','_'))
            os.remove(filename)

        result_tuple_list = []
        for filename in os.listdir(continue_folder):
            _, ext = os.path.splitext(filename)

            if ext != '.rcnt':
                continue

            cnt_file = open(os.path.join(continue_folder, filename), 'rb')
            result = dill.load(cnt_file)
            cnt_file.close()
            result_tuple_list.append((result))

        name_set = set([x[1]['name']  for x in result_tuple_list])
        removed = 0
        for run_name in traj.f_iter_runs():
            if run_name not in name_set:
                run_dict = traj.f_get_run_information(run_name, copy=False)
                run_dict['completed'] = 0
                idx = run_dict['idx']
                traj._updated_run_information.add(idx)
                removed += 1

        self.assertGreaterEqual(removed, nresults)
        logging.getLogger().error('Removed %s runs for continuing' % removed)
        traj.f_store(only_init=True)
Example no. 9
def _generate_image_segments_and_label_batch(images,img_dir,seg_dir,mean_img):
    # build the test batch from every image in `images`
    test_batch = []
    batch_labels     = []
    for i in range(0,images.__len__()):
        img = io.imread(img_dir+images[i])
        segf = open(seg_dir+images[i][0:-3]+'slic','rb')
        segmap = dill.load(segf)
        segments_l = dill.load(segf)
        sal_l = dill.load(segf)
        segf.close()
        data = im2mdfin(img,mean_img,segmap,segments_l)
            
        for j in range(0,segments_l.__len__()):                
            x = data.segments[j]
            dat = np.zeros([227,227,9],dtype = np.uint8)
            dat[:,:,0:3]= x.SP_Region
            dat[:,:,3:6]=x.SP_Neighbor
            dat[:,:,6:9]=x.Pic
            test_batch.append(sal_l[j])
            test_batch.append(x.SP_Region)
            test_batch.append(x.SP_Neighbor)
            test_batch.append(x.Pic)

    return test_batch
Example no. 10
def experimentLMKLIEP_per_feat(which_d):
    with open('feat_vec_out.'+which_d+'.en.pickle', 'rb') as handle:
        out_d = pickle.load(handle)

    with open('feat_vec_in.'+which_d+'.en.pickle', 'rb') as handle:
        in_d = pickle.load(handle)

    labels = - np.ones(out_d.shape[0])
    predictions = - np.ones(out_d.shape[0])
    labels[-50000:] = 1

    W = np.zeros(out_d.shape)

    for i in xrange(W.shape[1]):
        kliep = KLIEP(init_b=100, seed=0)
        lm_out = out_d[:, i].reshape((out_d.shape[0], 1))
        lm_in = in_d[:, i].reshape((in_d.shape[0], 1))
        kliep.fit_CV(lm_out, lm_in)
        W[:, i] = kliep.predict(lm_out).ravel()

    w = np.mean(W, axis=1)
    predictions[np.where(w > 1.4)[0]] = 1
    print 'total positive:', np.where(predictions == 1)[0].shape, ', out of:', out_d.shape[0]
    # sorted_ind = np.argsort(w, axis=None)[::-1]
    # predictions[sorted_ind[0:50000]] = 1

    p, r, f, s = precision_recall_fscore_support(labels.astype(int), predictions.astype(int), pos_label=1, average='micro')
    print 'Precision:', p,
    print 'Recall:', r,
    print 'F1:', f,
    print 'Support:', s,
Example no. 11
def experiment_LMSVM(which_d):
    with open('feat_vec_out.'+which_d+'.en.pickle', 'rb') as handle:
        out_d = pickle.load(handle)

    with open('feat_vec_in.'+which_d+'.en.pickle', 'rb') as handle:
        in_d = pickle.load(handle)

    labels = - np.ones(out_d.shape[0])
    labels[-50000:] = 1

    print 'Fitting one class SVM'
    clf = svm.OneClassSVM(kernel='linear')
    clf.fit(in_d)

    print 'Predicting for out domain'
    predictions = clf.predict(out_d)
    print 'total positive:', np.where(predictions == 1)[0].shape, ', out of:', out_d.shape[0]
    # sorted_ind = np.argsort(w, axis=None)[::-1]
    # predictions[sorted_ind[0:50000]] = 1

    p, r, f, s = precision_recall_fscore_support(labels.astype(int), predictions.astype(int), pos_label=1, average='micro')
    print 'Precision:', p,
    print 'Recall:', r,
    print 'F1:', f,
    print 'Support:', s,
Example no. 12
    def test_run_advanced(self, tmpdir):
        temp_path = os.path.join(str(tmpdir), "temp")
        log_path = os.path.join(str(tmpdir), "log")
        paths = {"temp": temp_path, "log": log_path}
        pipe = pipeline.Pipeline(paths=paths, create_paths=True)
        pipe.add_step(test_func1, ["func1"], var1=3, var2=4)
        pipe.add_step(test_func2, ["func2"], var1=25, var2=10)
        pipe.add_step(test_func2, ["func2"], var1=1, var2=0)
        pipe.add_step(test_func3, var1=1, var2=0)

        with pytest.raises(pipeline.PipelineError):
            pipe.run()
        pipe = dill.load(open(os.path.join(paths["log"], "pipeline.p"), "rb"))
        assert pipe.run_step_idx == 2
        assert pipe.steps[0].results == {"next_id": 4, "status": "success", "step_id": 0, "sum": 7}

        with pytest.raises(ZeroDivisionError):
            for step in pipe.steps:
                step.results = None
            pipe.run(resume=True, ignore_errors=True)

        new_pipe = dill.load(open(os.path.join(paths["log"], "pipeline.p"), "rb"))
        result = new_pipe.run(start_idx=1, ignore_errors=True, ignore_exceptions=True)
        assert result["status"] == "success"
        assert new_pipe.steps[0].results == None
        assert new_pipe.steps[1].results == {"diff": 2.5, "status": "success"}
        assert new_pipe.steps[2].results == {"error": "Division by 0", "status": "error"}
        assert new_pipe.steps[3].results["status"] == "error"
Example no. 13
 def put(self):
     hour = int(request.form['hour'])
     date = request.form['date']
     prcp = float(request.form['prcp'])*100
     snow = float(request.form['snow']) * 10
     tmax = float(request.form['tmax']) * 10
     tmin = float(request.form['tmin']) * 10
     date = pd.to_datetime(date)
     with open(os.path.join(APP_STATIC, 'uniquegeohash.pkl'), 'rb') as f:
         uniquegeohash = dill.load(f)
     with open(os.path.join(APP_STATIC, 'predict_pickup_density.pkl'), 'rb') as f:
         model = dill.load(f)
     x_dict = [{"pickup_geohash": geostr, "hour": hour, "dayofweek": date.dayofweek, 'month': date.month,'PRCP':prcp,'SNOW':snow,'TMAX':tmax,'TMIN':tmin} for geostr in uniquegeohash]
     x_df = pd.DataFrame(x_dict)
     y = model.predict(x_df)
     geodecode = [Geohash.decode(geocode) for geocode in uniquegeohash]
     yzipgeo = zip(y, geodecode)
     sortedlist = sorted(yzipgeo, key=lambda x: -x[0])
     top10address = []
     top10dict = {}
     for y, geodecode in sortedlist[0:50]:
         key = ",".join(geodecode)
         top10dict[key] = top10dict.get(key,0) + y
     top10res = []
     for key in top10dict:
         temptuple = (float(key.split(",")[0]),float(key.split(",")[1]))
         top10res.append([top10dict[key],temptuple])
     top10res = sorted(top10res,key=lambda x:-x[0])
     top10res = top10res[0:10] if len(top10res) > 10 else top10res
     for u,geodecode in top10res:
         g = geocoder.google([geodecode[0], geodecode[1]], method='reverse').address
         top10address.append(g)
     return {"top10": top10res,"top10address":top10address}
Example no. 14
def create_cifar10():
    # Load training data
    X, y = [], []
    for num in range(1,5):
        f = open('data_batch_' + str(num), 'rb')
        batch = pickle.load(f)
        X.append(batch['data'])
        y.append(batch['labels'])
    X = np.concatenate(X).reshape(-1, 3, 32, 32).astype(np.float32)
    y = np.concatenate(y).astype(np.int32)
    # Load test data
    f = open('test_batch', 'rb')
    batch = pickle.load(f)
    X_test = batch['data'].reshape(-1, 3, 32, 32).astype(np.float32)
    y_test = np.array(batch['labels'], dtype=np.int32)
    # Split arrays
    ii = np.random.permutation(len(X))
    X_train = X[ii[1000:]]
    y_train = y[ii[1000:]]
    X_val = X[ii[:1000]]
    y_val = y[ii[:1000]]
    # Offset data
    offset = np.mean(X_train, 0)
    scale = np.std(X_train, 0).clip(min=1)
    X_train = (X_train - offset) / scale
    X_val = (X_val - offset) / scale
    X_test = (X_test - offset) / scale
    # Save data
    pickle.dump(X_train, open('cifar10_X_train', "wb"))
    pickle.dump(y_train, open('cifar10_y_train', "wb"))
    pickle.dump(X_val, open('cifar10_X_val', "wb"))
    pickle.dump(y_val, open('cifar10_y_val', "wb"))
    pickle.dump(X_test, open('cifar10_X_test', "wb"))
    pickle.dump(y_test, open('cifar10_y_test', "wb"))
    return True
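Read-back sketch (added, not in the original): the arrays written above can be restored with matching pickle.load calls.

import pickle

with open('cifar10_X_train', 'rb') as f:
    X_train = pickle.load(f)  # float32 array of shape (N, 3, 32, 32)
with open('cifar10_y_train', 'rb') as f:
    y_train = pickle.load(f)  # int32 label array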
Example no. 15
def get_data():
    '''
    get the predictor and target and return them
    '''

    nids = dill.load(open('movie_ids.pkl', 'rb'))
    model = dill.load(open('all2.pkl', 'rb'))
    W = model.W
    df = pd.DataFrame(W, index=nids)

    c = MongoClient()
    db = c['movies']
    boxoffice2 = db.boxoffice2
    movie_info = db.movie_info

    r = list(boxoffice2.find({}, {'_id': 1, 'BoxOffice2': 1}))
    df_bf = pd.DataFrame(r).set_index('_id')

    r2 = list(movie_info.find({}, {'_id': 1, 'year': 1, 'title': 1}))
    df_year = pd.DataFrame(r2)
    df_year = df_year.set_index('_id')

    df = df.join(df_bf).join(df_year)

    cond1 = (df['year'] >= 2010)
    cond2 = ~ np.isnan(df['BoxOffice2'])

    cond = cond1 & cond2

    df_subset = df[cond]
    y = df_subset['BoxOffice2'].values
    X = df_subset.iloc[:, :-3].values

    return X, y
Example no. 16
def main():
	argc = len(sys.argv)
	
	if argc < 2:
		print "Error: No input files"
		exit()
	data = pickle.load(open(sys.argv[1], "rb"))

	bwords = None
	vocabulary = None
	if argc > 2:
		vocabulary, bwords, _ = pickle.load(open(sys.argv[2], "rb"))

	corpus, topics, titles, time, grpid = data

	BV = BVsmVectorizer(vocabulary)
	# N  = len(corpus)
	i  = 0

	time   = np.array(time)
	# corpus = np.array(corpus)
	topics = np.array(topics)
	titles = np.array(titles)
	grpid  = np.array(grpid)
	allDates = sorted(list(set(time)))

	for d in allDates:

		try:
			dcorpus = list()
			indeces = list(np.nonzero(time == d)[0])
			for j in indeces:
				dcorpus.append(corpus[j])
			c, t, b, sab, smb = BV.vectorize(dcorpus, vocab=vocabulary)
			vocabulary = BV.vocab
		except KeyboardInterrupt:
			print "Could Not Vectorise", d
			exit()

		try:
			pickle.dump( ((c, t, b, sab, smb), 
				          topics[time == d], titles[time == d], 
				            time[time == d], grpid[time == d]) ,
				              open("vectors/"+str(d), "wb") )

			# pickle.dump( (BV.vocab, BV.bwords, BV.wordWise / BV.N, BV.N, d) , open("vectors/vectorizer."+str(i%2), "wb"))
		except KeyboardInterrupt:
			print "Could Not Pickle", d
			exit()

		i += 1
		print d, (d%2), "\n"
		if not d % 30:
			pickle.dump( (BV.vocab, BV.bwords, BV.bvocab, BV.wordWise / BV.N, BV.N, d) , open("vectors/vectorizer."+str(d), "wb"))

	print len(set(BV.bwords))
	print "Finishing..."
	pickle.dump( (BV.vocab, BV.bwords, BV.bvocab, BV.wordWise / BV.N, BV.N, d) , open("vectors/vectorizer."+str(d), "wb"))
Example no. 17
def predict_checkins(id, n_docks, total_docks, features):
  if not (os.path.exists('estimators/checkin_estimator_high_'+str(id)+'.dill.bz2') and 
          os.path.exists('estimators/checkout_estimator_mid_'+str(id)+'.dill.bz2')):
    return None, None, None
  checkin_est = dill.load(bz2.BZ2File('estimators/checkin_estimator_high_'+str(id)+'.dill.bz2', 'r'))
  checkout_est = dill.load(bz2.BZ2File('estimators/checkout_estimator_mid_'+str(id)+'.dill.bz2', 'r'))
  flux = checkout_est.predict(features)/15 - checkin_est.predict(features)/15
  docks = np.clip(n_docks+flux.cumsum(), 0, total_docks)
  return int(docks[29]), int(docks[44]), int(docks[59])
Example no. 18
def test_2():
    max_len = 4

    print 'Loading heldout phrase pairs...'
    with open('/home/christos/SSLP/Project2/heldout/phrase_pairs_.pickle') as handle:
        phrase_pairs_held = pickle.load(handle)

    print 'Loading regular phrase pairs...'
    with open('/home/said/git/SSLP/Project2/training/phrase_pairs_.pickle') as handle:
        reg_phrase_pairs = pickle.load(handle)

    print 'Loading combined phrase pairs...'
    with open('/home/said/git/SSLP/Project2/training/combined_phrase_pairs_4_10.pickle') as handle:
        comb_phrase_pairs = pickle.load(handle)

    print 'Measuring sparsity...'
    ex_sparsity = measure_exact(reg_phrase_pairs, phrase_pairs_held, max_len=max_len)
    print 'Regular phrase pairs:'
    for i in xrange(len(ex_sparsity)):
        print 'For phrases with n =', i+1
        print 'In train and heldout:', ex_sparsity[i][0]
        print 'In train and not in heldout:', ex_sparsity[i][1]
        print 'In heldout and not in train:', ex_sparsity[i][2]
        print

    ex_sparsity = measure_exact(comb_phrase_pairs, phrase_pairs_held, max_len=max_len)
    print 'Combined phrase pairs:'
    for i in xrange(len(ex_sparsity)):
        print 'For phrases with n =', i+1
        print 'In train and heldout:', ex_sparsity[i][0]
        print 'In train and not in heldout:', ex_sparsity[i][1]
        print 'In heldout and not in train:', ex_sparsity[i][2]
        print

    print 'Calcuating precision and recall against Moses...'
    print 'PR for regular:'
    precision, recall = pr_vs_moses('phrase-table', reg_phrase_pairs)
    print 'Precision:', precision, 'Recall:', recall
    print

    print 'PR for combined:'
    precision, recall = pr_vs_moses('phrase-table', comb_phrase_pairs)
    print 'Precision:', precision, 'Recall:', recall
    print

    print 'Calcuating precision and recall against Moses per length...'
    print 'PR for regular:'
    precision, recall = pr_vs_moses_per_len('phrase-table', reg_phrase_pairs)
    for i in xrange(len(precision)):
        print 'n=', i+1, ':', 'Precision:', precision[i], 'Recall:', recall[i]
    print

    print 'PR for combined:'
    precision, recall = pr_vs_moses_per_len('phrase-table', comb_phrase_pairs)
    for i in xrange(len(precision)):
        print 'n=', i+1, ':', 'Precision:', precision[i], 'Recall:', recall[i]
    print
Example no. 19
    def test_dynamic_classes(self):
        test_classes = {
            algorithms.GradientDescent: {},
            algorithms.MinibatchGradientDescent: {'batch_size': 10},
            algorithms.Momentum: {'momentum': 0.5},
        }

        for algorithm_class, algorithm_params in test_classes.items():
            optimization_classes = [algorithms.WeightDecay,
                                    algorithms.SearchThenConverge]

            bpnet = algorithm_class(
                (3, 5, 1),
                addons=optimization_classes,
                verbose=False,
                **algorithm_params
            )
            data, target = datasets.make_regression(n_features=3, n_targets=1)

            data = preprocessing.MinMaxScaler().fit_transform(data)
            target_scaler = preprocessing.MinMaxScaler()
            target = target_scaler.fit_transform(target.reshape(-1, 1))

            with tempfile.NamedTemporaryFile() as temp:
                valid_class_name = bpnet.__class__.__name__
                dill.dump(bpnet, temp)
                temp.file.seek(0)

                restored_bpnet = dill.load(temp)
                restored_class_name = restored_bpnet.__class__.__name__
                temp.file.seek(0)

                self.assertEqual(valid_class_name, restored_class_name)
                self.assertEqual(optimization_classes,
                                 restored_bpnet.addons)

                bpnet.train(data, target, epochs=10)
                real_bpnet_error = bpnet.prediction_error(data, target)
                updated_input_weight = (
                    bpnet.input_layer.weight.get_value().copy()
                )

                dill.dump(bpnet, temp)
                temp.file.seek(0)

                restored_bpnet2 = dill.load(temp)
                temp.file.seek(0)
                restored_bpnet_error = restored_bpnet2.prediction_error(
                    data, target
                )

                np.testing.assert_array_equal(
                    updated_input_weight,
                    restored_bpnet2.input_layer.weight.get_value()
                )
                # Error must be big, because we didn't normalize data
                self.assertEqual(real_bpnet_error, restored_bpnet_error)
Example no. 20
  def compute_predictions(self, model_name, inputs):
    '''
    computes prediction for given sample and specified model
    '''

    if self.approx_technique.lower() in ['rsm', 'moa', 'hda', 'gp']:
      model = gtapprox.Model(self.models_save_path+model_name+self.ext)
      prediction = model.calc(inputs)

    elif self.approx_technique.lower() == 'xgboost':
      model = Xgboost()
      model.load(self.models_save_path+model_name+self.ext)
      prediction = model.predict(inputs)[:, np.newaxis]

    elif self.approx_technique.lower() == 'quadxgboost':
      model = Xgboost()
      model.load(self.models_save_path+model_name+'_1_'+self.ext)
      prediction = model.predict(inputs)[:, np.newaxis]

      model0 = gtapprox.Model(self.models_save_path+model_name+'_0_'+self.ext)
      prediction = prediction + model0.calc(inputs)

    elif self.approx_technique.lower() == 'mars':

      with open(self.models_save_path+model_name+self.ext, 'rb') as file_object:
        model = pickle.load(file_object)

      prediction = model.predict(inputs)[:, np.newaxis]

    elif self.approx_technique.lower() == 'skgboost':

      with open(self.models_save_path+model_name+self.ext, 'rb') as file_object:
        model = pickle.load(file_object)

      prediction = model.predict(inputs)[:, np.newaxis]

    elif self.approx_technique.lower() == 'adaboost':

      with open(self.models_save_path+model_name+self.ext, 'rb') as file_object:
        model = pickle.load(file_object)

      prediction = model.predict(inputs)[:, np.newaxis]

    elif self.approx_technique.lower() == 'zeros':
      prediction = np.zeros((inputs.shape[0], 1))

    elif self.approx_technique.lower() == 'means':

      with open(self.models_save_path+model_name+self.ext, 'rb') as file_object:
        model = pickle.load(file_object)

      prediction = model*np.ones((inputs.shape[0], 1))

    else:
      raise Exception('Wrong approx type specified!')

    return prediction
Example no. 21
def load_actions_from_file():
    """ loads Qtrons from previous file, please check that file load_qtrons.pk1 exists
    in data directory """

    with open('./data/load_qtrons.pk1', 'rb') as f:
        parameters = dill.load(f)
        actions = dill.load(f)
        config = dill.load(f)

    return parameters, actions, config
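Counterpart sketch (added; the helper below is hypothetical): the loader above assumes data/load_qtrons.pk1 was written with three sequential dill.dump calls, which dill.load reads back in the same order.

import dill

def save_actions_to_file(parameters, actions, config):
    with open('./data/load_qtrons.pk1', 'wb') as f:
        dill.dump(parameters, f)  # read back first
        dill.dump(actions, f)     # read back second
        dill.dump(config, f)      # read back third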
Example no. 22
def _main(fd):
    with os.fdopen(fd, 'rb', closefd=True) as from_parent:
        process.current_process()._inheriting = True
        try:
            preparation_data = pickle.load(from_parent)
            prepare(preparation_data)
            self = pickle.load(from_parent)
        finally:
            del process.current_process()._inheriting
    return self._bootstrap()
Example no. 23
 def __init__(self, words_file, ep_cache_file, occ_cache_file, accuracy):
     with open(ep_cache_file, 'rb') as f:
         self.error_probability = dill.load(f)
     with open(occ_cache_file, 'rb') as f:
         self.occurrneces = dill.load(f)
     self.words = TextCorpusStatisticsCalculator([]).read_words(words_file)
     self.accuracy = accuracy
     self.n = sum([v for k, v in self.occurrneces.items()])
     self.m = len(self.words)
     self.calc = ErrorProbabilityCalculator()
Example no. 24
def update_session(fname=None):
    import dill as pickle
    if fname is None:
        fname = conf.session
    try:
        s = pickle.load(gzip.open(fname,"rb"))
    except IOError:
        s = pickle.load(open(fname,"rb"))
    kamene_session = builtins.__dict__["kamene_session"]
    kamene_session.update(s)
Example no. 25
def load_phrases(folder):
    print 'Loading...'
    with open(folder+'phrase_pairs_.pickle', 'rb') as handle:
        phrase_pairs = pickle.load(handle)
    with open(folder+'en_given_nl_.pickle', 'rb') as handle:
        en_given_nl = pickle.load(handle)
    with open(folder+'nl_given_en_.pickle', 'rb') as handle:
        nl_given_en = pickle.load(handle)
    with open(folder+'joint_ennl_.pickle', 'rb') as handle:
        joint_ennl = pickle.load(handle)
    print 'Loaded.'

    return phrase_pairs, en_given_nl, nl_given_en, joint_ennl
Example no. 26
def main():

    classifier_file = open('saved_classifiers/spam_classifier.pickle', 'rb')
    classifier_object = dill.load(classifier_file)
    classifier_file.close()

    trainer_file = open('saved_classifiers/trainer.pickle', 'rb')
    trainer_object = dill.load(trainer_file)
    trainer_file.close()

    # Testing the accuracy
    test(trainer_object, classifier_object, 'spam')
    test(trainer_object, classifier_object, 'ham')
Example no. 27
def category_model(record):
    test_tf={x:1 for x in record['categories']}
    f=open("./ml/dict_category_model", "r")
    a=dill.load(f)
    test_tf1=a.transform(test_tf)
    f.close()
    
    f=open("./ml/category_model","r")
    b=dill.load(f)
    f.close()
    try:
        return b.predict(test_tf1)
    except:
        return 0
Example no. 28
    def test_dynamic_classes(self):
        test_classes = {
            algorithms.Backpropagation: {},
            algorithms.MinibatchGradientDescent: {"batch_size": 10},
            algorithms.Momentum: {"momentum": 0.5},
            algorithms.RPROP: {"maximum_step": 1},
            algorithms.IRPROPPlus: {"maximum_step": 1},
            algorithms.ConjugateGradient: {"update_function": "fletcher_reeves"},
            algorithms.QuasiNewton: {"update_function": "bfgs"},
            algorithms.HessianDiagonal: {"min_eigenvalue": 1e-5},
            algorithms.LevenbergMarquardt: {"mu": 0.01},
        }

        for algorithm_class, algorithm_params in test_classes.items():
            optimization_classes = [algorithms.WeightDecay, algorithms.SearchThenConverge]

            bpnet = algorithm_class((3, 5, 1), optimizations=optimization_classes, verbose=False, **algorithm_params)
            data, target = datasets.make_regression(n_features=3, n_targets=1)

            data = preprocessing.MinMaxScaler().fit_transform(data)
            target_scaler = preprocessing.MinMaxScaler()
            target = target_scaler.fit_transform(target.reshape(-1, 1))

            with tempfile.NamedTemporaryFile() as temp:
                valid_class_name = bpnet.__class__.__name__
                dill.dump(bpnet, temp)
                temp.file.seek(0)

                restored_bpnet = dill.load(temp)
                restored_class_name = restored_bpnet.__class__.__name__
                temp.file.seek(0)

                self.assertEqual(valid_class_name, restored_class_name)
                self.assertEqual(optimization_classes, restored_bpnet.optimizations)

                bpnet.train(data, target, epochs=10)
                real_bpnet_error = bpnet.error(bpnet.predict(data), target)
                updated_input_weight = bpnet.input_layer.weight.copy()

                dill.dump(bpnet, temp)
                temp.file.seek(0)

                restored_bpnet2 = dill.load(temp)
                temp.file.seek(0)
                actual = restored_bpnet2.predict(data)
                restored_bpnet_error = restored_bpnet2.error(actual, target)

                np.testing.assert_array_equal(updated_input_weight, restored_bpnet2.input_layer.weight)
                # Error must be big, because we didn't normalize data
                self.assertEqual(real_bpnet_error, restored_bpnet_error)
Example no. 29
def sentiment(text):
	"""
	Thai sentiment analysis.
	Uses data from https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/
	Takes a string (str) and returns 'pos' or 'neg'."""
	with open(os.path.join(templates_dir, 'vocabulary.data'), 'rb') as in_strm:
		vocabulary = dill.load(in_strm)
	in_strm.close()
	with open(os.path.join(templates_dir, 'sentiment.data'), 'rb') as in_strm:
		classifier = dill.load(in_strm)
	in_strm.close()
	text=set(word_tokenize(text))-set(stopwords.words('thai'))
	featurized_test_sentence =  {i:(i in text) for i in vocabulary}
	return classifier.classify(featurized_test_sentence)
Example no. 30
#!/home/chenyang/Documents/twilio_webserver/env/bin/python2
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 2008-2016 California Institute of Technology.
# Copyright (c) 2016-2017 The Uncertainty Quantification Foundation.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/dill/LICENSE

if __name__ == '__main__':
  import sys
  import dill
  for file in sys.argv[1:]:
    print (dill.load(open(file,'rb')))

Example no. 31
        res = MyEval.F14Exp(pred, test_label)
        print(res)

        with open('../../stat/res_exp_for_paper.csv', 'a') as fout:
            fout.write('{0},{1},{2},{3},{4},{5},{6},{7}\n'.format(
                method, i_iter, i_fold, res[0], res[1], res[2], res[3],
                res[4]))

        i_fold += 1


if __name__ == "__main__":

    with open('../data/features_all_v2.5.pkl', 'rb') as my_input:
        all_pid = dill.load(my_input)
        all_feature = np.array(dill.load(my_input))
        all_label = np.array(dill.load(my_input))
        print('features_all shape: ', all_feature.shape)

    with open('../data/feat_deep_centerwave_v0.1.pkl', 'rb') as my_input:
        feat_deep_centerwave = np.array(dill.load(my_input))
        print('feat_deep_centerwave shape: ', feat_deep_centerwave.shape)

    with open('../data/feat_resnet.pkl', 'rb') as my_input:
        feat_resnet = np.array(dill.load(my_input))
        print('feat_resnet shape: ', feat_resnet.shape)

    all_feature = np.c_[all_feature, feat_deep_centerwave, feat_resnet]
    all_label = np.array(all_label)
    all_pid = np.array(all_pid)
Example no. 32
filename = "51_rubidium87_relevant_2021-08-08T13_42_.pkl"
filename = "51_rubidium87_relevant_21*021-08-08T14_56_.pkl"
filename = "51_rubidium87_relevant_2021-08-08T18_54_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T02_34_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T13_23_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T13_47_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T14_36_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T15_28_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T16_27_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T17_12_.pkl"
filename = "51_rubidium87_relevant_2021-08-09T17_37.pkl"
filename = "51_rubidium87_relevant_2021-08-09T18_12.pkl"
filename = "60_rubidium87_relevant_2021-08-19T16_58.pkl"

with open(f"system/simulation/saved_simulations/{filename}", "rb") as f:
    simulation: Simulation = pickle.load(f)

# _raw_dc_calculator = simulation.get_calculator((270, 230))
# simulation.dc_field_calculator = lambda _t: _raw_dc_calculator(_t).round(1)
# _raw_dc_calculator = simulation.get_calculator((270, 210))
# simulation.dc_field_calculator = lambda _t: _raw_dc_calculator(_t).round(1)
#
# simulation.rf_freq_calculator = simulation.get_calculator(230e6 / 1e9)
# simulation.rf_field_calculator = lambda t: 3 * np.sin(np.pi * t / 1000 / simulation.t)

print(simulation.dc_field)
print(simulation.rf_field)
print(simulation.rf_freq)
print(simulation.t)

systems: List[qutip.Qobj] = simulation.results.states
Example no. 33
        lenDBuser = len(DBuser)
        lenUserVec = len(userVec)
        jaccard = matches / (lenDBuser + lenUserVec - matches)

        tempSim = jaccard * (1 - MSD)

        if tempSim > bestSim:
            bestSim = tempSim
            bestSimUser = u
    bestSimUser = icamf_recommender.rating_object.ids_user[bestSimUser]

    return bestSimUser, bestSim


with open("dummy_model.pkl", "rb") as f:
    icamf_recommender = dill.load(f)

if __name__ == '__main__':
    app.run(host="0.0.0.0", port=8080)

#To run without clipping set to False or del argument

#train_recommender_kfold(kfold=5, regularizer=0.001, learning_rate=0.001, num_factors=20, iterations=50, clipping=5)
#train_recommender_kfold(kfold=5, regularizer=0.001, learning_rate=0.002, num_factors=20, iterations=50, clipping=5)
#train_recommender_kfold(kfold=5, regularizer=0.001, learning_rate=0.005, num_factors=20, iterations=50, clipping=5)

#train_and_save_model(regularizer=0.001, learning_rate=0.002, num_factors=20, iterations=1, clipping=5)

#with open("dummy_model.pkl", "rb") as f:
#    icamf_recommender = dill.load(f)
Example no. 34
def sample(save_dir):
    path_to_config = save_dir + "/config"
    if not os.path.isfile(path_to_config):
        raise IOError("Could not find " + path_to_config)
    with open(path_to_config, "rb") as f:
        gen_config = pickle.load(f)

    # # Load vocabulary encoder
    # glove_dir = '/Users/danfriedman/Box Sync/My Box Files/9 senior spring/gen/glove/glove.6B/glove.6B.50d.txt'
    # #glove_dir = '/data/corpora/word_embeddings/glove/glove.6B.50d.txt'
    if gen_config.use_glove:
        _, _, _, L = data_reader.glove_encoder(gen_config.glove_dir)
    else:
        L = None

    # Rebuild the model
    with tf.variable_scope("LSTM"):
        gen_model = lstm_ops.seq2seq_model(
            encoder_seq_length=gen_config.d_len,
            decoder_seq_length=1,
            num_layers=gen_config.num_layers,
            embed_size=gen_config.embed_size,
            batch_size=gen_config.batch_size,
            hidden_size=gen_config.hidden_size,
            vocab_size=gen_config.vocab_size,
            dropout=gen_config.dropout,
            max_grad_norm=gen_config.max_grad_norm,
            use_attention=gen_config.use_attention,
            embeddings=L,
            is_training=False,
            is_gen_model=True,
            token_type=gen_config.token_type,
            reuse=False)

    with tf.Session() as session:
        saver = tf.train.Saver()
        saver.restore(session,
                      tf.train.latest_checkpoint('./' + args.save_dir))

        def generate(description, temperature):
            return lstm_ops.generate_text_beam_search(
                session=session,
                model=gen_model,
                encode=gen_config.encode,
                decode=gen_config.decode,
                description=description,
                d_len=gen_config.d_len,
                beam=5,
                stop_length=gen_config.c_len,
                temperature=temperature)

        seed = "Three huge birds wait outside of the window of a man's room. The man is talking on the phone."
        temp = 1.0

        print(generate(seed, temp))

        while raw_input("Sample again? ([y]/n): ") != "n":
            new_seed = raw_input("seed: ")
            if len(gen_config.encode(seed)) > gen_config.d_len:
                print("Description must be < {} tokens".format(
                    gen_config.d_len))
                continue
            new_temp = raw_input("temp: ")

            if new_seed != "":
                seed = new_seed
            if new_temp != "":
                temp = float(new_temp)

            print(generate(seed, temp))
Example no. 35
def resume(save_file):
    simulation = dill.load(save_file)
    simulation.run(resume=True)
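Usage sketch (added; the file name is an assumption): resume() expects an already-open binary file object rather than a path.

with open('simulation.dill', 'rb') as save_file:
    resume(save_file)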
Example no. 36

def tokenizer(text):  # create a tokenizer function
    # returns a list of token texts (str) from spacy_en's tokenizer
    return [tok.text for tok in spacy_en.tokenizer(text)]


from torchtext import data

import numpy as np
from data import text_utils

args = argument_parser()

with open("seq2seq/bak/TEXT.Field", "rb") as f:
    TEXT = dill.load(f)

LENGTH = data.Field(sequential=False, use_vocab=False)

embeddings = np.random.random((len(TEXT.vocab.itos), args.embed_size))
args.TEXT = TEXT

encoder = SN_MODELS["encoder"](embeddings, args)
# atten = SN_MODELS["attention"](args.hidden_size * 4, 300)
# decoder = SN_MODELS["decoder"](embeddings, args)
atten = SN_MODELS["attention"](args.hidden_size, "general")
decoder = SN_MODELS["decoder"](embeddings, args, atten)

model_class = SN_MODELS[args.model_name]

# model = model_class(encoder, decoder, args)
Example no. 37
NAME = '*test13'

pth = Path('~/Simulations/coop_extension')

flist = list(pth.expanduser().glob(NAME))

def get_params(fname):

    par = str(fname).split('/')[5].split('-')
    return par

data = []
for fl in flist:
    with open(fl, 'rb') as fh:
        d = dill.load(fh)
        p = get_params(fl)
        data.append((p, d))
D = len(data)

N = len([
    x for x in data[0][1][-1].keys() if isinstance(x, tuple)
    ]) - 1

dataset = np.zeros((D, 10))
NN = tuple(range(N))

for i, (par, dt) in enumerate(data):
    dataset[i][0] = dt[-1]['roi_coop_theo'] # Theo roi coop
    dataset[i][1] = dt[-1]['roi_coop_days'].mean() # Mean roi coop
    dataset[i][2] = 100 * ((dataset[i, 0] / dataset[i , 1]) - 1)
Example no. 38
                    default=10000,
                    help='number of points used for plotting')

args = parser.parse_args()
args.twofold = bool(args.twofold)
if args.target_dims == 'all':
    args.mds_dims = args.latent_dims

if not args.conditioning is None:
    args.name += "_%s" % args.conditioning

#%% Data import
import dill

with open(args.data, 'rb') as f:
    audioSet = dill.load(f)
#testSet = np.load('data/testSet.npy')

if not args.filter is None:
    wrong_ids = np.where(audioSet.metadata['octave'] > args.filter)
    audioSet.files = np.delete(np.array(audioSet.files), wrong_ids).tolist()
    audioSet.data = np.delete(np.array(audioSet.data), wrong_ids).tolist()
    for k, v in audioSet.metadata.items():
        audioSet.metadata[k] = np.delete(v, wrong_ids)

if len(args.frames) == 0:
    print('taking the whole dataset...')
    audioSet.flattenData(lambda x: x[:])
elif len(args.frames) == 2:
    print('taking between %d and %d...' % (args.frames[0], args.frames[1]))
    audioSet.flattenData(lambda x: x[args.frames[0]:args.frames[1]])
Example no. 39
 def load_model(self, path_to_save):
     with open(path_to_save, 'rb') as f:
         hmm_tagger = dill.load(f)
     self.model = hmm_tagger
     return hmm_tagger
Example no. 40
def load_model():
    with open(MODEL_FILE_PATH,'rb') as file:
        return dill.load(file)
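Counterpart sketch (added; hypothetical helper reusing MODEL_FILE_PATH from the snippet above): writing the file that load_model() reads.

import dill

def save_model(model):
    with open(MODEL_FILE_PATH, 'wb') as file:
        dill.dump(model, file)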
Example no. 41
 def _load_auxiliary_files(self):
     super(TextTrainer, self)._load_auxiliary_files()
     data_indexer_file = open("%s_data_indexer.pkl" % self.model_prefix,
                              "rb")
     self.data_indexer = pickle.load(data_indexer_file)
     data_indexer_file.close()
Example no. 42
def unpickle(path):
    with open(path, 'rb') as fp:
        return dill.load(fp)
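Usage sketch (added; the 'model.dill' path is hypothetical): unpickle() restores whatever object was previously written with dill.dump.

model = unpickle('model.dill')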
Example no. 43
 def load(identifier):
     file_name = f'{se.local_storage}model_nav-mode_v{DILL_VERSION}_{identifier}.dill'
     with open(file_name, 'rb') as handle:
         bm = dill.load(handle)
     return bm
Example no. 44
import os
import tempfile
import dill

try:
    import pathos.multiprocessing as mp
except ImportError:
    pass

from spatialist.ancillary import HiddenPrints

if __name__ == '__main__':

    # de-serialize the arguments written by function ancillary.multicore
    tmpfile = os.path.join(tempfile.gettempdir(), 'spatialist_dump')
    with open(tmpfile, 'rb') as tmp:
        func, cores, processlist = dill.load(tmp)

    # serialize the job arguments to be able to pass them to the processes
    processlist = [dill.dumps([func, x]) for x in processlist]

    # a simple wrapper to execute the jobs in the sub-processes
    # re-import of modules and passing pickled variables is necessary since on
    # Windows the environment is not shared between parent and child processes
    def wrapper(job):
        import dill
        function, proc = dill.loads(job)
        return function(**proc)

    # hide print messages in the sub-processes
    with HiddenPrints():
        # start pool of processes and do the work
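The snippet above is cut off here. The separate sketch below (an assumption about the mechanism, not spatialist code) shows the dill.dumps / dill.loads round trip that the wrapper relies on: a function and its keyword arguments are serialized together and rebuilt inside the worker.

import dill

def add(a, b):
    return a + b

job = dill.dumps([add, {'a': 1, 'b': 2}])  # serialize function + kwargs
function, proc = dill.loads(job)           # rebuild them, as wrapper() does
print(function(**proc))                    # -> 3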
Example no. 45
    def __init__(
            self,
            Nlayers=1,  # number of layers
            Ndirs=1,  # unidirectional or bidirectional
            Nx=100,  # input size
            Nh=100,  # hidden layer size
            Ny=100,  # output size
            Ah="relu",  # hidden unit activation (e.g. relu, tanh, lstm)
            Ay="linear",  # output unit activation (e.g. linear, sigmoid, softmax)
            predictPer="frame",  # frame or sequence
            loss=None,  # loss function (e.g. mse, ce, ce_group, hinge, squared_hinge)
            L1reg=0.0,  # L1 regularization
            L2reg=0.0,  # L2 regularization
            seed=15213,  # random seed for initializing the weights
            frontEnd=None,  # a lambda function for transforming the input
            filename=None,  # initialize from file
            initParams=None,  # initialize from given dict
    ):

        if filename is not None:  # load parameters from file
            with smart_open(filename, "rb") as f:
                initParams = dill.load(f)
        if initParams is not None:  # load parameters from given dict
            self.paramNames = []
            self.params = []
            for k, v in initParams.iteritems():
                if type(v) is numpy.ndarray:
                    self.addParam(k, v)
                else:
                    setattr(self, k, v)
                    self.paramNames.append(k)
            # Note: locals()[k] = v doesn't work, so this is done statically
            Nlayers, Ndirs, Nx, Nh, Ny, Ah, Ay, predictPer, loss, L1reg, L2reg, frontEnd \
                = self.Nlayers, self.Ndirs, self.Nx, self.Nh, self.Ny, self.Ah, self.Ay, self.predictPer, self.loss, self.L1reg, self.L2reg, self.frontEnd
        else:  # Initialize parameters randomly
            # Names of parameters to save to file
            self.paramNames = [
                "Nlayers", "Ndirs", "Nx", "Nh", "Ny", "Ah", "Ay", "predictPer",
                "loss", "L1reg", "L2reg", "frontEnd"
            ]
            for name in self.paramNames:
                value = locals()[name]
                setattr(self, name, value)

            # Values of parameters for building the computational graph
            self.params = []

            # Initialize random number generators
            global rng
            rng = numpy.random.RandomState(seed)

            # Construct parameter matrices
            Nlstm = 4 if Ah == 'lstm' else 1
            self.addParam("Win", rand_init((Nx, Nh * Ndirs * Nlstm), Ah))
            self.addParam("Wrec",
                          rand_init((Nlayers, Ndirs, Nh, Nh * Nlstm), Ah))
            self.addParam(
                "Wup",
                rand_init((Nlayers - 1, Nh * Ndirs, Nh * Ndirs * Nlstm), Ah))
            self.addParam("Wout", rand_init((Nh * Ndirs, Ny), Ay))
            if Ah != "lstm":
                self.addParam("Bhid", zeros((Nlayers, Nh * Ndirs)))
            else:
                self.addParam(
                    "Bhid",
                    numpy.tile(
                        numpy.hstack([
                            full((Nlayers, Nh), 1.0),
                            zeros((Nlayers, Nh * 3))
                        ]), (1, Ndirs)))
            self.addParam("Bout", zeros(Ny))
            self.addParam("h0", zeros((Nlayers, Ndirs, Nh)))
            if Ah == "lstm":
                self.addParam("c0", zeros((Nlayers, Ndirs, Nh)))

        # Compute total number of parameters
        self.nParams = sum(x.get_value().size for x in self.params)

        # Initialize accumulators for gradients
        self.aparams = [
            theano.shared(zeros(x.get_value().shape)) for x in self.params
        ]

        # Build computation graph
        input = T.ftensor3()
        mask = T.imatrix()
        mask_int = [(mask % 2).nonzero(), (mask >= 2).nonzero()]
        mask_float = [
            T.cast((mask % 2).dimshuffle((1, 0)).reshape(
                (mask.shape[1], mask.shape[0], 1)), theano.config.floatX),
            T.cast((mask >= 2).dimshuffle((1, 0)).reshape(
                (mask.shape[1], mask.shape[0], 1)), theano.config.floatX)
        ]

        # mask_int = [(mask & 1).nonzero(), (mask & 2).nonzero()]
        # mask_float = [T.cast((mask & 1).dimshuffle((1, 0)).reshape((mask.shape[1], mask.shape[0], 1)), theano.config.floatX),
        #               T.cast(((mask & 2) / 2).dimshuffle((1, 0)).reshape((mask.shape[1], mask.shape[0], 1)), theano.config.floatX)]

        def step_rnn(x_t, mask, h_tm1, W, h0):
            h_tm1 = T.switch(mask, h0, h_tm1)
            return [ACTIVATION[Ah](x_t + h_tm1.dot(W))]

        def step_lstm(x_t, mask, c_tm1, h_tm1, W, c0, h0):
            c_tm1 = T.switch(mask, c0, c_tm1)
            h_tm1 = T.switch(mask, h0, h_tm1)
            a = x_t + h_tm1.dot(W)
            f_t = T.nnet.sigmoid(a[:, :Nh])
            i_t = T.nnet.sigmoid(a[:, Nh:Nh * 2])
            o_t = T.nnet.sigmoid(a[:, Nh * 2:Nh * 3])
            c_t = T.tanh(a[:, Nh * 3:]) * i_t + c_tm1 * f_t
            h_t = T.tanh(c_t) * o_t
            return [c_t, h_t]

        x = input if frontEnd is None else frontEnd(input)
        for i in range(Nlayers):
            h = (x.dimshuffle((1, 0, 2)).dot(self.Win)
                 if i == 0 else h.dot(self.Wup[i - 1])) + self.Bhid[i]
            rep = lambda x: T.extra_ops.repeat(
                x.reshape((1, -1)), h.shape[1], axis=0)
            if Ah != "lstm":
                h = T.concatenate([
                    theano.scan(
                        fn=step_rnn,
                        sequences=[
                            h[:, :, Nh * d:Nh * (d + 1)], mask_float[d]
                        ],
                        outputs_info=[rep(self.h0[i, d])],
                        non_sequences=[self.Wrec[i, d],
                                       rep(self.h0[i, d])],
                        go_backwards=(d == 1),
                    )[0][::(1 if d == 0 else -1)] for d in range(Ndirs)
                ],
                                  axis=2)
            else:
                h = T.concatenate([
                    theano.scan(
                        fn=step_lstm,
                        sequences=[
                            h[:, :, Nh * 4 * d:Nh * 4 * (d + 1)], mask_float[d]
                        ],
                        outputs_info=[rep(self.c0[i, d]),
                                      rep(self.h0[i, d])],
                        non_sequences=[
                            self.Wrec[i, d],
                            rep(self.c0[i, d]),
                            rep(self.h0[i, d])
                        ],
                        go_backwards=(d == 1),
                    )[0][1][::(1 if d == 0 else -1)] for d in range(Ndirs)
                ],
                                  axis=2)
        h = h.dimshuffle((1, 0, 2))
        if predictPer == "sequence":
            h = T.concatenate([
                h[mask_int[1 - d]][:, Nh * d:Nh * (d + 1)]
                for d in range(Ndirs)
            ],
                              axis=1)
        output = ACTIVATION[Ay](h.dot(self.Wout) + self.Bout)

        # Compute loss function
        if loss is None:
            loss = {
                "linear": "mse",
                "sigmoid": "ce",
                "softmax": "ce_group"
            }[self.Ay]
        if loss == "ctc":
            label = T.imatrix()
            label_time = T.imatrix()
            tol = T.iscalar()
            cost = ctc_cost(output, mask, label, label_time, tol)
        else:
            if predictPer == "sequence":
                label = T.fmatrix()
                y = output
                t = label
            elif predictPer == "frame":
                label = T.ftensor3()
                indices = (mask >= 0).nonzero()
                y = output[indices]
                t = label[indices]
            cost = T.mean({
                "ce":
                -T.mean(T.log(y) * t + T.log(1 - y) * (1 - t), axis=1),
                "ce_group":
                -T.log((y * t).sum(axis=1)),
                "mse":
                T.mean((y - t)**2, axis=1),
                "hinge":
                T.mean(relu(1 - y * (t * 2 - 1)), axis=1),
                "squared_hinge":
                T.mean(relu(1 - y * (t * 2 - 1))**2, axis=1),
            }[loss])

        # Add regularization
        cost += sum(abs(x).sum() for x in self.params) / self.nParams * L1reg
        cost += sum(T.sqr(x).sum() for x in self.params) / self.nParams * L2reg

        # Compute updates for network parameters
        updates = []
        lrate = T.fscalar()
        clip = T.fscalar()
        grad = T.grad(cost, self.params)
        grad_clipped = [T.maximum(T.minimum(g, clip), -clip) for g in grad]
        for w, a, g in zip(self.params, self.aparams, grad_clipped):
            updates.append((a, 0.9 * a + 0.1 * g**2))
            updates.append((w, w - lrate * g / (a + 1e-8)**0.5))

        # Create functions to be called from outside
        if loss == "ctc":
            inputs = [input, mask, label, label_time, tol, lrate, clip]
        else:
            inputs = [input, mask, label, lrate, clip]
        self.train = theano.function(
            inputs=inputs,
            outputs=cost,
            updates=updates,
        )

        self.predict = theano.function(inputs=[input, mask], outputs=output)
Example no. 46
def generate_mu_timelist(
    record_max=10**4,
    no_arms=5,
    mu_type='biggap',
    is_timevar=False,
    timevar_type='General',
    reward_type='Bernoulli',
    mu_list=None,
    direc=None,
    plot=True,
):
    ''' 
    generate the value of each arm mean as a function of time

    '''
    # if the list of arm means is not provided, generate one
    if mu_list is None:
        mu_list = generate_mu_list(no_arms, mu_type, timevar_type, reward_type)

    # sanity check: recompute the number of arms from the list
    no_arms = len(mu_list)

    if not is_timevar:
        # no changes
        mu_time_list = lambda t, i: mu_list[i]

    else:
        if timevar_type == 'Abrupt':

            # abrupt (piecewise-constant) changes
            if mu_type == "biggap":
                f_t = lambda t: 0.1 * pow(-1, np.floor(t / 50000)) * (
                    t >= 50000 * np.floor(t / 50000) and t < 50000 *
                    (np.floor(t / 50000) + 1))
            elif mu_type == "smallgap":
                f_t = lambda t: 0.01 * pow(-1, np.floor(t / 50000)) * (
                    t >= 50000 * np.floor(t / 50000) and t < 50000 *
                    (np.floor(t / 50000) + 1))
            else:
                f_t = lambda t: rand() * pow(-1, np.floor(t / 50000)) * (
                    t >= 50000 * np.floor(t / 50000) and t < 50000 *
                    (np.floor(t / 50000) + 1))

            mu_time_list = lambda t, i: mu_list[mod(
                i + np.int(np.divide(t, 50000)), no_arms)] + f_t(t)

        elif timevar_type == 'General':
            # continuous changes
            f_t = lambda t: sin(np.pi * t / 50000) + 1
            mu_time_list = lambda t, i: mu_list[i] * f_t(t + 50000 * i)

        elif timevar_type == 'RealAbrupt':
            # Example from the Yahoo! dataset, from article "Nearly Optimal Adaptive Procedure with Change Detection for Piecewise-Stationary Bandit" (M-UCB) https://arxiv.org/abs/1802.03692
            # 6 arms, 9 discrete change
            mu_list = [[0.071, 0.041, 0.032, 0.030, 0.020, 0.011],
                       [0.055, 0.053, 0.032, 0.030, 0.008, 0.011],
                       [0.040, 0.063, 0.032, 0.030, 0.008, 0.011],
                       [0.040, 0.042, 0.043, 0.030, 0.008, 0.011],
                       [0.030, 0.032, 0.055, 0.030, 0.008, 0.011],
                       [0.030, 0.032, 0.020, 0.030, 0.008, 0.021],
                       [0.020, 0.022, 0.020, 0.045, 0.008, 0.021],
                       [0.020, 0.022, 0.020, 0.057, 0.008, 0.011],
                       [0.020, 0.022, 0.034, 0.057, 0.022, 0.011]]

            mu_time_list = lambda t, i: mu_list[int(np.floor(t / 50000))][i]

    # save the data to direc
    if direc is not None:
        filename = 'noarms%d_mu_type%s_timevar_type%s_reward_type%s' % (
            no_arms, mu_type, timevar_type, reward_type)
        if not os.path.exists("%s/%s.pkl" % (direc, filename)):
            savelambda(mu_time_list, direc, filename)
        else:
            with open('%s/%s.pkl' % (direc, filename), 'rb') as input:
                mu_dat = dill.load(input)
            mu_time_list = mu_dat

        plotfilename = 'mu_time_list%s' % (filename)
        plotdirec = direc + '/plots'

        if not os.path.exists("%s/%s.pdf" %
                              (plotdirec, plotfilename)) and plot:
            plot_mu(plotdirec, plotfilename, no_arms, record_max, mu_time_list,
                    mu_type)

    # return mu_time_list: a function (t, i) -> mean of arm i at time t
    return mu_time_list
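For a quick feel of what `mu_time_list` looks like in the sinusoidal 'General' case, here is a self-contained sketch with made-up base means:

import numpy as np

# Illustrative only: a (t, i) -> mean function with the 'General' sinusoidal
# drift used above, evaluated at a few time steps.
mu_list = [0.9, 0.6, 0.3]
f_t = lambda t: np.sin(np.pi * t / 50000) + 1
mu_time_list = lambda t, i: mu_list[i] * f_t(t + 50000 * i)

for t in (0, 25000, 50000):
    print(t, [round(float(mu_time_list(t, i)), 3) for i in range(len(mu_list))])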
Esempio n. 47
0
from flask import Flask
from flask import request
from flask import render_template

app = Flask(__name__)

import pickle
import numpy as np
import dill

model = dill.load(open('./data/model.pkl', 'rb'))

c_feat_mat = model.feat_mat[:len(model.course_list), :]


# Form page to submit text
#============================================
# create page with a form on it
@app.route('/index.html')
@app.route('/index')
@app.route('/')
def submission_page():
    return render_template('index.html')


# Recommendation page
#============================================
# create page with a form on it


# Recommending
Esempio n. 48
0
def LoadNetwork(network2Load):
    with open(network2Load, "rb") as networkPickled:
        return pickle.load(networkPickled)
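A matching save helper could look like the sketch below; `SaveNetwork` is an assumed name, not part of the snippet:

import pickle

def SaveNetwork(network, network2Save):
    # Hypothetical counterpart to LoadNetwork: pickle the network object to disk.
    with open(network2Save, "wb") as networkPickled:
        pickle.dump(network, networkPickled)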
Esempio n. 49
0
#coding:utf-8
import dill as pickle
tmp = pickle.load(open('bpe_deen/bpe_vocab.pkl', 'rb'))
# open(opt.data_pkl, 'rb')
print(tmp)
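These snippets alias `dill` as `pickle` because dill can serialize objects the standard library pickler cannot, such as lambdas; a minimal round-trip sketch:

import dill

# dill round-trips a lambda, which the standard pickle module rejects.
square = lambda x: x * x
restored = dill.loads(dill.dumps(square))
print(restored(4))  # 16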
Esempio n. 50
0
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    validate_flags_or_throw(albert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)
    #
    # tokenizer = fine_tuning_utils.create_vocab(
    #     vocab_file=FLAGS.vocab_file,
    #     do_lower_case=FLAGS.do_lower_case,
    #     spm_model_file=FLAGS.spm_model_file,
    #     hub_module=FLAGS.albert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        iterations_per_loop = int(
            min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        keep_checkpoint_max=0,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if not tf.gfile.Exists(FLAGS.train_feature_file):
        raise Exception("Train tf-record missed...")
    cnt = 0
    records = tf.python_io.tf_record_iterator(FLAGS.train_feature_file)
    for _ in records:
        cnt += 1
    print(cnt)
    num_train_steps = int(cnt / FLAGS.train_batch_size *
                          FLAGS.num_train_epochs)
    # train_examples = squad_utils.read_squad_examples(
    #     input_file=FLAGS.train_file, is_training=True)
    # num_train_steps = int(
    #     len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    if FLAGS.do_train:
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    #
    #   # Pre-shuffle the input to avoid having to make a very large shuffle
    #   # buffer in in the `input_fn`.
    #   rng = random.Random(12345)
    #   rng.shuffle(train_examples)

    tag_info = squad_utils.TagInfo.load(FLAGS.tag_info_file)
    print(tag_info.__dict__)

    model_fn = squad_utils.v2_model_fn_builder(
        albert_config=albert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        max_seq_length=FLAGS.max_seq_length,
        start_n_top=FLAGS.start_n_top,
        end_n_top=FLAGS.end_n_top,
        dropout_prob=FLAGS.dropout_prob,
        hub_module=FLAGS.albert_hub_module_handle,
        tag_info=tag_info)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # We write to a temporary file to avoid storing very large constant tensors
        # in memory.

        # if not tf.gfile.Exists(FLAGS.train_feature_file):
        #     train_writer = squad_utils.FeatureWriter(
        #         filename=os.path.join(FLAGS.train_feature_file), is_training=True)
        #     squad_utils.convert_examples_to_features(
        #         examples=train_examples,
        #         tokenizer=tokenizer,
        #         max_seq_length=FLAGS.max_seq_length,
        #         doc_stride=FLAGS.doc_stride,
        #         max_query_length=FLAGS.max_query_length,
        #         is_training=True,
        #         output_fn=train_writer.process_feature,
        #         do_lower_case=FLAGS.do_lower_case)
        #     train_writer.close()

        tf.logging.info("***** Running training *****")
        # tf.logging.info("  Num orig examples = %d", len(train_examples))
        # tf.logging.info("  Num split examples = %d", train_writer.num_features)
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        # del train_examples

        train_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.train_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size,
            is_v2=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_predict:
        import dill
        # with tf.gfile.Open(FLAGS.predict_file) as predict_file:
        #     prediction_json = json.load(predict_file)["data"]
        # eval_examples = squad_utils.read_squad_examples(
        #     input_file=FLAGS.predict_file, is_training=False)

        if (tf.gfile.Exists(FLAGS.predict_feature_file)
                and tf.gfile.Exists(FLAGS.predict_feature_left_file)
                and tf.gfile.Exists(FLAGS.predict_example_file)):
            tf.logging.info("Loading eval features from {}".format(
                FLAGS.predict_feature_left_file))
            with tf.gfile.Open(FLAGS.predict_feature_left_file, "rb") as fin:
                eval_features = dill.load(fin)
            with tf.gfile.Open(FLAGS.predict_example_file, "rb") as fin:
                eval_examples = dill.load(fin)
        # else:
        #     eval_writer = squad_utils.FeatureWriter(
        #     filename=FLAGS.predict_feature_file, is_training=False)
        # eval_features = []
        #
        # def append_feature(feature):
        #     eval_features.append(feature)
        #     eval_writer.process_feature(feature)
        #
        # squad_utils.convert_examples_to_features(
        #     examples=eval_examples,
        #     tokenizer=tokenizer,
        #     max_seq_length=FLAGS.max_seq_length,
        #     doc_stride=FLAGS.doc_stride,
        #     max_query_length=FLAGS.max_query_length,
        #     is_training=False,
        #     output_fn=append_feature,
        #     do_lower_case=FLAGS.do_lower_case)
        # eval_writer.close()
        #
        # with tf.gfile.Open(FLAGS.predict_feature_left_file, "wb") as fout:
        #     pickle.dump(eval_features, fout)

        tf.logging.info("***** Running predictions *****")
        tf.logging.info("  Num orig examples = %d", len(eval_examples))
        tf.logging.info("  Num split examples = %d", len(eval_features))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.predict_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size,
            is_v2=True)

        def get_result(checkpoint):
            """Evaluate the checkpoint on SQuAD v2.0."""
            # If running eval on the TPU, you will need to specify the number of
            # steps.
            all_results = []
            for result in estimator.predict(predict_input_fn,
                                            yield_single_examples=True,
                                            checkpoint_path=checkpoint):
                if len(all_results) % 1000 == 0:
                    tf.logging.info("Processing example: %d" %
                                    (len(all_results)))
                unique_id = int(result["unique_ids"])
                crf_logits = result["crf_logits"]
                transition_params = result["transition_params"]
                softmax = result["softmax"]
                all_results.append(
                    squad_utils.RawResultV2(
                        unique_id=unique_id,
                        crf_logits=crf_logits,
                        softmax=softmax,
                        transition_params=transition_params,
                    ))

            output_prediction_file = os.path.join(FLAGS.output_dir,
                                                  "predictions.json")

            predictions = squad_utils.write_predictions_et(
                eval_examples, eval_features, all_results,
                FLAGS.max_answer_length, tag_info)

            import numpy

            class MyEncoder(json.JSONEncoder):
                def default(self, o):
                    if isinstance(o, numpy.integer):
                        return int(o)
                    if isinstance(o, numpy.floating):
                        return float(o)
                    if isinstance(o, numpy.ndarray):
                        return o.tolist()
                    else:
                        return super(MyEncoder, self).default(o)

            with tf.gfile.Open(output_prediction_file, 'w') as f:
                json.dump(predictions, f, ensure_ascii=False, cls=MyEncoder)

        latest_checkpoint = tf.train.latest_checkpoint(FLAGS.output_dir)
        get_result(latest_checkpoint)
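The `MyEncoder` class in `get_result` exists because `json` cannot serialize NumPy scalars or arrays out of the box. A standalone illustration of the same pattern, separate from the snippet above:

import json
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    # Convert NumPy scalars and arrays to plain Python types before encoding.
    def default(self, o):
        if isinstance(o, np.integer):
            return int(o)
        if isinstance(o, np.floating):
            return float(o)
        if isinstance(o, np.ndarray):
            return o.tolist()
        return super().default(o)

print(json.dumps({"score": np.float32(0.5), "ids": np.arange(3)}, cls=NumpyEncoder))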
Esempio n. 51
0
def load_obj(path):
    """
    Loads a dill-serialized object from `path`.
    """
    with open(path, 'rb') as fh:
        return dill.load(fh)
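A matching writer is a natural companion; the sketch below uses the assumed name `save_obj`, which does not appear in the snippet:

import dill

def save_obj(obj, path):
    """
    Serializes `obj` to `path` with dill (hypothetical counterpart to load_obj).
    """
    with open(path, 'wb') as fh:
        dill.dump(obj, fh)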
Esempio n. 52
0
    def load(location, do_unzip_and_model_type_check=True):
        """
        Method used to load a container from the file system.

        Args:
            location: The location on the file system where to load the model.
            do_unzip_and_model_type_check: Whether to unzip the model and check the type.

        Returns:
            The loaded model.
        """
        assert tvm_installed(), "TVM Container requires TVM installed."

        _load_param_dict = tvm._ffi.get_global_func("tvm.relay._load_param_dict")

        # We borrow this function directly from Relay.
        # Relay, when imported, tries to download schedule data,
        # but at inference time access to disk or network could be blocked.
        def load_param_dict(param_bytes):
            if isinstance(param_bytes, (bytes, str)):
                param_bytes = bytearray(param_bytes)
            load_arr = _load_param_dict(param_bytes)
            return {v.name: v.array for v in load_arr}

        container = None

        if do_unzip_and_model_type_check:
            # Unzip the dir.
            zip_location = location
            if not location.endswith("zip"):
                zip_location = location + ".zip"
            else:
                location = zip_location[:-4]
            assert os.path.exists(zip_location), "Zip file {} does not exist.".format(zip_location)
            shutil.unpack_archive(zip_location, location, format="zip")

            assert os.path.exists(location), "Model location {} does not exist.".format(location)

            # Load the model type.
            with open(os.path.join(location, constants.SAVE_LOAD_MODEL_TYPE_PATH), "r") as file:
                model_type = file.readline()
            if model_type != "tvm":
                shutil.rmtree(location)
                raise RuntimeError("Expected TVM model type, got {}".format(model_type))

        # Check the versions of the modules used when saving the model.
        if os.path.exists(os.path.join(location, constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH)):
            with open(os.path.join(location, constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH), "r") as file:
                configuration = file.readlines()
            check_dumped_versions(configuration, hummingbird, torch)
        else:
            warnings.warn(
                "Cannot find the configuration file with versions. You are likely trying to load a model saved with an old version of Hummingbird."
            )

        # Load the actual model.
        path_lib = os.path.join(location, constants.SAVE_LOAD_TVM_LIB_PATH)
        graph = open(os.path.join(location, constants.SAVE_LOAD_TVM_GRAPH_PATH)).read()
        lib = tvm.runtime.module.load_module(path_lib)
        params = load_param_dict(open(os.path.join(location, constants.SAVE_LOAD_TVM_PARAMS_PATH), "rb").read())

        # Load the container.
        with open(os.path.join(location, constants.SAVE_LOAD_CONTAINER_PATH), "rb") as file:
            container = dill.load(file)
        if container is None:
            shutil.rmtree(location)
            raise RuntimeError("Failed to load the model container.")

        # Setup the container.
        ctx = tvm.cpu() if container._ctx == "cpu" else tvm.gpu()
        container._model = graph_runtime.create(graph, lib, ctx)
        container._model.set_input(**params)

        container._extra_config[constants.TVM_GRAPH] = graph
        container._extra_config[constants.TVM_LIB] = lib
        container._extra_config[constants.TVM_PARAMS] = params
        container._extra_config[constants.TVM_CONTEXT] = ctx
        container._ctx = ctx
        container._tvm_tensors = {name: container._to_tvm_array(np.array([])) for name in container._input_names}

        # Need to set the number of threads to use as set in the original container.
        os.environ["TVM_NUM_THREADS"] = str(container._n_threads)
        shutil.rmtree(location)

        return container
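The unzip-and-check step can be read in isolation; the sketch below mirrors it with the standard library only, substituting a made-up marker filename (`model_type.txt`) for the `constants.*` paths:

import os
import shutil

def unzip_and_check(zip_location, expected_type="tvm"):
    # Unpack <location>.zip next to itself, then verify the model-type marker file.
    location = zip_location[:-4] if zip_location.endswith(".zip") else zip_location
    if not zip_location.endswith(".zip"):
        zip_location = zip_location + ".zip"
    shutil.unpack_archive(zip_location, location, format="zip")
    with open(os.path.join(location, "model_type.txt"), "r") as fh:
        model_type = fh.read().strip()
    if model_type != expected_type:
        shutil.rmtree(location)
        raise RuntimeError("Expected {} model type, got {}".format(expected_type, model_type))
    return location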
import os
import dill as pickle
import numpy as np
import lda
from rpy2 import robjects
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import IntVector, FloatVector

# set working directory
os.chdir(
    "/Users/annekespeijers/Desktop/BGSE/Term3/TextMining/Homework/Project/")

# load in corpus
with open('./data/corpus.pkl', 'rb') as input:
    corpus = pickle.load(input)

# doc term matrix
X = corpus.document_term_matrix(corpus.token_set)
X = X.astype(int)

# get vocab, article titles and article comments
vocab = tuple(corpus.token_set)
titles = tuple([t.title for t in corpus.docs])
comments = tuple([com.comments for com in corpus.docs])

## fit the model: k = 2
k = 2
model = lda.LDA(n_topics=k,
                n_iter=500,
                random_state=1,
                eta=200 / float(len(vocab)),
Esempio n. 54
0
def main():
    if not os.path.exists(os.path.join("saved", model_name)):
        os.makedirs(os.path.join("saved", model_name))

    data_path = '../../data/records_final.pkl'
    voc_path = '../../data/voc_final.pkl'
    device = torch.device('cuda:0')

    data = dill.load(open(data_path, 'rb'))
    voc = dill.load(open(voc_path, 'rb'))
    diag_voc, pro_voc, med_voc = voc['diag_voc'], voc['pro_voc'], voc[
        'med_voc']

    split_point = int(len(data) * 2 / 3)
    data_train = data[:split_point]
    eval_len = int(len(data[split_point:]) / 2)
    data_test = data[split_point:split_point + eval_len]
    data_eval = data[split_point + eval_len:]
    voc_size = (len(diag_voc.idx2word), len(pro_voc.idx2word),
                len(med_voc.idx2word))

    EPOCH = 30
    LR = 0.0002
    TEST = False
    END_TOKEN = voc_size[2] + 1

    model = Leap(voc_size, device=device)
    if TEST:
        model.load_state_dict(
            torch.load(
                open(os.path.join("saved", model_name, resume_name), 'rb')))
        # pass

    model.to(device=device)
    print('parameters', get_n_params(model))

    optimizer = Adam(model.parameters(), lr=LR)

    if TEST:
        eval(model, data_test, voc_size, 0)
    else:
        history = defaultdict(list)
        for epoch in range(EPOCH):
            loss_record = []
            start_time = time.time()
            model.train()
            for step, input in enumerate(data_train):
                for adm in input:
                    loss_target = adm[2] + [END_TOKEN]
                    output_logits = model(adm)
                    loss = F.cross_entropy(
                        output_logits,
                        torch.LongTensor(loss_target).to(device))

                    loss_record.append(loss.item())

                    optimizer.zero_grad()
                    loss.backward(retain_graph=True)
                    optimizer.step()

                llprint('\rTrain--Epoch: %d, Step: %d/%d' %
                        (epoch, step, len(data_train)))

            ddi_rate, ja, prauc, avg_p, avg_r, avg_f1 = eval(
                model, data_eval, voc_size, epoch)
            history['ja'].append(ja)
            history['ddi_rate'].append(ddi_rate)
            history['avg_p'].append(avg_p)
            history['avg_r'].append(avg_r)
            history['avg_f1'].append(avg_f1)
            history['prauc'].append(prauc)

            end_time = time.time()
            elapsed_time = (end_time - start_time) / 60
            llprint(
                '\tEpoch: %d, Loss1: %.4f, One Epoch Time: %.2fm, Appro Left Time: %.2fh\n'
                % (epoch, np.mean(loss_record), elapsed_time, elapsed_time *
                   (EPOCH - epoch - 1) / 60))

            torch.save(
                model.state_dict(),
                open(
                    os.path.join(
                        'saved', model_name,
                        'Epoch_%d_JA_%.4f_DDI_%.4f.model' %
                        (epoch, ja, ddi_rate)), 'wb'))
            print('')

        dill.dump(history,
                  open(os.path.join('saved', model_name, 'history.pkl'), 'wb'))
        # test
        torch.save(
            model.state_dict(),
            open(os.path.join('saved', model_name, 'final.model'), 'wb'))
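The chronological 2/3, 1/6, 1/6 split used above (train, then the remainder halved into test and eval) is easy to see on a toy list; a minimal sketch with made-up data:

# Illustrative only: the same chronological split on a toy list of records.
data = list(range(12))

split_point = int(len(data) * 2 / 3)
data_train = data[:split_point]
eval_len = int(len(data[split_point:]) / 2)
data_test = data[split_point:split_point + eval_len]
data_eval = data[split_point + eval_len:]

print(data_train, data_test, data_eval)  # [0..7] [8, 9] [10, 11]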
Esempio n. 55
0
def fine_tune(fine_tune_name=''):
    data_path = '../../data/records_final.pkl'
    voc_path = '../../data/voc_final.pkl'
    device = torch.device('cuda:0')

    data = dill.load(open(data_path, 'rb'))
    voc = dill.load(open(voc_path, 'rb'))
    diag_voc, pro_voc, med_voc = voc['diag_voc'], voc['pro_voc'], voc[
        'med_voc']
    ddi_A = dill.load(open('../../data/ddi_A_final.pkl', 'rb'))

    split_point = int(len(data) * 2 / 3)
    data_train = data[:split_point]
    eval_len = int(len(data[split_point:]) / 2)
    data_test = data[split_point:split_point + eval_len]
    # data_eval = data[split_point+eval_len:]
    voc_size = (len(diag_voc.idx2word), len(pro_voc.idx2word),
                len(med_voc.idx2word))

    model = Leap(voc_size, device=device)
    model.load_state_dict(
        torch.load(
            open(os.path.join("saved", model_name, fine_tune_name), 'rb')))
    model.to(device)

    EPOCH = 30
    LR = 0.0001
    END_TOKEN = voc_size[2] + 1

    optimizer = Adam(model.parameters(), lr=LR)
    ddi_rate_record = []
    for epoch in range(1):
        loss_record = []
        start_time = time.time()
        random_train_set = [
            random.choice(data_train) for i in range(len(data_train))
        ]
        for step, input in enumerate(random_train_set):
            model.train()
            K_flag = False
            for adm in input:
                target = adm[2]
                output_logits = model(adm)
                out_list, sorted_predict = sequence_output_process(
                    output_logits.detach().cpu().numpy(),
                    [voc_size[2], voc_size[2] + 1])

                inter = set(out_list) & set(target)
                union = set(out_list) | set(target)
                jaccard = 0 if len(union) == 0 else len(inter) / len(union)
                K = 0
                for i in out_list:
                    if K == 1:
                        K_flag = True
                        break
                    for j in out_list:
                        if ddi_A[i][j] == 1:
                            K = 1
                            break

                loss = -jaccard * K * torch.mean(
                    F.log_softmax(output_logits, dim=-1))

                loss_record.append(loss.item())

                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                optimizer.step()

            llprint('\rTrain--Epoch: %d, Step: %d/%d' %
                    (epoch, step, len(data_train)))

            if K_flag:
                ddi_rate, ja, prauc, avg_p, avg_r, avg_f1 = eval(
                    model, data_test, voc_size, epoch)

                end_time = time.time()
                elapsed_time = (end_time - start_time) / 60
                llprint(
                    '\tEpoch: %d, Loss1: %.4f, One Epoch Time: %.2fm, Appro Left Time: %.2fh\n'
                    %
                    (epoch, np.mean(loss_record), elapsed_time, elapsed_time *
                     (EPOCH - epoch - 1) / 60))

                torch.save(
                    model.state_dict(),
                    open(
                        os.path.join(
                            'saved', model_name,
                            'fine_Epoch_%d_JA_%.4f_DDI_%.4f.model' %
                            (epoch, ja, ddi_rate)), 'wb'))
                print('')

    # test
    torch.save(model.state_dict(),
               open(os.path.join('saved', model_name, 'final.model'), 'wb'))
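The fine-tuning loss above scales the mean log-softmax by the Jaccard similarity between the predicted and target drug sets (and the DDI indicator K). A minimal, self-contained sketch of the Jaccard term with made-up sets:

# Illustrative only: Jaccard similarity between a predicted and a target set.
out_list = [3, 5, 7, 11]
target = [5, 7, 13]

inter = set(out_list) & set(target)
union = set(out_list) | set(target)
jaccard = 0 if len(union) == 0 else len(inter) / len(union)
print(jaccard)  # 2 / 5 = 0.4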
Esempio n. 56
0
    with open(filename, 'wb') as fh:
        dill.dump(
            dict(seed=seed,
                 dhids=dhids,
                 epochs=epochs,
                 n_per_batch=n_per_batch,
                 eta=eta,
                 alpha=alpha,
                 weights=weights,
                 tau_rc=tau_rc,
                 amp=amp,
                 learners=learners), fh)
else:
    with open(filename, 'rb') as fh:
        filedata = dill.load(fh)
        globals().update(filedata)

# --- plot results (cols=[train, test], traces=learners)
rows = 1
cols = 2

plt.figure(figsize=(7, 4))

# - train subplot
ax = plt.subplot(rows, cols, 1)

# filt = Alpha(3000, default_dt=n_per_batch)
filt = Alpha(10000, default_dt=n_per_batch)

for learner in learners:
Esempio n. 57
0
srep.viz.plotting_style()
#%%
fig, ax = plt.subplots(2, 2, figsize=(9, 9))

# ###########################################################################
# 95% HPD for identifiable expts promoters
# ###########################################################################
# # loop thru df, not all_samples keys, so we get deterministic order!
expt_labels = ("O2_0p5ngmL", "O2_1ngmL", "Oid_1ngmL", "O3_10ngmL")
var_labels = ["k_burst", "b", "kR_on", "kR_off"]
color_keys = ["green", "blue", "red", "purple"]
for i, expt in enumerate(expt_labels):
    # unpickle sampler, then convert to arviz InfDat obj
    pklfile = open(f"{repo_rootdir}/data/mcmc_samples/{expt}_sampler.pkl",
                   'rb')
    sampler = dill.load(pklfile)
    pklfile.close()

    inf_dat = az.convert_to_inference_data(sampler, var_names=var_labels)
    kR_on_samples = inf_dat.posterior.kR_on.values.flatten()
    kR_off_samples = inf_dat.posterior.kR_off.values.flatten()
    x_contour, y_contour = bebi103.viz.contour_lines_from_samples(
        kR_off_samples, kR_on_samples, levels=0.95, smooth=0.025)
    ax[0, 1].plot(x_contour[0],
                  y_contour[0],
                  label=expt,
                  linewidth=0.6,
                  color=colors[color_keys[i]])
# ax[0,1].set_xlim(-2,2)
# ax[0,1].set_ylim(-2,2)
ax[0, 1].set_ylabel(r'$log_{10}(k_R^+/\gamma)$')
import time
from sklearn.preprocessing import MinMaxScaler
from operator import add


def testindexing():
    graph_raw_data = open("graph.pkl", "rb")
    graph_data = pickle.load(graph_raw_data)
    print(graph_data)


if __name__ == "__main__":
    testindexing()
    #need to update revised in gce
    raw_data = open("revised_total_data.pkl", "rb")
    X_total, X_tr, y_tr, X_te = pickle.load(raw_data)
    test = np.array([0])
    print(X_total.shape)
    #remove null island
    graph_raw_data = open("graph.pkl", "rb")
    graph_data = pickle.load(graph_raw_data)
    first_ids = graph_data[:, 0]
    connected_ids = graph_data[:, 1]
    training_ids = X_total[:, 0]
    added_features = []
    count = 0
    length_graph = first_ids.shape[0]
    length_training = training_ids.shape[0]
    for row in X_total:
        id_first = row[0]
        index = np.searchsorted(first_ids, id_first)
Esempio n. 60
-1
def experiment_LMKLIEP(which_d):
    with open('feat_vec_out.'+which_d+'.en.pickle', 'rb') as handle:
        out_d = pickle.load(handle)

    with open('feat_vec_in.'+which_d+'.en.pickle', 'rb') as handle:
        in_d = pickle.load(handle)

    labels = - np.ones(out_d.shape[0])
    predictions = - np.ones(out_d.shape[0])
    labels[-50000:] = 1

    kliep = KLIEP(init_b=100, seed=0)
    kliep.fit_CV(out_d, in_d)

    w = kliep.predict(out_d).ravel()
    predictions[np.where(w > 1)[0]] = 1   # w = p_te/p_tr
    print('total positive:', np.where(predictions == 1)[0].shape, ', out of:', out_d.shape[0])
    # sorted_ind = np.argsort(w, axis=None)[::-1]
    # predictions[sorted_ind[0:50000]] = 1

    p, r, f, s = precision_recall_fscore_support(labels.astype(int), predictions.astype(int), pos_label=1, average='micro')
    print('Precision:', p, 'Recall:', r, 'F1:', f, 'Support:', s)