def load(self, kind):
    start = datetime.datetime.now()
    path, file = os.path.split(self.training_file)
    with open('trained/' + file[:-4] + '_' + kind + '_' + str(self.ngram_size)
              + '_' + str(self.length) + '.pack', 'rb') as fp:
        if kind == "ip_list":
            self.ip_list = umsgpack.load(fp)
        elif kind == "cp_list":
            self.cp_list = umsgpack.load(fp)
        elif kind == "ep_list":
            self.ep_list = umsgpack.load(fp)
        else:
            raise Exception("Unknown list given (required: ip_list, cp_list, or ep_list)")
    logging.debug("Done! Everything loaded from disk.")
    logging.debug("Loading the data from disk took: {}".format(datetime.datetime.now() - start))
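# A minimal sketch of a save() counterpart for the load() method above. The
# owning class is not shown, so the attribute names (training_file,
# ngram_size, length) and the 'trained/' layout are assumptions inferred from
# the path construction in load().
def save(self, kind):
    path, file = os.path.split(self.training_file)
    out = ('trained/' + file[:-4] + '_' + kind + '_' + str(self.ngram_size)
           + '_' + str(self.length) + '.pack')
    with open(out, 'wb') as fp:
        # getattr mirrors the if/elif dispatch used in load()
        umsgpack.dump(getattr(self, kind), fp)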
def read_msgpack_bin_from_disk(filename):
    """ Extract stored msgpack data from disk """
    with open(filename, 'rb') as f:
        # return umsgpack.unpack(f)
        return umsgpack.load(f)
def blob_printer(data, exception):
    if exception:
        import traceback
        print("EXCEPTION!")
        # traceback.print_exc() takes no exception argument; print the
        # exception that was passed in explicitly
        traceback.print_exception(type(exception), exception, exception.__traceback__)
    elif data:
        from io import BytesIO
        data = BytesIO(data)  # msgpack payloads are binary, so BytesIO, not StringIO
        print('=========================================================')
        while True:
            try:
                msg = msgpack.load(data)
            except msgpack.InsufficientDataException:
                break
            if msg['type'].endswith('content'):
                print("chunk size", len(msg['data']))
            elif msg['type'] == 'dirview':
                line = msg['data']['root'] + ':'
                for k, v in msg['data'].items():
                    if k == 'root':
                        continue
                    line += ' {}:{}'.format(k, len(v))
                print(line)
            else:
                print("DATA:", msg)
        print('=========================================================')
def _load_file(path):
    with open(path, 'rb') as fileobj:
        while True:
            try:
                yield umsgpack.load(fileobj)
            except umsgpack.InsufficientDataException:
                break
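# A round-trip sketch for the _load_file() generator above: msgpack objects
# can be appended back to back in one file, and the generator streams them out
# until InsufficientDataException signals end of data. The file name and
# record contents here are illustrative assumptions.
import umsgpack

records = [{'id': 1}, {'id': 2}, {'id': 3}]
with open('records.pack', 'wb') as fileobj:
    for record in records:
        umsgpack.dump(record, fileobj)  # each dump appends one object

assert list(_load_file('records.pack')) == records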
def read_msgpack_bin_from_disk(filename):
    """ Extract stored msgpack data from disk """
    try:
        with open(filename, 'rb') as msgpack_data:
            return umsgpack.load(msgpack_data)
    except IOError:
        print("File not found: " + filename)
        return None
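# A hypothetical write-side counterpart to the reader above, mirroring its
# error handling; the function name is an assumption, not part of the source.
def write_msgpack_bin_to_disk(filename, data):
    """ Store msgpack-serializable data on disk """
    try:
        with open(filename, 'wb') as msgpack_file:
            umsgpack.dump(data, msgpack_file)
        return True
    except IOError:
        print("Could not write file: " + filename)
        return False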
def test_is_prime(benchmark: Any) -> None:
    def func(set_of_primes):
        last = 2
        for x, y in zip(primes(), set_of_primes):
            assert is_prime(x)
            assert x == y
            for z in range(last + 1, x):
                assert not is_prime(z)
            last = x

    if ONLY_SLOW or NO_OPTIONAL_TESTS:
        skip()
    with PY_FOLDER.joinpath('primes.mpack').open('rb') as f:
        set_of_primes = load(f)  # set of first million primes
    benchmark.pedantic(func, args=(set_of_primes, ), iterations=1, rounds=1)
    if hasattr(benchmark, 'stats') and benchmark.stats.stats.max > (
            200 * 1_000_000 / 1_000_000):  # 200ns * primes
        fail("Exceeding 200ns average!")
def username_generator():
    mkv_lvl = 310
    mkv_len = 16
    acceptlang = flask.g.get('acceptlang')
    accepttw = flask.g.get('accepttw')
    try:
        num = int(flask.request.args.get('num', 100))
    except Exception:
        num = 100
    fjson = flask.request.is_xhr or flask.request.args.get('f') == "json"
    unamemodel = getattr(flask.g, 'unamemodel', None)
    if unamemodel is None:
        unamemodel = flask.g.unamemodel = markov.MarkovModel(
            os.path.join(OS_DATA, 'stats_user.txt'))
        cachefn = os.path.join(OS_DATA,
                               'stats_user_%d-%d.msgp' % (mkv_lvl, mkv_len))
        if os.path.isfile(cachefn):
            with open(cachefn, 'rb') as f:
                nbparts = umsgpack.load(f)
            idxrange = unamemodel.init(mkv_lvl, mkv_len, nbparts)
        else:
            idxrange = unamemodel.init(mkv_lvl, mkv_len)
            with open(cachefn, 'wb') as f:
                umsgpack.dump(unamemodel.nbparts, f)
    else:
        idxrange = unamemodel[(0, 0, 0)]
    names = [
        unamemodel.print_pwd(random.randrange(idxrange))[0]
        for x in range(num)
    ]
    if fjson:
        return flask.jsonify({'usernames': names})
    uselang = (max(
        ('en', 'zh-cn', 'zh-tw', 'zh'),
        key=lambda x: acceptlang.get(x, 0)) if acceptlang else 'en')
    if uselang.startswith('zh'):
        if uselang == 'zh-tw':
            tmpl = flask.render_template('username_zhtw.html', usernames=names)
        else:
            tmpl = flask.render_template('username_zhcn.html', usernames=names)
    else:
        tmpl = flask.render_template('username_en.html', usernames=names)
    return tmpl
def test_load_short_read(self):
    # When reading from files, the network, etc.
    # there's no guarantee that read(n) returns n bytes
    # so you need to keep calling read() until you get all the data.
    class File(object):
        def __init__(self, data):
            self._data = data

        def read(self, n=None):
            if n is None or n <= 0 or len(self._data) == 0:
                data, self._data = self._data, b''
                return data
            n = int(math.ceil(n / 2.0))
            self._data, data = self._data[n:], self._data[:n]
            return data

    p = {'hello': 'world'}
    file = File(umsgpack.dumps(p))
    q = umsgpack.load(file)
    self.assertEqual(p, q)
def test_load_short_read(self):
    # When reading from files, the network, etc. there's no guarantee that
    # read(n) returns n bytes. Simulate this with a file-like object that
    # returns 1 byte at a time.
    class SlowFile(object):
        def __init__(self, data):
            self._data = data

        def read(self, n=None):
            if n is None or len(self._data) == 0:
                data, self._data = self._data, b''
                return data
            chunk = self._data[0:1]
            self._data = self._data[1:]
            return chunk

    obj = {'hello': 'world'}
    f = SlowFile(umsgpack.dumps(obj))
    unpacked = umsgpack.load(f)
    self.assertEqual(unpacked, obj)
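# A short sketch of why the short-read behavior exercised by the tests above
# matters in practice: a socket's file wrapper may return fewer bytes than
# requested, and umsgpack.load keeps calling read() until a whole object has
# arrived. The helper name is an illustrative assumption.
import socket
import umsgpack

def recv_one_message(sock):
    # makefile() wraps the socket in a buffered binary file-like object
    with sock.makefile('rb') as f:
        return umsgpack.load(f)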
def load_meta(meta_file):
    with open(meta_file, "rb") as fid:
        data = umsgpack.load(fid, encoding="utf-8")
    meta = data["meta"]
    return meta
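# A hypothetical writer matching load_meta() above: the only structure the
# reader assumes is a top-level dict with a "meta" key, so that is all this
# sketch writes.
def save_meta(meta_file, meta):
    with open(meta_file, "wb") as fid:
        umsgpack.dump({"meta": meta}, fid)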
def main():
    start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
    print('Entering the main thread to start the program')

    data = pd.read_csv('Review_chennai.csv', sep='|')
    print(data.head())
    clean_rateofreview = lambda x: str(x).split()[1]
    data['rating'] = data['rateofreview'].apply(clean_rateofreview)
    revs = data.loc[:, ['r_name', 'reviewtext', 'rating']]
    print(revs.head())
    print(revs.count())
    for i in list(np.where(pd.isnull(revs))):
        revs.drop(revs.index[i], inplace=True)
    print(revs.count())

    revs_new = revs[revs['rating'] != '3.0']
    revs_new['sentiment'] = revs_new['rating'] >= '3.5'
    print(revs_new.head())
    revs_new['sentiment'] = revs_new['sentiment'].apply(binarize_sentiment)
    print(revs_new.head())

    vectorer = TfidfVectorizer(min_df=2, ngram_range=(1, 2), stop_words='english')
    bow = vectorer.fit_transform(revs_new['reviewtext'])
    target = revs_new['sentiment'].values
    n_samples, n_features = bow.shape
    print('#######################################')
    print(n_samples, n_features)
    print(len(vectorer.get_feature_names()))
    print(vectorer.get_feature_names()[:10])
    print(vectorer.get_feature_names()[n_features // 2:n_features // 2 + 50])

    features_train, features_test, target_train, target_test = train_test_split(
        bow, target, test_size=0.20, random_state=1)
    print(features_train.shape)
    print(target_train.shape)
    print(features_test.shape)
    print(target_test.shape)

    logreg = LogisticRegression(C=1)
    logreg.fit(features_train, target_train)
    target_predicted = logreg.predict(features_test)
    print(target_predicted)
    print('Testing Accuracy is ', accuracy_score(target_test, target_predicted))
    print('Training Accuracy is', logreg.score(features_train, target_train))
    print('Testing Accuracy is', logreg.score(features_test, target_test))

    TESTDATA1 = StringIO("""Review
1;Sushi is Amazing
2;Sushi is bad
3;Sushi is not good
4;Sushi is beautiful
5;Sushi is bad terrible and good
6;Sushi is amazing bad and terrible
7;Sushi is amazing terrible horrible and bad
8;Sushi is not awesome
9;Sushi is not great
10;Sushi is very bad
11;Sushi is not brilliant
12;Sushi is unpleasant
13;Sushi is pleasant
""")
    # Alternative: take the review from the command line instead:
    # test_review = str(sys.argv[1])
    # TESTDATA = StringIO("""Review
    # ;""" + test_review)
    # DataFrame.from_csv was removed from pandas; read_csv is the replacement.
    df1 = pd.read_csv(TESTDATA1, sep=";", index_col=0)
    print(df1)
    test_bow = vectorer.transform(df1['Review'])
    prediction = logreg.predict(test_bow)
    print(prediction)

    timedump_joblib = time.perf_counter()
    # pickling the models
    import joblib  # sklearn.externals.joblib was removed from scikit-learn
    joblib.dump(vectorer, 'BiGram_Vectorizer.pkl')
    joblib.dump(logreg, 'BiGram_Log_Reg_Model.pkl')
    print('Time for joblib dumping of models: ', time.perf_counter() - timedump_joblib)

    timedump = time.perf_counter()
    # Note: umsgpack only serializes msgpack-native types (dicts, lists,
    # strings, numbers, ...); dumping sklearn estimators directly raises
    # umsgpack.UnsupportedTypeException.
    with open('vect.bin', 'wb') as f, open('model.bin', 'wb') as g:
        umsgpack.dump(vectorer, f)
        umsgpack.dump(logreg, g)
    print('Time for umsgpack dumping of models: ', time.perf_counter() - timedump)

    timeload = time.perf_counter()
    with open('vect.bin', 'rb') as f, open('model.bin', 'rb') as g:
        vectorer1 = umsgpack.load(f)
        logreg1 = umsgpack.load(g)
    print('Time for umsgpack loading of models: ', time.perf_counter() - timeload)
    print('Loaded Vectorer is \n', vectorer1)
    print('Loaded Model is \n', logreg1)

    print(time.perf_counter() - start_time, "seconds")
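# Since umsgpack cannot serialize sklearn estimators (see the note in main()),
# here is a sketch of one workaround: persist only msgpack-native data such as
# the vectorizer vocabulary and the regression coefficients, and rebuild the
# objects on load. The function name is illustrative, and this only restores
# the attributes it saves.
import umsgpack

def export_model_msgpack(vectorer, logreg, path):
    state = {
        # vocabulary_ maps terms to numpy integer indices; cast to plain int
        'vocabulary': {term: int(idx) for term, idx in vectorer.vocabulary_.items()},
        'idf': vectorer.idf_.tolist(),
        'coef': logreg.coef_.tolist(),
        'intercept': logreg.intercept_.tolist(),
        'classes': logreg.classes_.tolist(),
    }
    with open(path, 'wb') as f:
        umsgpack.dump(state, f)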
def read_radmap(args, base_path, fff):
    import numpy as np
    import os
    import rasterio
    from math import floor, ceil
    import umsgpack
    from scipy.interpolate.fitpack2 import RectBivariateSpline
    from collections import defaultdict
    import datetime
    import scipy.stats as stats

    rad_basedir = os.path.join(base_path, r'quantile_maps',
                               r'quantile_maps_monthly')
    # rad_basedir_year = os.path.join(base_path, r'quantile_maps', r'quantile_maps_monthly_allyears')
    h, w, lat, lon, bbox84, base_paths, outdir = args
    days = [31.0, 28.0, 31.0, 30.0, 31.0, 30.0,
            31.0, 31.0, 30.0, 31.0, 30.0, 31.0]
    alpha_degs = np.arange(-180.0, 181.0, 15.0)
    beta_degs = np.arange(0.0, 95.0, 10.0)
    rad_inp = defaultdict(dict)
    rad_inp_rst = defaultdict(dict)
    # for radkey in ['avg', 'min', '25', '75', 'max']:
    for radkey in ['avg']:
        for radtype in ['sis', 'dif', 'dir']:
            rad_inp_rst[(radkey, radtype)] = np.zeros((12, 38, 91, 361),
                                                      dtype=np.uint16)
    for month in range(1, 13):
        # for month in range(1, 2):
        fff.write("{} interpolate radiation month {}".format(
            str(datetime.datetime.now()), month))
        fff.write("\n")
        fff.flush()
        inparas = ['rad', 'h', int(h), 'w', int(w), 'lat', lat, 'lon', lon,
                   'month', month]
        in_filename = "_".join(list(map(str, inparas))) + ".mp"
        in_path = os.path.join(rad_basedir, in_filename)
        with open(in_path, 'rb') as f:
            data_avg = umsgpack.load(f)
        for hour in range(3, 22):
            for mins in [0, 30]:
                hkey = (hour, mins)
                z_sis = np.zeros((len(beta_degs), len(alpha_degs)))
                z_dif = np.zeros((len(beta_degs), len(alpha_degs)))
                z_dir = np.zeros((len(beta_degs), len(alpha_degs)))
                for i, beta_deg in enumerate(beta_degs):
                    for j, alpha_deg in enumerate(alpha_degs):
                        akey = (alpha_deg, beta_deg)
                        z_sis[i, j] = float(data_avg[hkey][akey]['sis']['avg']) * days[month - 1] * 0.5
                        z_dif[i, j] = float(data_avg[hkey][akey]['dif']['avg']) * days[month - 1] * 0.5
                        z_dir[i, j] = float(data_avg[hkey][akey]['sid']['avg']) * days[month - 1] * 0.5
                rad_inp[('avg', 'sis', month)][hkey] = RectBivariateSpline(
                    beta_degs, alpha_degs, z_sis)
                rad_inp[('avg', 'dif', month)][hkey] = RectBivariateSpline(
                    beta_degs, alpha_degs, z_dif)
                rad_inp[('avg', 'dir', month)][hkey] = RectBivariateSpline(
                    beta_degs, alpha_degs, z_dir)
        del hour, mins, hkey, i, j, beta_deg, alpha_deg
        for radkey in ['avg']:
            # for radkey in ['avg', 'min', '25', '75', 'max']:
            for radtype in ['sis', 'dif', 'dir']:
                for hour in range(3, 22):
                    for mins in [0, 30]:
                        hkey = (hour, mins)
                        RBS = rad_inp[(radkey, radtype, month)][hkey]
                        interpolated = RBS(np.arange(0, 91, 1),
                                           np.arange(-180, 181, 1),
                                           grid=True).astype(np.float32)
                        interpolated[interpolated < 0] = 0
                        mmax = np.max(interpolated)
                        if mmax > 65535:
                            fff.write("{}: Overflow : {} / {} {} {} {} ".format(
                                str(datetime.datetime.now()), mmax, radkey,
                                radtype, hour, mins))
                            fff.write("\n")
                            fff.flush()
                        t = int(hour * 2 + mins / 30) - 6
                        rad_inp_rst[(radkey, radtype)][month - 1, t, :, :] = \
                            interpolated.astype(np.uint16)
    mmax = 0.0
    mmin = 100000000
    for k in rad_inp_rst:
        fff.write("{}: Max/min rad for {}: {}/{}".format(
            str(datetime.datetime.now()), k, np.max(rad_inp_rst[k]),
            np.min(rad_inp_rst[k])))
        fff.write("\n")
        mmax = max(mmax, np.max(rad_inp_rst[k]))
        mmin = min(mmin, np.min(rad_inp_rst[k]))
    fff.write("{}: Max/min rad overall: {}/{}".format(
        str(datetime.datetime.now()), mmax, mmin))
    fff.write("\n")
    fff.flush()
    return rad_inp_rst
def load():
    data = []
    with open("api.p", mode="rb") as p:
        data = pickle.load(p)
    return data
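# A hypothetical save() counterpart to the pickle-based load() above; the
# "api.p" file name is taken from that function.
def save(data):
    with open("api.p", mode="wb") as p:
        pickle.dump(data, p)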
What is the largest prime factor of the number 600851475143 ?
"""
from itertools import count, takewhile
from math import ceil, sqrt
from pathlib import Path
from typing import Dict, Iterator, Optional

from sortedcontainers import SortedSet
from umsgpack import load

cache_filename = 'p0003_cache.mpack'
try:
    with Path(__file__).parent.joinpath(cache_filename).open('rb') as f:
        cache = SortedSet(load(f))
except Exception:
    cache = SortedSet(
        [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61])
last_cached: int = cache[-1] + 2


def primes(stop: Optional[int] = None) -> Iterator[int]:
    if stop is None:
        yield from cache
    else:
        yield from takewhile(stop.__gt__, cache)
    global last_cached
    if stop and last_cached - 2 > stop:
        return
    if stop is None: