Example #1
def load(self, kind):
    start = datetime.datetime.now()
    path, file = os.path.split(self.training_file)
    pack_path = 'trained/{}_{}_{}_{}.pack'.format(
        file[:-4], kind, self.ngram_size, self.length)
    with open(pack_path, 'rb') as fp:
        if kind == "ip_list":
            self.ip_list = umsgpack.load(fp)
        elif kind == "cp_list":
            self.cp_list = umsgpack.load(fp)
        elif kind == "ep_list":
            self.ep_list = umsgpack.load(fp)
        else:
            raise ValueError("Unknown list given (required: ip_list, cp_list, or ep_list)")
    logging.debug("Done! Everything loaded from disk.")
    logging.debug("Loading the data from disk took: {}".format(datetime.datetime.now() - start))
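The method above only covers the read side. Below is a minimal sketch of a plausible counterpart; it is hypothetical, reusing only the attribute names and path scheme visible in load() above.

def save(self, kind):
    # Hypothetical mirror of load(): write one of the n-gram lists using the
    # same "trained/<stem>_<kind>_<ngram_size>_<length>.pack" naming scheme.
    if kind not in ("ip_list", "cp_list", "ep_list"):
        raise ValueError("Unknown list given (required: ip_list, cp_list, or ep_list)")
    path, file = os.path.split(self.training_file)
    pack_path = 'trained/{}_{}_{}_{}.pack'.format(
        file[:-4], kind, self.ngram_size, self.length)
    with open(pack_path, 'wb') as fp:
        umsgpack.dump(getattr(self, kind), fp)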
Example #2
def read_msgpack_bin_from_disk(filename):
    """Extract stored msgpack data from disk."""
    with open(filename, 'rb') as f:
        return umsgpack.load(f)
Example #3
    def blob_printer(data, exception):
        if exception:
            import traceback
            print "EXCEPTION!"
            traceback.print_exc()  # print_exc() takes no exception argument
        elif data:
            data = StringIO(data)
            print '========================================================='

            while True:
                try:
                    msg = msgpack.load(data)
                except msgpack.InsufficientDataException:
                    # No complete message left in the buffer.
                    break

                if msg['type'].endswith('content'):
                    print "chunk size", len(msg['data'])
                elif msg['type'] == 'dirview':
                    line = msg['data']['root'] + ':'
                    for k, v in msg['data'].iteritems():
                        if k == 'root':
                            continue
                        line += ' {}:{}'.format(k, len(v))
                    print line
                else:
                    print "DATA:", msg

            print '========================================================='
Example #4
def _load_file(path):
    with open(path, 'rb') as fileobj:
        # Yield every msgpack object in the file until the data runs out.
        while True:
            try:
                yield umsgpack.load(fileobj)
            except umsgpack.InsufficientDataException:
                break
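A brief usage sketch for the generator above (the file name is illustrative): it lazily yields every msgpack object stored back-to-back in one file.

for record in _load_file('records.pack'):  # hypothetical file name
    print(record)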
Example #5
def read_msgpack_bin_from_disk(filename):
    """Extract stored msgpack data from disk."""
    try:
        with open(filename, 'rb') as msgpack_data:
            return umsgpack.load(msgpack_data)
    except IOError:
        print("File not found: " + filename)
        return None
Example #6
def test_is_prime(benchmark: Any) -> None:
    def func(set_of_primes):
        last = 2
        for x, y in zip(primes(), set_of_primes):
            assert is_prime(x)
            assert x == y
            for z in range(last + 1, x):
                assert not is_prime(z)
            last = x

    if ONLY_SLOW or NO_OPTIONAL_TESTS:
        skip()
    with PY_FOLDER.joinpath('primes.mpack').open('rb') as f:
        set_of_primes = load(f)  # set of first million primes
    benchmark.pedantic(func, args=(set_of_primes, ), iterations=1, rounds=1)
    if hasattr(benchmark, 'stats') and benchmark.stats.stats.max > (
            200 * 1_000_000 / 1_000_000_000):  # 200 ns x one million primes, in seconds
        fail("Exceeding 200ns average!")
Example #7
def username_generator():
    mkv_lvl = 310
    mkv_len = 16
    acceptlang = flask.g.get('acceptlang')
    accepttw = flask.g.get('accepttw')
    try:
        num = int(flask.request.args.get('num', 100))
    except Exception:
        num = 100
    fjson = flask.request.is_xhr or flask.request.args.get('f') == "json"
    unamemodel = getattr(flask.g, 'unamemodel', None)
    if unamemodel is None:
        unamemodel = flask.g.unamemodel = markov.MarkovModel(
            os.path.join(OS_DATA, 'stats_user.txt'))
        cachefn = os.path.join(OS_DATA,
                               'stats_user_%d-%d.msgp' % (mkv_lvl, mkv_len))
        if os.path.isfile(cachefn):
            with open(cachefn, 'rb') as f:
                nbparts = umsgpack.load(f)
            idxrange = unamemodel.init(mkv_lvl, mkv_len, nbparts)
        else:
            idxrange = unamemodel.init(mkv_lvl, mkv_len)
            with open(cachefn, 'wb') as f:
                umsgpack.dump(unamemodel.nbparts, f)
    else:
        idxrange = unamemodel[(0, 0, 0)]
    names = [
        unamemodel.print_pwd(random.randrange(idxrange))[0] for x in range(num)
    ]
    if fjson:
        return flask.jsonify({'usernames': names})
    uselang = (max(
        ('en', 'zh-cn', 'zh-tw',
         'zh'), key=lambda x: acceptlang.get(x, 0)) if acceptlang else 'en')
    if uselang.startswith('zh'):
        if uselang == 'zh-tw':
            tmpl = flask.render_template('username_zhtw.html', usernames=names)
        else:
            tmpl = flask.render_template('username_zhcn.html', usernames=names)
    else:
        tmpl = flask.render_template('username_en.html', usernames=names)
    return tmpl
Example #8
    def test_load_short_read(self):
        # When reading from files, the network, etc.
        # there's no guarantee that read(n) returns n bytes
        # so you need to keep calling read() until you get all the data.

        class File(object):
            def __init__(self, data):
                self._data = data

            def read(self, n=None):
                if n is None or n <= 0 or len(self._data) == 0:
                    data, self._data = self._data, b''
                    return data

                n = int(math.ceil(n / 2.0))
                self._data, data = self._data[n:], self._data[:n]
                return data

        p = {'hello': 'world'}
        file = File(umsgpack.dumps(p))
        q = umsgpack.load(file)
        self.assertEqual(p, q)
Example #9
    def test_load_short_read(self):
        # When reading from files, the network, etc. there's no guarantee that
        # read(n) returns n bytes. Simulate this with a file-like object that
        # returns 1 byte at a time.

        class SlowFile(object):
            def __init__(self, data):
                self._data = data

            def read(self, n=None):
                if n is None or len(self._data) == 0:
                    data, self._data = self._data, b''
                    return data

                chunk = self._data[0:1]
                self._data = self._data[1:]
                return chunk

        obj = {'hello': 'world'}
        f = SlowFile(umsgpack.dumps(obj))
        unpacked = umsgpack.load(f)

        self.assertEqual(unpacked, obj)
Example #11
def load_meta(meta_file):
    with open(meta_file, "rb") as fid:
        data = umsgpack.load(fid, encoding="utf-8")
        meta = data["meta"]
    return meta
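A sketch of the matching writer, assuming the file format is simply a msgpack map with a top-level "meta" key, as load_meta() implies:

def save_meta(meta_file, meta):
    # Wrap the metadata under the "meta" key that load_meta() expects.
    with open(meta_file, "wb") as fid:
        umsgpack.dump({"meta": meta}, fid)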
Example #12
def main():
    #global argv
    start_time = time.clock()
    print 'Entering the main thread to start the program'
    #s = ''
    #digits = None
    #app2.run(debug=True)
    data = pd.read_csv('Review_chennai.csv', sep='|')
    print data.head()
    clean_rateofreview = lambda x: str(x).split()[1]
    data['rating'] = data['rateofreview'].apply(clean_rateofreview)
    revs = data.loc[:, ['r_name', 'reviewtext', 'rating']]
    print revs.head()
    print revs.count()
    # Drop rows with missing values. np.where(pd.isnull(revs)) returns a
    # (row_indices, col_indices) tuple, so iterating over it and dropping by
    # each array would wrongly treat column indices as row positions.
    revs.dropna(inplace=True)
    print revs.count()
    # Compare ratings numerically rather than as strings.
    revs_new = revs[revs['rating'].astype(float) != 3.0]
    revs_new['sentiment'] = revs_new['rating'].astype(float) >= 3.5
    print revs_new.head()
    revs_new['sentiment'] = revs_new['sentiment'].apply(binarize_sentiment)
    print revs_new.head()

    vectorer = TfidfVectorizer(min_df=2,
                               ngram_range=(1, 2),
                               stop_words='english')
    #vectorer = TfidfVectorizer(min_df=2,ngram_range=(1,2))
    bow = vectorer.fit_transform(revs_new['reviewtext'])
    target = revs_new['sentiment'].values

    n_samples, n_features = bow.shape
    print '#######################################'
    print n_samples, n_features

    print len(vectorer.get_feature_names())

    print vectorer.get_feature_names()[:10]
    print vectorer.get_feature_names()[n_features // 2:n_features // 2 + 50]
    #print vectorer.vocabulary_

    features_train, features_test, target_train, target_test = train_test_split(
        bow, target, test_size=0.20, random_state=1)

    print features_train.shape
    print target_train.shape
    print features_test.shape
    print target_test.shape

    logreg = LogisticRegression(C=1)
    logreg.fit(features_train, target_train)

    target_predicted = logreg.predict(features_test)
    print target_predicted

    print 'Testing Accuracy is ', accuracy_score(target_test, target_predicted)

    print 'Training Accuracy is', logreg.score(features_train, target_train)
    print 'Testing Accuracy is', logreg.score(features_test, target_test)

    TESTDATA1 = StringIO("""Review
	1;Sushi is Amazing
	2;Sushi is bad
	3;Sushi is not good
	4;Sushi is beautiful
	5;Sushi is bad terrible and good
	6;Sushi is amazing bad and terrible
	7;Sushi is amazing terrible horrible and bad
	8;Sushi is not awesome
	9;Sushi is not great
	10;Sushi is very bad
	11;Sushi is not brilliant
	12;Sushi is unpleasant
	13;Sushi is pleasant
	""")
    # print '################################'
    # print 'Number of arguments:', len(sys.argv), 'arguments.'
    # print 'Argument List:', str(sys.argv)
    # print 'test review is ', sys.argv[1]
    # test_review = str(sys.argv[1])
    # TESTDATA=StringIO("""Review
    # 	;""" + test_review)

    df1 = DataFrame.from_csv(TESTDATA1, sep=";", parse_dates=False)
    print df1

    test_bow = vectorer.transform(df1['Review'])
    prediction = logreg.predict(test_bow)
    print prediction

    timedump_joblib = time.clock()
    # pickling the models
    from sklearn.externals import joblib
    joblib.dump(vectorer, 'BiGram_Vectorizer.pkl')
    joblib.dump(logreg, 'BiGram_Log_Reg_Model.pkl')
    print 'Time for joblib dumping of models: ', time.clock() - timedump_joblib

    timedump = time.clock()
    f = open('vect.bin', 'wb')
    g = open('model.bin', 'wb')
    #umsgpack.dump({u"compact": True, u"schema": 0}, f)
    # NOTE: umsgpack only packs msgpack-native types (dicts, lists, strings,
    # bytes, numbers); passing sklearn estimators raises
    # umsgpack.UnsupportedTypeException. See the sketch after this example.
    umsgpack.dump(vectorer, f)
    umsgpack.dump(logreg, g)
    f.close()
    print 'Time for umsgpack dumping of models: ', time.clock() - timedump

    timeload = time.clock()
    f = open('vect.bin', 'rb')
    g = open('model.bin', 'rb')
    vectorer1 = umsgpack.load(f)
    logreg1 = umsgpack.load(g)
    print 'Time for umsgpack loading of models: ', time.clock() - timeload

    print 'Loaded Vectorer is \n', vectorer1
    print 'Loaded Model is \n', logreg1

    print time.clock() - start_time, "seconds"
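As noted above, msgpack cannot serialize whole sklearn estimators; joblib is the right tool for that. A minimal sketch of what does round-trip through umsgpack is the plain parameter data (names are illustrative, reusing vectorer and logreg from the script above):

params = {
    u'coef': logreg.coef_.ravel().tolist(),        # numpy array -> list of floats
    u'intercept': logreg.intercept_.tolist(),
    u'vocabulary': {k: int(v) for k, v in vectorer.vocabulary_.items()},
}
with open('model_params.bin', 'wb') as fp:
    umsgpack.dump(params, fp)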
Example #13
def read_radmap(args, base_path, fff):
    import numpy as np
    import os
    import umsgpack
    import datetime
    from collections import defaultdict
    from scipy.interpolate import RectBivariateSpline

    rad_basedir = os.path.join(base_path, r'quantile_maps',
                               r'quantile_maps_monthly')
    #     rad_basedir_year = os.path.join(base_path, r'quantile_maps', r'quantile_maps_monthly_allyears')

    h, w, lat, lon, bbox84, base_paths, outdir = args

    days = [
        31.0, 28.0, 31.0, 30.0, 31.0, 30.0, 31.0, 31.0, 30.0, 31.0, 30.0, 31.0
    ]

    alpha_degs = np.arange(-180.0, 181.0, 15.0)
    beta_degs = np.arange(0.0, 95.0, 10.0)

    rad_inp = defaultdict(dict)
    rad_inp_rst = defaultdict(dict)

    #     for radkey in['avg', 'min', '25', '75', 'max']:
    for radkey in ['avg']:
        for radtype in ['sis', 'dif', 'dir']:
            rad_inp_rst[(radkey, radtype)] = np.zeros((12, 38, 91, 361),
                                                      dtype=np.uint16)

    for month in range(1, 13):
        #     for month in range(1, 2):

        fff.write("{} interpolate radiation month {}".format(
            str(datetime.datetime.now()), month))
        fff.write("\n")
        fff.flush()

        inparas = [
            'rad', 'h',
            int(h), 'w',
            int(w), 'lat', lat, 'lon', lon, 'month', month
        ]
        in_filename = "_".join(list(map(str, inparas))) + ".mp"
        in_path = os.path.join(rad_basedir, in_filename)

        with open(in_path, 'rb') as f:
            data_avg = umsgpack.load(f)

        for hour in range(3, 22):
            for mins in [0, 30]:
                hkey = (hour, mins)

                z_sis = np.zeros((len(beta_degs), len(alpha_degs)))
                z_dif = np.zeros((len(beta_degs), len(alpha_degs)))
                z_dir = np.zeros((len(beta_degs), len(alpha_degs)))

                for i, beta_deg in enumerate(beta_degs):
                    for j, alpha_deg in enumerate(alpha_degs):

                        akey = (alpha_deg, beta_deg)

                        factor = days[month - 1] * 0.5
                        z_sis[i, j] = float(data_avg[hkey][akey]['sis']['avg']) * factor
                        z_dif[i, j] = float(data_avg[hkey][akey]['dif']['avg']) * factor
                        z_dir[i, j] = float(data_avg[hkey][akey]['sid']['avg']) * factor

                rad_inp[('avg', 'sis', month)][hkey] = RectBivariateSpline(
                    beta_degs, alpha_degs, z_sis)
                rad_inp[('avg', 'dif', month)][hkey] = RectBivariateSpline(
                    beta_degs, alpha_degs, z_dif)
                rad_inp[('avg', 'dir', month)][hkey] = RectBivariateSpline(
                    beta_degs, alpha_degs, z_dir)
        del hour, mins, hkey, i, j, beta_deg, alpha_deg

        for radkey in ['avg']:

            #         for radkey in['avg', 'min', '25', '75', 'max']:
            for radtype in ['sis', 'dif', 'dir']:
                for hour in range(3, 22):
                    for mins in [0, 30]:
                        hkey = (hour, mins)

                        RBS = rad_inp[(radkey, radtype, month)][hkey]
                        interpolated = RBS(np.arange(0, 91, 1),
                                           np.arange(-180, 181, 1),
                                           grid=True).astype(np.float32)
                        interpolated[interpolated < 0] = 0

                        mmax = np.max(interpolated)
                        if mmax > 65535:
                            fff.write(
                                "{}: Overflow : {} / {} {} {} {} ".format(
                                    str(datetime.datetime.now()), mmax, radkey,
                                    radtype, hour, mins))
                            fff.write("\n")
                            fff.flush()
                        t = int(hour * 2 + mins / 30) - 6
                        rad_inp_rst[(radkey,
                                     radtype)][month - 1,
                                               t, :, :] = interpolated.astype(
                                                   np.uint16)

    mmax = 0.0
    mmin = 100000000
    for k in rad_inp_rst:
        fff.write("{}: Max/min rad for {}: {}/{}".format(
            str(datetime.datetime.now()), k, np.max(rad_inp_rst[k]),
            np.min(rad_inp_rst[k])))
        fff.write("\n")
        mmax = max(mmax, np.max(rad_inp_rst[k]))
        mmin = min(mmin, np.min(rad_inp_rst[k]))

    fff.write("{}: Max/min rad overall: {}/{}".format(
        str(datetime.datetime.now()), mmax, mmin))
    fff.write("\n")
    fff.flush()

    return rad_inp_rst
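The heart of the function above is fitting a RectBivariateSpline on the coarse (beta, alpha) grid and resampling it at 1-degree resolution; here is a self-contained miniature of just that step (toy data, not the real radiation values):

import numpy as np
from scipy.interpolate import RectBivariateSpline

beta_degs = np.arange(0.0, 95.0, 10.0)        # tilt, 10-degree steps
alpha_degs = np.arange(-180.0, 181.0, 15.0)   # azimuth, 15-degree steps
z = np.random.rand(len(beta_degs), len(alpha_degs))  # stand-in values

spline = RectBivariateSpline(beta_degs, alpha_degs, z)
fine = spline(np.arange(0, 91, 1), np.arange(-180, 181, 1), grid=True)
fine[fine < 0] = 0   # splines can undershoot below zero; clamp as the code above does
print(fine.shape)    # (91, 361)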
Example #14
def load():
    with open("api.p", mode="rb") as p:
        return pickle.load(p)
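For symmetry with the rest of this page, the same loader written against umsgpack (assuming the file was produced by umsgpack.dump; the file name is illustrative):

def load_msgpack():
    with open("api.pack", mode="rb") as p:
        return umsgpack.load(p)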
Example #15
What is the largest prime factor of the number 600851475143 ?
"""
from itertools import count, takewhile
from math import ceil, sqrt
from pathlib import Path
from typing import Dict, Iterator, Optional

from sortedcontainers import SortedSet
from umsgpack import load

cache_filename = 'p0003_cache.mpack'

try:
    with Path(__file__).parent.joinpath(cache_filename).open('rb') as f:
        cache = SortedSet(load(f))
except Exception:
    cache = SortedSet(
        [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61])
last_cached: int = cache[-1] + 2
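
The cache is only ever read here; a sketch of how it could be rewritten after extending the prime set (umsgpack cannot pack a SortedSet directly, so it is converted to a list first):

from umsgpack import dump

def write_cache() -> None:
    # Persist the current primes back to the .mpack cache file.
    with Path(__file__).parent.joinpath(cache_filename).open('wb') as f:
        dump(list(cache), f)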


def primes(stop: Optional[int] = None) -> Iterator[int]:
    if stop is None:
        yield from cache
    else:
        yield from takewhile(stop.__gt__, cache)
    global last_cached
    if stop and last_cached - 2 > stop:
        return
    if stop is None: