Code Example #1
import pickle

import jsonpickle


# parse() is defined elsewhere in the source project.
def save():
    with open("API.h", mode="r") as header_file:
        data = parse(header_file.read().split("\n"))
    with open("api.p", mode="wb") as pickle_file:
        pickle.dump(data, pickle_file)
    with open("api.json", mode="w") as json_file:
        json_file.write(jsonpickle.encode(data, unpicklable=False))
Code Example #2
import umsgpack


def write_msgpack_bin_to_disk(filename, json_data):
    """Serialize json_data with msgpack and write it to disk as a binary file."""
    with open(filename, 'wb') as f:
        # umsgpack.dump() packs the object and writes it to the open stream;
        # the `with` block already closes the file, so no explicit close is needed.
        umsgpack.dump(json_data, f)
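A quick round-trip check (an illustrative sketch, not part of the original project; the file name is arbitrary) writes a small dict with this helper and reads it back with umsgpack.load:

data = {"compact": True, "schema": 0}
write_msgpack_bin_to_disk("example.bin", data)
with open("example.bin", 'rb') as f:
    restored = umsgpack.load(f)
assert restored == data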
Code Example #3
    def on_connect(self):
        self.exposed_namespace = {}
        self.exposed_cleanups = []
        self._conn._config.update(REVERSE_SLAVE_CONF)

        infos = Buffer()
        umsgpack.dump(self.exposed_get_infos(), infos)

        pupy.namespace = UpdatableModuleNamespace(self.exposed_getmodule)
        self._conn.root.initialize_v1(
            self.exposed_namespace, pupy.namespace, sys.modules['__builtin__'],
            self.exposed_register_cleanup, self.exposed_unregister_cleanup,
            self.exposed_obtain_call, self.exposed_exit, self.exposed_eval,
            self.exposed_execute, sys.modules.get('pupyimporter'), infos)
Code Example #4
def main(args):
    print("Loading Kitti-lidar from", args.root_dir)

    try:
        print("Opening config file %s" % args.config)
        cfg = yaml.safe_load(open(args.config, 'r'))
    except Exception as e:
        print(e)
        print("Error opening yaml file.")
        quit()

    num_stuff, num_thing = _get_meta(cfg)
    _mk_dir(args.out_dir)
    split = cfg['split'][args.split]
    _list = _get_list(args.root_dir, split, cfg)

    worker = _Worker(args.root_dir, args.out_dir, args.depth_cutoff,
                     args.point_cloud)

    with Pool(initializer=_init_counter, initargs=(_Counter(0), )) as pool:
        total = len(_list)
        for feedback in tqdm.tqdm(pool.imap(worker, _list, 10), total=total):
            nothing = feedback

    # Write meta-data
    print("Writing meta-data")
    meta = {
        "num_stuff": num_stuff,
        "num_thing": num_thing,
        "categories": [],
        "palette": [],
        "original_ids": []
    }

    for lbl in range(num_thing + num_stuff):
        if lbl != 255:
            meta["categories"].append(
                cfg['labels'][cfg['learning_map_inv'][lbl]])
            meta["palette"].append(
                cfg['color_map'][cfg['learning_map_inv'][lbl]])
            meta["original_ids"].append(cfg['learning_map_inv'][lbl])

    with open(path.join(args.out_dir, "metadata.bin"), "wb") as fid:
        umsgpack.dump(meta, fid, encoding="utf-8")
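The metadata.bin file written above can later be restored with umsgpack.load; a minimal reader sketch (assuming the same u-msgpack-python package; not part of the original script) looks like this:

with open(path.join(args.out_dir, "metadata.bin"), "rb") as fid:
    meta = umsgpack.load(fid)
print(meta["num_stuff"], meta["num_thing"], len(meta["categories"]))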
Code Example #5
def username_generator():
    mkv_lvl = 310
    mkv_len = 16
    acceptlang = flask.g.get('acceptlang')
    accepttw = flask.g.get('accepttw')
    try:
        num = int(flask.request.args.get('num', 100))
    except Exception:
        num = 100
    fjson = flask.request.is_xhr or flask.request.args.get('f') == "json"
    unamemodel = getattr(flask.g, 'unamemodel', None)
    if unamemodel is None:
        unamemodel = flask.g.unamemodel = markov.MarkovModel(
            os.path.join(OS_DATA, 'stats_user.txt'))
        cachefn = os.path.join(OS_DATA,
                               'stats_user_%d-%d.msgp' % (mkv_lvl, mkv_len))
        if os.path.isfile(cachefn):
            with open(cachefn, 'rb') as f:
                nbparts = umsgpack.load(f)
            idxrange = unamemodel.init(mkv_lvl, mkv_len, nbparts)
        else:
            idxrange = unamemodel.init(mkv_lvl, mkv_len)
            with open(cachefn, 'wb') as f:
                umsgpack.dump(unamemodel.nbparts, f)
    else:
        idxrange = unamemodel[(0, 0, 0)]
    names = [
        unamemodel.print_pwd(random.randrange(idxrange))[0] for x in range(num)
    ]
    if fjson:
        return flask.jsonify({'usernames': names})
    uselang = (max(
        ('en', 'zh-cn', 'zh-tw',
         'zh'), key=lambda x: acceptlang.get(x, 0)) if acceptlang else 'en')
    if uselang.startswith('zh'):
        if uselang == 'zh-tw':
            tmpl = flask.render_template('username_zhtw.html', usernames=names)
        else:
            tmpl = flask.render_template('username_zhcn.html', usernames=names)
    else:
        tmpl = flask.render_template('username_en.html', usernames=names)
    return tmpl
Code Example #6
File: service.py, Project: yalpdevx/pupy
    def on_connect(self):
        self.exposed_namespace = {}
        self.exposed_cleanups = []
        self._conn._config.update(REVERSE_SLAVE_CONF)

        infos_buffer = Buffer()
        infos = self.exposed_get_infos()

        try:
            umsgpack.dump(infos, infos_buffer, ext_handlers=MSG_TYPES_PACK)
        except Exception as e:
            pupy.remote_error('on_connect failed: {}; infos={}', e, infos)

        self._conn.root.initialize_v1(self.exposed_namespace, pupy.namespace,
                                      __import__('__builtin__'),
                                      self.exposed_register_cleanup,
                                      self.exposed_unregister_cleanup,
                                      self.exposed_obtain_call,
                                      self.exposed_exit, self.exposed_eval,
                                      self.exposed_execute,
                                      __import__('pupyimporter'), infos_buffer)
Code Example #7
    def save(self, kind):
        start = datetime.datetime.now()
        logging.debug("Start: Writing result to disk, this is going to take a while ...")
        _, fname = os.path.split(self.training_file)
        outfile = ('trained/' + fname[:-4] + '_' + kind + '_' +
                   str(self.ngram_size) + '_' + str(self.length) + '.pack')
        with open(outfile, 'wb') as fp:
            if kind == "ip_list":
                umsgpack.dump(self.ip_list, fp)
            elif kind == "cp_list":
                umsgpack.dump(self.cp_list, fp)
            elif kind == "ep_list":
                umsgpack.dump(self.ep_list, fp)
            else:
                raise Exception("Unknown list given (required: ip_list, cp_list, or ep_list)")
        logging.debug("Done! Everything stored on disk.")
        logging.debug("Storing the data on disk took: {}".format(datetime.datetime.now() - start))
Code Example #8
    def exposed_msgpack_dumps(self, obj, compressed=False):
        data = Buffer(compressed=compressed)
        umsgpack.dump(obj, data)
        data.flush()
        return data
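For comparison, when no file-like object is involved, umsgpack.dumps packs an object straight to a bytes string and umsgpack.loads reverses it; a minimal sketch independent of pupy's Buffer class:

packed = umsgpack.dumps({"compact": True, "schema": 0})
restored = umsgpack.loads(packed)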
Code Example #9
    def _worker_run_unsafe(self, buf):
        global HAS_BUFFER_OPTIMIZATION

        while not self._terminate.is_set():
            task = self.queue.get()

            if task is None:
                self._terminate.set()
                break

            command, args, callback = task
            if command is None and args is None:
                if callback is not None:
                    callback(None, None)

                self._terminate.set()
                break

            restore_compression = False
            channel = None

            try:
                channel = object.__getattribute__(callback,
                                                  "____conn__")()._channel
            except:
                pass

            try:
                for chunk in command(*args):
                    msgpack.dump(chunk, buf)

                    if channel and channel.compress and chunk.get(
                            F_TYPE) == T_ZCONTENT:
                        restore_compression = True
                        channel.compress = False

                    del chunk

                    bpos = None

                    if HAS_BUFFER_OPTIMIZATION:
                        bpos = len(buf)
                    else:
                        bpos = buf.tell()

                    if bpos > self.chunk_size:
                        if HAS_BUFFER_OPTIMIZATION:
                            callback(buf, None)
                            buf.drain()
                        else:
                            buf.seek(0)
                            data = buf.read(bpos)
                            buf.seek(0)
                            callback(data, None)
                            del data

                        if restore_compression:
                            try:
                                channel.compress = restore_compression
                            except:
                                pass

                            restore_compression = False

                    if self._terminate.is_set():
                        break

                bpos = None
                if HAS_BUFFER_OPTIMIZATION:
                    bpos = len(buf)
                else:
                    bpos = buf.tell()

                if bpos > 0:
                    if HAS_BUFFER_OPTIMIZATION:
                        callback(buf, None)
                        buf.drain()
                    else:
                        buf.seek(0)
                        data = buf.read(bpos)
                        buf.seek(0)
                        callback(data, None)
                        del data

            except Exception as e:
                try:
                    callback(None, e)
                except EOFError:
                    pass

            finally:
                pass  # the body of this finally clause is truncated in the excerpt
Code Example #10
def main():
    #global argv
    start_time = time.clock()
    print 'Entering the main thread to start the program'
    #s = ''
    #digits = None
    #app2.run(debug=True)
    data = pd.read_csv('Review_chennai.csv', sep='|')
    print data.head()
    clean_rateofreview = lambda x: str(x).split()[1]
    data['rating'] = data['rateofreview'].apply(clean_rateofreview)
    revs = data.loc[:, ['r_name', 'reviewtext', 'rating']]
    print revs.head()
    print revs.count()
    # np.where(pd.isnull(...)) returns (row_indices, col_indices); only the row
    # indices identify the records that contain NaNs and should be dropped
    nan_rows = np.where(pd.isnull(revs))[0]
    revs.drop(revs.index[nan_rows], inplace=True)
    print revs.count()
    revs_new = revs[revs['rating'] != '3.0']
    revs_new['sentiment'] = revs_new['rating'] >= '3.5'
    print revs_new.head()
    revs_new['sentiment'] = revs_new['sentiment'].apply(binarize_sentiment)
    print revs_new.head()

    vectorer = TfidfVectorizer(min_df=2,
                               ngram_range=(1, 2),
                               stop_words='english')
    #vectorer = TfidfVectorizer(min_df=2,ngram_range=(1,2))
    bow = vectorer.fit_transform(revs_new['reviewtext'])
    target = revs_new['sentiment'].values

    n_samples, n_features = bow.shape
    print '#######################################'
    print n_samples, n_features

    print len(vectorer.get_feature_names())

    print vectorer.get_feature_names()[:10]
    print vectorer.get_feature_names()[n_features // 2:n_features // 2 + 50]
    #print vectorer.vocabulary_

    features_train, features_test, target_train, target_test = train_test_split(
        bow, target, test_size=0.20, random_state=1)

    print features_train.shape
    print target_train.shape
    print features_test.shape
    print target_test.shape

    logreg = LogisticRegression(C=1)
    logreg.fit(features_train, target_train)

    target_predicted = logreg.predict(features_test)
    print target_predicted

    print 'Testing Accuracy is ', accuracy_score(target_test, target_predicted)

    print 'Training Accuracy is', logreg.score(features_train, target_train)
    print 'Testing Accuracy is', logreg.score(features_test, target_test)

    TESTDATA1 = StringIO("""Review
	1;Sushi is Amazing
	2;Sushi is bad
	3;Sushi is not good
	4;Sushi is beautiful
	5;Sushi is bad terrible and good
	6;Sushi is amazing bad and terrible
	7;Sushi is amazing terrible horrible and bad
	8;Sushi is not awesome
	9;Sushi is not great
	10;Sushi is very bad
	11;Sushi is not brilliant
	12;Sushi is unpleasant
	13;Sushi is pleasant
	""")
    # print '################################'
    # print 'Number of arguments:', len(sys.argv), 'arguments.'
    # print 'Argument List:', str(sys.argv)
    # print 'test review is ', sys.argv[1]
    # test_review = str(sys.argv[1])
    # TESTDATA=StringIO("""Review
    # 	;""" + test_review)

    df1 = DataFrame.from_csv(TESTDATA1, sep=";", parse_dates=False)
    print df1

    test_bow = vectorer.transform(df1['Review'])
    prediction = logreg.predict(test_bow)
    print prediction

    timedump_joblib = time.clock()
    # pickling the models
    from sklearn.externals import joblib
    joblib.dump(vectorer, 'BiGram_Vectorizer.pkl')
    joblib.dump(logreg, 'BiGram_Log_Reg_Model.pkl')
    print 'Time for joblib dumping of models: ', time.clock() - timedump_joblib

    timedump = time.clock()
    f = open('vect.bin', 'wb')
    g = open('model.bin', 'wb')
    # Note: u-msgpack-python only packs native msgpack types (dict, list, str,
    # bytes, numbers, bool, None); dumping sklearn objects like these will
    # typically raise umsgpack.UnsupportedTypeException unless custom
    # ext_handlers are supplied.
    #umsgpack.dump({u"compact": True, u"schema": 0}, f)
    umsgpack.dump(vectorer, f)
    umsgpack.dump(logreg, g)
    f.close()
    print 'Time for umsgpack dumping of models: ', time.clock() - timedump

    timeload = time.clock()
    f = open('vect.bin', 'rb')
    g = open('model.bin', 'rb')
    vectorer1 = umsgpack.load(f)
    logreg1 = umsgpack.load(g)
    print 'Time for umsgpack loading of models: ', time.clock() - timeload

    print 'Loaded Vectorer is \n', vectorer1
    print 'Loaded Model is \n', logreg1

    print time.clock() - start_time, "seconds"
Code Example #11
    def save(self, data):
        umsgpack.dump(data, self._fileobj)
Code Example #12
def main(args):
    print("Loading Cityscapes from", args.root_dir)
    num_stuff, num_thing = _get_meta()

    # Prepare directories
    img_dir = path.join(args.out_dir, "img")
    _ensure_dir(img_dir)
    msk_dir = path.join(args.out_dir, "msk")
    _ensure_dir(msk_dir)
    lst_dir = path.join(args.out_dir, "lst")
    _ensure_dir(lst_dir)
    coco_dir = path.join(args.out_dir, "coco")
    _ensure_dir(coco_dir)

    # COCO-style category list
    coco_categories = []
    for lbl in cs_labels:
        if lbl.trainId != 255 and lbl.trainId != -1 and lbl.hasInstances:
            coco_categories.append({"id": lbl.trainId, "name": lbl.name})

    # Process splits
    images = []
    for split, (split_img_subdir, split_msk_subdir) in _SPLITS.items():
        print("Converting", split, "...")

        img_base_dir = path.join(args.root_dir, split_img_subdir)
        msk_base_dir = path.join(args.root_dir, split_msk_subdir)
        img_list = _get_images(msk_base_dir)

        # Write the list file
        with open(path.join(lst_dir, split + ".txt"), "w") as fid:
            fid.writelines(img_id + "\n" for _, img_id, _ in img_list)

        # Convert to COCO detection format
        coco_out = {
            "info": {
                "version": "1.0"
            },
            "images": [],
            "categories": coco_categories,
            "annotations": []
        }

        # Process images in parallel
        worker = _Worker(img_base_dir, msk_base_dir, img_dir, msk_dir)
        with Pool(initializer=_init_counter, initargs=(_Counter(0), )) as pool:
            total = len(img_list)
            for img_meta, coco_img, coco_ann in tqdm.tqdm(pool.imap(
                    worker, img_list, 8),
                                                          total=total):
                images.append(img_meta)

                # COCO annotation
                coco_out["images"].append(coco_img)
                coco_out["annotations"] += coco_ann

        # Write COCO detection format annotation
        with open(path.join(coco_dir, split + ".json"), "w") as fid:
            json.dump(coco_out, fid)

    # Write meta-data
    print("Writing meta-data")
    meta = {
        "images": images,
        "meta": {
            "num_stuff": num_stuff,
            "num_thing": num_thing,
            "categories": [],
            "palette": [],
            "original_ids": []
        }
    }

    for lbl in cs_labels:
        if lbl.trainId != 255 and lbl.trainId != -1:
            meta["meta"]["categories"].append(lbl.name)
            meta["meta"]["palette"].append(lbl.color)
            meta["meta"]["original_ids"].append(lbl.id)

    with open(path.join(args.out_dir, "metadata.bin"), "wb") as fid:
        umsgpack.dump(meta, fid, encoding="utf-8")
Code Example #13
def main(args):
    print("Loading Vistas from", args.root_dir)

    # Process meta-data
    categories, version = _load_metadata(args.root_dir)
    cat_id_mvd_to_iss, cat_id_iss_to_mvd, num_stuff, num_thing = _cat_id_maps(categories)

    # Prepare directories
    lst_dir = path.join(args.out_dir, "lst")
    _ensure_dir(lst_dir)
    coco_dir = path.join(args.out_dir, "coco")
    _ensure_dir(coco_dir)

    # Run conversion
    images = []
    for split in _SPLITS:
        print("Converting", split, "...")

        # Find all image ids in the split
        img_ids = []
        for name in glob.glob(path.join(args.root_dir, split, _IMAGES_DIR, "*." + _IMAGES_EXT)):
            _, name = path.split(name)
            img_ids.append(name[:-(1 + len(_IMAGES_EXT))])

        # Write the list file
        with open(path.join(lst_dir, split + ".txt"), "w") as fid:
            fid.writelines(img_id + "\n" for img_id in img_ids)

        # Convert to COCO detection format
        coco_out = {
            "info": {"version": str(version)},
            "images": [],
            "categories": [],
            "annotations": []
        }
        for cat_id, cat_meta in enumerate(categories):
            if cat_meta["instances"]:
                coco_out["categories"].append({
                    "id": cat_id_mvd_to_iss[cat_id],
                    "name": cat_meta["name"]
                })

        # Process images in parallel
        worker = _Worker(categories, cat_id_mvd_to_iss, path.join(args.root_dir, split), args.out_dir)
        with Pool(initializer=_init_counter, initargs=(_Counter(0),)) as pool:
            total = len(img_ids)
            for img_meta, coco_img, coco_ann in tqdm.tqdm(pool.imap(worker, img_ids, 8), total=total):
                images.append(img_meta)

                # COCO annotation
                coco_out["images"].append(coco_img)
                coco_out["annotations"] += coco_ann

        # Write COCO detection format annotation
        with open(path.join(coco_dir, split + ".json"), "w") as fid:
            json.dump(coco_out, fid)

    # Write meta-data
    print("Writing meta-data")
    meta = {
        "images": images,
        "meta": {
            "num_stuff": num_stuff,
            "num_thing": num_thing
        }
    }

    meta["meta"]["categories"] = ["" for _ in range(num_stuff + num_thing)]
    meta["meta"]["palette"] = [[0, 0, 0] for _ in range(num_stuff + num_thing)]
    meta["meta"]["original_ids"] = [0 for _ in range(num_stuff + num_thing)]
    for cat_id, cat_meta in enumerate(categories):
        if not cat_meta["evaluate"]:
            continue

        mapped_id = cat_id_mvd_to_iss[cat_id]
        meta["meta"]["categories"][mapped_id] = cat_meta["name"]
        meta["meta"]["palette"][mapped_id] = cat_meta["color"]
        meta["meta"]["original_ids"][mapped_id] = cat_id

    with open(path.join(args.out_dir, "metadata.bin"), "wb") as fid:
        umsgpack.dump(meta, fid, encoding="utf-8")