def save():
    data = []
    with open("API.h", mode="r") as theta:
        data = parse(theta.read().split("\n"))
    with open("api.p", mode="wb") as thata:
        pickle.dump(data, thata)
    with open("api.json", mode="w") as jsonf:
        jsonf.write(jsonpickle.encode(data, unpicklable=False))
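# A minimal round-trip sketch for the save() above, assuming the api.p /
# api.json files it writes. Since the JSON copy was encoded with
# unpicklable=False, it loads back as plain dicts/lists, not the original
# objects; the load() name is illustrative, not part of the original code.
import json
import pickle

def load():
    with open("api.p", mode="rb") as fh:
        data = pickle.load(fh)          # original parsed objects
    with open("api.json", mode="r") as jsonf:
        data_json = json.load(jsonf)    # plain-JSON view of the same data
    return data, data_json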
def write_msgpack_bin_to_disk(filename, json_data):
    """Store msgpack data as bin on disk."""
    with open(filename, 'wb') as f:
        # umsgpack.pack(json_data, f)
        umsgpack.dump(json_data, f)
        # no explicit f.close() needed: the with-block closes the file
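# Usage sketch for write_msgpack_bin_to_disk above: round-trips a plain dict
# through disk with u-msgpack-python (assumes `import umsgpack` as in the
# snippet). The filename is illustrative only.
payload = {u"compact": True, u"schema": 0}
write_msgpack_bin_to_disk("payload.bin", payload)
with open("payload.bin", "rb") as f:
    assert umsgpack.load(f) == payload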
def on_connect(self):
    self.exposed_namespace = {}
    self.exposed_cleanups = []
    self._conn._config.update(REVERSE_SLAVE_CONF)

    infos = Buffer()
    umsgpack.dump(self.exposed_get_infos(), infos)

    pupy.namespace = UpdatableModuleNamespace(self.exposed_getmodule)

    self._conn.root.initialize_v1(
        self.exposed_namespace,
        pupy.namespace,
        sys.modules['__builtin__'],
        self.exposed_register_cleanup,
        self.exposed_unregister_cleanup,
        self.exposed_obtain_call,
        self.exposed_exit,
        self.exposed_eval,
        self.exposed_execute,
        sys.modules.get('pupyimporter'),
        infos
    )
def main(args): print("Loading Kitti-lidar from", args.root_dir) try: print("Opening config file %s" % args.config) cfg = yaml.safe_load(open(args.config, 'r')) except Exception as e: print(e) print("Error opening yaml file.") quit() num_stuff, num_thing = _get_meta(cfg) _mk_dir(args.out_dir) split = cfg['split'][args.split] _list = _get_list(args.root_dir, split, cfg) worker = _Worker(args.root_dir, args.out_dir, args.depth_cutoff, args.point_cloud) with Pool(initializer=_init_counter, initargs=(_Counter(0), )) as pool: total = len(_list) for feedback in tqdm.tqdm(pool.imap(worker, _list, 10), total=total): nothing = feedback # Write meta-data print("Writing meta-data") meta = { "num_stuff": num_stuff, "num_thing": num_thing, "categories": [], "palette": [], "original_ids": [] } for lbl in range(num_thing + num_stuff): if lbl != 255: meta["categories"].append( cfg['labels'][cfg['learning_map_inv'][lbl]]) meta["palette"].append( cfg['color_map'][cfg['learning_map_inv'][lbl]]) meta["original_ids"].append(cfg['learning_map_inv'][lbl]) with open(path.join(args.out_dir, "metadata.bin"), "wb") as fid: umsgpack.dump(meta, fid, encoding="utf-8")
def username_generator():
    mkv_lvl = 310
    mkv_len = 16
    acceptlang = flask.g.get('acceptlang')
    accepttw = flask.g.get('accepttw')
    try:
        num = int(flask.request.args.get('num', 100))
    except Exception:
        num = 100
    fjson = flask.request.is_xhr or flask.request.args.get('f') == "json"

    unamemodel = getattr(flask.g, 'unamemodel', None)
    if unamemodel is None:
        unamemodel = flask.g.unamemodel = markov.MarkovModel(
            os.path.join(OS_DATA, 'stats_user.txt'))
        cachefn = os.path.join(OS_DATA, 'stats_user_%d-%d.msgp' % (mkv_lvl, mkv_len))
        if os.path.isfile(cachefn):
            with open(cachefn, 'rb') as f:
                nbparts = umsgpack.load(f)
            idxrange = unamemodel.init(mkv_lvl, mkv_len, nbparts)
        else:
            idxrange = unamemodel.init(mkv_lvl, mkv_len)
            with open(cachefn, 'wb') as f:
                umsgpack.dump(unamemodel.nbparts, f)
    else:
        idxrange = unamemodel[(0, 0, 0)]

    names = [
        unamemodel.print_pwd(random.randrange(idxrange))[0]
        for x in range(num)
    ]
    if fjson:
        return flask.jsonify({'usernames': names})

    uselang = (max(('en', 'zh-cn', 'zh-tw', 'zh'),
                   key=lambda x: acceptlang.get(x, 0))
               if acceptlang else 'en')
    if uselang.startswith('zh'):
        if uselang == 'zh-tw':
            tmpl = flask.render_template('username_zhtw.html', usernames=names)
        else:
            tmpl = flask.render_template('username_zhcn.html', usernames=names)
    else:
        tmpl = flask.render_template('username_en.html', usernames=names)
    return tmpl
def on_connect(self):
    self.exposed_namespace = {}
    self.exposed_cleanups = []
    self._conn._config.update(REVERSE_SLAVE_CONF)

    infos_buffer = Buffer()
    infos = self.exposed_get_infos()

    try:
        umsgpack.dump(infos, infos_buffer, ext_handlers=MSG_TYPES_PACK)
    except Exception as e:
        pupy.remote_error('on_connect failed: {}; infos={}', e, infos)

    self._conn.root.initialize_v1(
        self.exposed_namespace,
        pupy.namespace,
        __import__('__builtin__'),
        self.exposed_register_cleanup,
        self.exposed_unregister_cleanup,
        self.exposed_obtain_call,
        self.exposed_exit,
        self.exposed_eval,
        self.exposed_execute,
        __import__('pupyimporter'),
        infos_buffer
    )
def save(self, kind):
    start = datetime.datetime.now()
    logging.debug("Start: Writing result to disk, this is going to take a while ...")
    path, file = os.path.split(self.training_file)
    outname = ('trained/' + file[:-4] + '_' + kind + '_' +
               str(self.ngram_size) + '_' + str(self.length) + '.pack')
    with open(outname, 'wb') as fp:
        if kind == "ip_list":
            umsgpack.dump(self.ip_list, fp)
        elif kind == "cp_list":
            umsgpack.dump(self.cp_list, fp)
        elif kind == "ep_list":
            umsgpack.dump(self.ep_list, fp)
        else:
            raise Exception("Unknown list given (required: ip_list, cp_list, or ep_list)")
    logging.debug("Done! Everything stored on disk.")
    logging.debug("Storing the data on disk took: {}".format(datetime.datetime.now() - start))
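# Hedged counterpart to save() above: loads one of the dumped lists back from
# disk. The path layout mirrors save(); the load() method name is hypothetical
# and not part of the original class.
def load(self, kind):
    path_, file_ = os.path.split(self.training_file)
    inname = ('trained/' + file_[:-4] + '_' + kind + '_' +
              str(self.ngram_size) + '_' + str(self.length) + '.pack')
    with open(inname, 'rb') as fp:
        return umsgpack.load(fp)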
def exposed_msgpack_dumps(self, obj, compressed=False):
    data = Buffer(compressed=compressed)
    umsgpack.dump(obj, data)
    data.flush()
    return data
def _worker_run_unsafe(self, buf):
    global HAS_BUFFER_OPTIMIZATION

    while not self._terminate.is_set():
        task = self.queue.get()
        if task is None:
            self._terminate.set()
            break

        command, args, callback = task
        if command is None and args is None:
            if callback is not None:
                callback(None, None)
            self._terminate.set()
            break

        restore_compression = False
        channel = None
        try:
            channel = object.__getattribute__(callback, "____conn__")()._channel
        except:
            pass

        try:
            for chunk in command(*args):
                msgpack.dump(chunk, buf)

                # Temporarily disable channel compression for already-compressed content
                if channel and channel.compress and chunk.get(F_TYPE) == T_ZCONTENT:
                    restore_compression = True
                    channel.compress = False

                del chunk

                bpos = None
                if HAS_BUFFER_OPTIMIZATION:
                    bpos = len(buf)
                else:
                    bpos = buf.tell()

                # Flush the buffer to the callback once it exceeds the chunk size
                if bpos > self.chunk_size:
                    if HAS_BUFFER_OPTIMIZATION:
                        callback(buf, None)
                        buf.drain()
                    else:
                        buf.seek(0)
                        data = buf.read(bpos)
                        buf.seek(0)
                        callback(data, None)
                        del data

                if restore_compression:
                    try:
                        channel.compress = restore_compression
                    except:
                        pass
                    restore_compression = False

                if self._terminate.is_set():
                    break

            # Flush whatever remains in the buffer after the command finishes
            bpos = None
            if HAS_BUFFER_OPTIMIZATION:
                bpos = len(buf)
            else:
                bpos = buf.tell()

            if bpos > 0:
                if HAS_BUFFER_OPTIMIZATION:
                    callback(buf, None)
                    buf.drain()
                else:
                    buf.seek(0)
                    data = buf.read(bpos)
                    buf.seek(0)
                    callback(data, None)
                    del data
        except Exception as e:
            try:
                callback(None, e)
            except EOFError:
                pass
        finally:
            pass  # NOTE: the finally body was truncated in the source; 'pass' added so the snippet parses
def main():
    #global argv
    start_time = time.clock()
    print 'Entering the main thread to start the program'
    #s = ''
    #digits = None
    #app2.run(debug=True)
    data = pd.read_csv('Review_chennai.csv', sep='|')
    print data.head()

    clean_rateofreview = lambda x: str(x).split()[1]
    data['rating'] = data['rateofreview'].apply(clean_rateofreview)
    revs = data.loc[:, ['r_name', 'reviewtext', 'rating']]
    print revs.head()
    print revs.count()
    for i in list(np.where(pd.isnull(revs))):
        revs.drop(revs.index[i], inplace=True)
    print revs.count()

    revs_new = revs[revs['rating'] != '3.0']
    revs_new['sentiment'] = revs_new['rating'] >= '3.5'
    print revs_new.head()
    revs_new['sentiment'] = revs_new['sentiment'].apply(binarize_sentiment)
    print revs_new.head()

    vectorer = TfidfVectorizer(min_df=2, ngram_range=(1, 2), stop_words='english')
    #vectorer = TfidfVectorizer(min_df=2,ngram_range=(1,2))
    bow = vectorer.fit_transform(revs_new['reviewtext'])
    target = revs_new['sentiment'].values
    n_samples, n_features = bow.shape
    print '#######################################'
    print n_samples, n_features
    print len(vectorer.get_feature_names())
    print vectorer.get_feature_names()[:10]
    print vectorer.get_feature_names()[n_features // 2:n_features // 2 + 50]
    #print vectorer.vocabulary_

    features_train, features_test, target_train, target_test = train_test_split(
        bow, target, test_size=0.20, random_state=1)
    print features_train.shape
    print target_train.shape
    print features_test.shape
    print target_test.shape

    logreg = LogisticRegression(C=1)
    logreg.fit(features_train, target_train)
    target_predicted = logreg.predict(features_test)
    print target_predicted
    print 'Testing Accuracy is ', accuracy_score(target_test, target_predicted)
    print 'Training Accuracy is', logreg.score(features_train, target_train)
    print 'Testing Accuracy is', logreg.score(features_test, target_test)

    TESTDATA1 = StringIO("""Review
1;Sushi is Amazing
2;Sushi is bad
3;Sushi is not good
4;Sushi is beautiful
5;Sushi is bad terrible and good
6;Sushi is amazing bad and terrible
7;Sushi is amazing terrible horrible and bad
8;Sushi is not awesome
9;Sushi is not great
10;Sushi is very bad
11;Sushi is not brilliant
12;Sushi is unpleasant
13;Sushi is pleasant
""")
    # print '################################'
    # print 'Number of arguments:', len(sys.argv), 'arguments.'
    # print 'Argument List:', str(sys.argv)
    # print 'test review is ', sys.argv[1]
    # test_review = str(sys.argv[1])
    # TESTDATA=StringIO("""Review
    # ;""" + test_review)
    df1 = DataFrame.from_csv(TESTDATA1, sep=";", parse_dates=False)
    print df1
    test_bow = vectorer.transform(df1['Review'])
    prediction = logreg.predict(test_bow)
    print prediction

    timedump_joblib = time.clock()
    # pickling the models
    from sklearn.externals import joblib
    joblib.dump(vectorer, 'BiGram_Vectorizer.pkl')
    joblib.dump(logreg, 'BiGram_Log_Reg_Model.pkl')
    print 'Time for joblib dumping of models: ', time.clock() - timedump_joblib

    timedump = time.clock()
    f = open('vect.bin', 'wb')
    g = open('model.bin', 'wb')
    #umsgpack.dump({u"compact": True, u"schema": 0}, f)
    # NOTE: umsgpack only serializes msgpack-native types (dict, list, str, ...);
    # dumping sklearn estimators like these will likely raise UnsupportedTypeException,
    # which is why the joblib dumps above are the usual approach.
    umsgpack.dump(vectorer, f)
    umsgpack.dump(logreg, g)
    f.close()
    g.close()
    print 'Time for umsgpack dumping of models: ', time.clock() - timedump

    timeload = time.clock()
    f = open('vect.bin', 'rb')
    g = open('model.bin', 'rb')
    vectorer1 = umsgpack.load(f)
    logreg1 = umsgpack.load(g)
    print 'Time for umsgpack loading of models: ', time.clock() - timeload
    print 'Loaded Vectorer is \n', vectorer1
    print 'Loaded Model is \n', logreg1

    print time.clock() - start_time, "seconds"
def save(self, data):
    umsgpack.dump(data, self._fileobj)
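# Matching read side for the one-line save() above; a sketch assuming
# self._fileobj is an open binary file positioned at a msgpack document.
# The load() method name is illustrative, not from the original class.
def load(self):
    return umsgpack.load(self._fileobj)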
def main(args): print("Loading Cityscapes from", args.root_dir) num_stuff, num_thing = _get_meta() # Prepare directories img_dir = path.join(args.out_dir, "img") _ensure_dir(img_dir) msk_dir = path.join(args.out_dir, "msk") _ensure_dir(msk_dir) lst_dir = path.join(args.out_dir, "lst") _ensure_dir(lst_dir) coco_dir = path.join(args.out_dir, "coco") _ensure_dir(coco_dir) # COCO-style category list coco_categories = [] for lbl in cs_labels: if lbl.trainId != 255 and lbl.trainId != -1 and lbl.hasInstances: coco_categories.append({"id": lbl.trainId, "name": lbl.name}) # Process splits images = [] for split, (split_img_subdir, split_msk_subdir) in _SPLITS.items(): print("Converting", split, "...") img_base_dir = path.join(args.root_dir, split_img_subdir) msk_base_dir = path.join(args.root_dir, split_msk_subdir) img_list = _get_images(msk_base_dir) # Write the list file with open(path.join(lst_dir, split + ".txt"), "w") as fid: fid.writelines(img_id + "\n" for _, img_id, _ in img_list) # Convert to COCO detection format coco_out = { "info": { "version": "1.0" }, "images": [], "categories": coco_categories, "annotations": [] } # Process images in parallel worker = _Worker(img_base_dir, msk_base_dir, img_dir, msk_dir) with Pool(initializer=_init_counter, initargs=(_Counter(0), )) as pool: total = len(img_list) for img_meta, coco_img, coco_ann in tqdm.tqdm(pool.imap( worker, img_list, 8), total=total): images.append(img_meta) # COCO annotation coco_out["images"].append(coco_img) coco_out["annotations"] += coco_ann # Write COCO detection format annotation with open(path.join(coco_dir, split + ".json"), "w") as fid: json.dump(coco_out, fid) # Write meta-data print("Writing meta-data") meta = { "images": images, "meta": { "num_stuff": num_stuff, "num_thing": num_thing, "categories": [], "palette": [], "original_ids": [] } } for lbl in cs_labels: if lbl.trainId != 255 and lbl.trainId != -1: meta["meta"]["categories"].append(lbl.name) meta["meta"]["palette"].append(lbl.color) meta["meta"]["original_ids"].append(lbl.id) with open(path.join(args.out_dir, "metadata.bin"), "wb") as fid: umsgpack.dump(meta, fid, encoding="utf-8")
def main(args): print("Loading Vistas from", args.root_dir) # Process meta-data categories, version = _load_metadata(args.root_dir) cat_id_mvd_to_iss, cat_id_iss_to_mvd, num_stuff, num_thing = _cat_id_maps(categories) # Prepare directories lst_dir = path.join(args.out_dir, "lst") _ensure_dir(lst_dir) coco_dir = path.join(args.out_dir, "coco") _ensure_dir(coco_dir) # Run conversion images = [] for split in _SPLITS: print("Converting", split, "...") # Find all image ids in the split img_ids = [] for name in glob.glob(path.join(args.root_dir, split, _IMAGES_DIR, "*." + _IMAGES_EXT)): _, name = path.split(name) img_ids.append(name[:-(1 + len(_IMAGES_EXT))]) # Write the list file with open(path.join(lst_dir, split + ".txt"), "w") as fid: fid.writelines(img_id + "\n" for img_id in img_ids) # Convert to COCO detection format coco_out = { "info": {"version": str(version)}, "images": [], "categories": [], "annotations": [] } for cat_id, cat_meta in enumerate(categories): if cat_meta["instances"]: coco_out["categories"].append({ "id": cat_id_mvd_to_iss[cat_id], "name": cat_meta["name"] }) # Process images in parallel worker = _Worker(categories, cat_id_mvd_to_iss, path.join(args.root_dir, split), args.out_dir) with Pool(initializer=_init_counter, initargs=(_Counter(0),)) as pool: total = len(img_ids) for img_meta, coco_img, coco_ann in tqdm.tqdm(pool.imap(worker, img_ids, 8), total=total): images.append(img_meta) # COCO annotation coco_out["images"].append(coco_img) coco_out["annotations"] += coco_ann # Write COCO detection format annotation with open(path.join(coco_dir, split + ".json"), "w") as fid: json.dump(coco_out, fid) # Write meta-data print("Writing meta-data") meta = { "images": images, "meta": { "num_stuff": num_stuff, "num_thing": num_thing } } meta["meta"]["categories"] = ["" for _ in range(num_stuff + num_thing)] meta["meta"]["palette"] = [[0, 0, 0] for _ in range(num_stuff + num_thing)] meta["meta"]["original_ids"] = [0 for _ in range(num_stuff + num_thing)] for cat_id, cat_meta in enumerate(categories): if not cat_meta["evaluate"]: continue mapped_id = cat_id_mvd_to_iss[cat_id] meta["meta"]["categories"][mapped_id] = cat_meta["name"] meta["meta"]["palette"][mapped_id] = cat_meta["color"] meta["meta"]["original_ids"][mapped_id] = cat_id with open(path.join(args.out_dir, "metadata.bin"), "wb") as fid: umsgpack.dump(meta, fid, encoding="utf-8")