def convert_msg_to_txt_file(self):
    pat = re.compile(r't(\d{2})-(\d{2})\.dat')
    for _file_num in range(len(self.msg_t_file_list)):
        with open(self.msg_t_file_list[_file_num].get_path(), 'rb') as f:
            self.ttt.extend(msgpack.load(f))
        with open(self.msg_x_file_list[_file_num].get_path(), 'rb') as f:
            self.x.extend(msgpack.load(f))
        with open(self.msg_y_file_list[_file_num].get_path(), 'rb') as f:
            self.y.extend(msgpack.load(f))
        with open(self.msg_I_file_list[_file_num].get_path(), 'rb') as f:
            self.I.extend(msgpack.load(f))
        print(123, self.msg_t_file_list[_file_num].get_path())
        print(234, len(self.ttt), len(self.x), len(self.y), len(self.I))
        _cur_res = re.search(pat, self.msg_t_file_list[_file_num].name)
        beads_num = _cur_res.group(1)
        file_num = _cur_res.group(2)
        _cur_dir_path = (self.msg_t_file_list[_file_num].parent.parent.get_path()
                         + '/txt/' + self.msg_t_file_list[_file_num].parent.name)
        if not os.path.exists(_cur_dir_path):
            os.makedirs(_cur_dir_path)
        _cur_file_path = _cur_dir_path + '/beads{}_{}.txt'.format(beads_num, file_num)
        print(555, len(self.ttt))
        with open(_cur_file_path, 'w+') as f:
            f.write('time(sec),x(nm),y(nm),total intensity(V)\r\n')
            for i in range(len(self.ttt)):
                f.write('{},{},{},{}\r\n'.format(self.ttt[i], self.x[i], self.y[i], self.I[i]))
                if i % 10**6 == 0:
                    print(i)
        self.ttt, self.x, self.y, self.I = [], [], [], []
def read_msg_file_to_tx2(self, _file_num=None):
    if _file_num is None:
        print('please assign file_number')
        return
    with open(self.msg_t_file_list[_file_num].get_path(), 'rb') as f:
        self.ttt.extend(msgpack.load(f))
    with open(self.msg_x_file_list[_file_num].get_path(), 'rb') as f:
        self.x.extend(msgpack.load(f))
    print(222, self.msg_t_file_list[_file_num].get_path())
    print(len(self.ttt))
def get_or_build(path, build_fn, *args, **kwargs):
    """
    Load from serialized form or build an object, saving the built object.

    Remaining arguments are provided to `build_fn`.
    """
    save = False
    obj = None
    if path is not None and os.path.isfile(path):
        with open(path, 'rb') as obj_f:
            obj = msgpack.load(obj_f, use_list=False, encoding='utf-8')
    else:
        save = True
    if obj is None:
        obj = build_fn(*args, **kwargs)
        if save and path is not None:
            with open(path, 'wb') as obj_f:
                msgpack.dump(obj, obj_f)
    return obj
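# Hedged usage sketch for get_or_build (not from the original source): the first call
# builds and caches the object as msgpack, later calls just load the cached file.
# The path and builder below are illustrative assumptions.
def build_token_list(corpus):
    # Return a plain list so msgpack can serialize it without custom hooks.
    return sorted({tok for doc in corpus for tok in doc})

# tokens = get_or_build('tokens.msgpack', build_token_list, corpus)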
def main():
    r = redis.StrictRedis('localhost', 6379)
    dat = msgpack.load(open(args.file_name))
    r.delete(args.set_name)
    for text, score in dat:
        enc = msgpack.dumps([text, score])
        r.zadd(args.set_name, score, enc)
def load_file(self, fp):
    try:
        return msgpack.load(fp)
    except Exception as ex:
        log.warn('Unable to load object from file: %s', ex, exc_info=True)
        return None
def get_msgpack_object_ref(path):
    """Get object-id ref for object in messagepack-encoded file.

    Args:
        (str) path: Full path to file.

    Returns:
        (str) reference, in form A/B/C e.g. '93/111124/2'

    Raises:
        IOError if the file cannot be opened.
        ValueError if the data in the file cannot be decoded.
        KeyError if the reference field is not found in the data.
    """
    import msgpack
    try:
        f = open(path)
    except IOError as err:
        raise IOError('Cannot open file for reading: {}'.format(path))
    try:
        t = msgpack.load(f)
    except Exception as err:
        raise ValueError('Cannot decode messagepack data in path "{}": {}'
                         ''.format(path, err))
    try:
        ref = t['ref']
    except KeyError:
        raise KeyError('Field "ref" not found in object at "{}"'.format(path))
    return ref
def split_all_url():
    '''
    Split the URL list into one file per person, then crawl the data with Hadoop.
    '''
    result_folder = 'person_url_check'
    if not os.path.exists(result_folder):
        os.makedirs(result_folder)
    pic_face_index_dic = msgpack.load(open('pic_face_index_url_dic.p', 'rb'))
    person_count = 0
    url_count = 0
    person_index = 0
    for person in pic_face_index_dic:
        start = time()
        person_index += 1
        with open(os.path.join(result_folder, str(person_index)), 'w') as f_result:
            try:
                need_check_url_index_list = pic_face_index_dic.get(person)
                for index, pic_url in need_check_url_index_list:
                    write_content = [person, index, pic_url]
                    f_result.write('\t'.join(map(str, write_content)) + '\n')
                    url_count += 1
            except:
                traceback.print_exc()
                continue
        person_count += 1
        print person, person_count, url_count, time() - start
def move_pic():
    pic_folder = '/data/pictures_face/'
    right_pic_folder = '/data/pictures_face_baidu_filter/'
    need_annotate_folder = '/data/pictures_face_need_annotate/'
    person_result_dic = msgpack.load(open('person_result_dic.p', 'r'))
    person_list = os.listdir(pic_folder)
    for person in person_list:
        old_person_path = os.path.join(pic_folder, person)
        right_person_path = os.path.join(right_pic_folder, person)
        annotate_person_path = os.path.join(need_annotate_folder, person)
        right_index_list, wrong_index_list = person_result_dic.get(
            person.decode('gbk').encode('utf-8'), ([], []))
        right_index_list = set(right_index_list)
        wrong_index_list = set(wrong_index_list)
        old_pic_list = os.listdir(old_person_path)
        for pic in old_pic_list:
            pic_index = pic.replace('.png', '').replace('0.jpg', '').replace('_', '')
            if pic_index in right_index_list:
                if not os.path.exists(right_person_path):
                    os.makedirs(right_person_path)
                shutil.copyfile(os.path.join(old_person_path, pic),
                                os.path.join(right_person_path, pic))
            elif pic_index in wrong_index_list:
                continue
            else:
                if not os.path.exists(annotate_person_path):
                    os.makedirs(annotate_person_path)
                shutil.copyfile(os.path.join(old_person_path, pic),
                                os.path.join(annotate_person_path, pic))
def load_check_result_url(dic_file, check_url_file):
    person_result_dic = {}  # {person: ([right_list], [wrong_list])}
    # Pictures that are definitely right and definitely wrong
    right_url_count = wrong_url_count = error_format_count = no_baike_count = no_meaning_count = 0
    if os.path.exists(dic_file):
        person_result_dic = msgpack.load(open(dic_file, 'rb'))
    for line in open(check_url_file):
        tmp = line.rstrip().split('\t')
        # [person_name, pic_index, pic_url, baike_name, baike_sim, newbaike_sim, guess_info]
        person_name = tmp[0]
        right_list, wrong_list = person_result_dic.get(person_name, ([], []))
        if len(tmp) == 7:
            if tmp[3] not in no_meaning_list:
                if tmp[3] == no_find_baike:
                    no_baike_count += 1
                    continue
                else:
                    if get_newbaike_sim(tmp[4]) > sim_threshold:
                        if tmp[0] == tmp[3]:
                            right_list.append(tmp[1])
                            right_url_count += 1
                        else:
                            wrong_url_count += 1
                            wrong_list.append(tmp[1])
                    else:
                        # Below the similarity threshold the result is unreliable; needs manual annotation
                        no_baike_count += 1
                        continue
            else:
                no_meaning_count += 1
                continue
        else:
            error_format_count += 1
            continue
        person_result_dic[person_name] = (right_list, wrong_list)
    print right_url_count, wrong_url_count, no_baike_count, no_meaning_count, error_format_count
    msgpack.dump(person_result_dic, open('person_result_dic.p', 'w'))
def parseFromFile(self, fname):
    """
    Overwritten to read Msgpack files.
    """
    import msgpack
    f = open(fname, "r")
    return msgpack.load(f)
def targets(tgt, tgt_type='glob', **kwargs):  # pylint: disable=W0613
    '''
    Return the targets from the flat yaml file, checks opts for location but
    defaults to /etc/salt/roster
    '''
    cache = os.path.join(syspaths.CACHE_DIR, 'master', 'minions', tgt, 'data.p')
    if not os.path.exists(cache):
        return {}

    roster_order = __opts__.get('roster_order', (
        'public', 'private', 'local'
    ))

    with salt.utils.fopen(cache, 'r') as fh_:
        cache_data = msgpack.load(fh_)

    ipv4 = cache_data.get('grains', {}).get('ipv4', [])
    preferred_ip = extract_ipv4(roster_order, ipv4)
    if preferred_ip is None:
        return {}

    return {
        tgt: {
            'host': preferred_ip,
        }
    }
def __init__(self, ensemble_clf_model):
    with open(ensemble_clf_model, "rb") as file_handler_in:
        self.classifiers, self.manual_feature_list = msgpack.load(file_handler_in)
    # interp GBDT x 3: x, v, a | manual GBDT | Output: LR
    self.interp_gbdts = [CustomGradientBoostingClassifier(arg)
                         for arg in self.classifiers[:3]]
    self.manual_gbdt = CustomGradientBoostingClassifier(self.classifiers[3])
    self.lr = CustomLogisticRegression(self.classifiers[4])
def parse(self, stream, media_type=None, parser_context=None):
    try:
        return msgpack.load(stream,
                            use_list=True,
                            object_hook=MessagePackDecoder().decode)
    except Exception as exc:
        raise ParseError('MessagePack parse error - %s' % unicode(exc))
def load_msgpack(blob, **kwargs):
    """
    Load a dict packed with msgpack into kwargs for a Trimesh constructor

    Parameters
    ----------
    blob : bytes
      msgpack packed dict containing keys 'vertices' and 'faces'

    Returns
    ----------
    loaded : dict
      Keyword args for Trimesh constructor, aka mesh=trimesh.Trimesh(**loaded)
    """
    import msgpack
    if hasattr(blob, 'read'):
        data = msgpack.load(blob)
    else:
        data = msgpack.loads(blob)
    loaded = load_dict(data)
    return loaded
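# Minimal sketch (an illustration, not part of trimesh) of a blob that load_msgpack()
# accepts: a msgpack-encoded dict with 'vertices' and 'faces'.
import msgpack

_mesh_blob = msgpack.dumps({'vertices': [[0, 0, 0], [1, 0, 0], [0, 1, 0]],
                            'faces': [[0, 1, 2]]})
# kwargs = load_msgpack(_mesh_blob)   # -> dict usable as trimesh.Trimesh(**kwargs)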
def main(path):
    sensor_time_values = {"Pupil Eyetracker": {}, "LTR-506ALS Light sensor": {}}
    for fn in glob.glob(path + "/*.msgpack"):
        s = msgpack.load(open(fn))
        for k, vs in s[3].items():
            for v in vs:
                if k == "Pupil Eyetracker":
                    sensor_time_values[k][v[1]] = v[0][2]
                elif k == "LTR-506ALS Light sensor":
                    sensor_time_values[k][v[1]] = v[0][0]
    mp.ion()
    mp.show()
    for k, vs in sensor_time_values.items():
        if k == "Pupil Eyetracker":
            c = [0, 1, 0]
        elif k == "LTR-506ALS Light sensor":
            c = [1, 0, 0]
        va = np.array(vs.values())
        M = np.max(va)
        m = np.min(va)
        s = 1.0 / (M - m)
        prev_x = 0
        xs = []
        ys = []
        for x, y in sorted(vs.items()):
            if x - prev_x < 0.25:
                continue
            xs.append(x)
            ys.append(y)
            prev_x = x
        mp.plot(np.array(xs) - xs[0], (np.array(ys) - m) * s, c=c, label=k)
    mp.title("Pupil Radius and Ambient Light over Time")
    mp.legend()
    mp.draw()
    mp.savefig("pupil_light_plot.png")
def _load_from_file(self, id):
    fname = self._get_fname_for_id(id)

    #
    # Due to some concurrency issues, we need to perform this check
    # before we try to read the .trace file.
    #
    if not os.path.exists(fname):
        for _ in xrange(int(1 / 0.05)):
            time.sleep(0.05)
            if os.path.exists(fname):
                break
        else:
            msg = 'Timeout expecting trace file to be written "%s"' % fname
            raise IOError(msg)

    #
    # Ok... the file exists, but it might still be being written
    #
    req_res = open(fname, 'rb')
    request_dict, response_dict = msgpack.load(req_res)
    req_res.close()

    request = HTTPRequest.from_dict(request_dict)
    response = HTTPResponse.from_dict(response_dict)

    return (request, response)
def redis_store(input_dir, name, server, port, **kw):
    import redis
    r = redis.StrictRedis(server, port)
    times = set()
    sensor_types = {}
    fn_to_time = lambda x: int(x.rsplit('/', 1)[-1].split('.', 1)[0])
    r.sadd('users', name)
    for fn in sorted(glob.glob(input_dir + '/*'), key=fn_to_time):
        fn_time = fn_to_time(fn) / 1000.
        if fn.endswith('.jpg'):
            times.add(fn_time)
            r.zadd(name + ':images', fn_time, os.path.basename(fn))
        else:
            try:
                data = msgpack.load(open(fn))
            except ValueError:
                print('Could not parse [%s]' % fn)
                continue
            print(data)
            for sensor_name, type_num in data[2].items():
                sensor_types[sensor_name] = msgpack.dumps(type_num)
            for sensor_name, samples in data[3].items():
                for sample in samples:
                    times.add(sample[1])
                    r.zadd(name + ':sensor:' + sensor_name, sample[1], msgpack.dumps(sample))
    r.hmset(name + ':sensors', sensor_types)
    r.zadd(name + ':times', **{msgpack.dumps(x): x for x in times})
def LoadPermResults(path, name, method, idx):
    if method == 'msgpack':
        with open(os.path.join(path, name + '.mpac')) as f:
            data = msgpack.load(f)[idx]
    else:
        with open(os.path.join(path, name + '.pickle')) as f:
            data = pickle.load(f)[idx]
    return data
def from_cache(cls, path):
    """Initialize a UCD from a cache file generated by gen_cache(), faster and
    uses less memory."""
    u = cls(None)
    u.code_points = {}
    with open(path) as f:
        for id, attrs in msgpack.load(f):
            u.code_points[id] = CodePoint(u, id, attrs)
    return u
def __call__(self):
    if self.filename is None:
        self.filename = msgpackmemoized_basedir + '/' + self.f.__name__ + '.msgpack'
    if path.exists(self.filename):
        return msgpack.load(open(self.filename))
    result = self.f()
    msgpack.dump(result, open(self.filename, 'w'))
    return result
def init():
    global topicid2word, username2userid, userid2username, userid2etc
    f = open(DATAS_DIRPATH + 'data_msgpack')
    aaa = msgpack.load(f)
    f.close()
    topicid2word = aaa['topicid2word']
    username2userid = aaa['username2userid']
    userid2username = aaa['userid2username']
    userid2etc = aaa['userid2etc']
def model_fromfile(path):
    if path.endswith('.js.gz'):
        return call_import(json.loads(zlib.decompress(open(path).read())))
    elif path.endswith('.msgpack.gz'):
        # decompressed data is a byte string, so use loads rather than load
        return call_import(msgpack.loads(zlib.decompress(open(path).read())))
    elif path.endswith('.pkl.gz'):
        return pickle.loads(zlib.decompress(open(path).read()))
    else:
        raise ValueError('Unknown model type[%s]' % path)
def _read_metadata(repo, repo_info):
    cache_path = '{0}/{1}.p'.format(
        self.opts['spm_cache_dir'],
        repo
    )
    with salt.utils.fopen(cache_path, 'r') as cph:
        metadata[repo] = {
            'info': repo_info,
            'packages': msgpack.load(cph),
        }
def load(cls, f):
    # Assumes everything is encoded in UTF-8.
    # This means that if some records (e.g., config files, feature vector
    # keys) are not encoded in UTF-8, the model cannot be loaded. However,
    # such models cannot be written out to text or JSON, so we don't really
    # care. Callers are responsible for handling UnicodeDecodeError.
    values = msgpack.load(f, encoding='utf-8', unicode_errors='strict')
    field_names = map(lambda x: x[0], cls.fields())
    c = cls()
    c.set(dict(zip(field_names, values)))
    return c
def load(cls, filepath=None):
    """Load the contents of the given filepath.

    If None, assume '<current_snapshot>/repos.msgpack'"""
    if filepath is None:
        filepath = os.path.join(config['current_snapshot'], 'repos.msgpack')
    with open(filepath, 'rb') as f:
        records = msgpack.load(f, object_hook=cls._loader, use_list=False)
    return records
def load(fp, resource=None, encoding='UTF8', full_clean=True):
    """
    Load a resource from a MessagePack encoded file.

    See :py:meth:`loads` for more details of the loading operation.

    :param fp: a file pointer to read MessagePack data from.
    :param resource: A resource instance or a resource name to use as the base for creating a resource.
    :param full_clean: Do a full clean of the object as part of the loading process.
    :returns: A resource object or object graph of resources loaded from file.
    """
    return resources.build_object_graph(msgpack.load(fp, encoding=encoding), resource, full_clean)
def load_reg():
    '''
    Load the register from msgpack files
    '''
    reg_dir = _reg_dir()
    regfile = os.path.join(reg_dir, 'register')
    try:
        with salt.utils.fopen(regfile, 'r') as fh_:
            return msgpack.load(fh_)
    except:
        log.error('Could not read from msgpack file {0}'.format(regfile))
        raise
def main():
    print 'find_use_word'
    min_threshold = 100
    max_threshold = 100000
    get_useful_word(min_threshold=min_threshold, max_threshold=max_threshold)

    print 'create_data'
    useful_word_count_dic = msgpack.load(open(
        data_folder + 'useful_word_count_dic_chinese_filter_%d_%d' % (min_threshold, max_threshold), 'rb'))
    raw_data_folder = '/data/liubo/message/message_content_False'
    content_sentence_file = data_folder + '/' + 'content_sentence_%d_%d' % (min_threshold, max_threshold)
    content_word_list_file = data_folder + '/' + 'content_word_list_%d_%d' % (min_threshold, max_threshold)
    create_data(raw_data_folder, useful_word_count_dic, content_sentence_file, content_word_list_file)
def test_index_directory(self):
    """ test_index """
    self.do_wait_for_tasks = False
    os.system("rm -Rf testdata/test")
    self.unzip_testfiles_clean()
    self.cboptions.sync = False
    localindex_check = msgpack.load(open("testdata/localindex_test.pickle"))
    localindex = make_local_index(self.cboptions)
    #msgpack.dump(localindex, open("testdata/localindex_test.pickle", "w"))
    self.assertTrue(localindex_check == localindex)
def _load_from_file(self, id):
    fname = self._get_fname_for_id(id)
    WAIT_TIME = 0.05

    #
    # Due to some concurrency issues, we need to perform these checks
    #
    for _ in xrange(int(1 / WAIT_TIME)):
        if not os.path.exists(fname):
            time.sleep(WAIT_TIME)
            continue

        # Ok... the file exists, but it might still be being written
        req_res = open(fname, 'rb')

        try:
            data = msgpack.load(req_res, use_list=True)
        except ValueError:
            # ValueError: Extra data. returned when msgpack finds invalid
            # data in the file
            req_res.close()
            time.sleep(WAIT_TIME)
            continue

        try:
            request_dict, response_dict, canary = data
        except TypeError:
            # https://github.com/andresriancho/w3af/issues/1101
            # 'NoneType' object is not iterable
            req_res.close()
            time.sleep(WAIT_TIME)
            continue

        if not canary == self._MSGPACK_CANARY:
            # read failed, most likely because the file write is not
            # complete but for some reason it was a valid msgpack file
            req_res.close()
            time.sleep(WAIT_TIME)
            continue

        # Success!
        req_res.close()

        request = HTTPRequest.from_dict(request_dict)
        response = HTTPResponse.from_dict(response_dict)

        return request, response
    else:
        msg = 'Timeout expecting trace file to be ready "%s"' % fname
        raise IOError(msg)
def build_vocab(questions, contexts):
    '''
    Build vocabulary sorted by global word frequency, or consider frequencies
    in questions first, which is controlled by `args.sort_all`.
    '''
    if args.sort_all:
        counter = collections.Counter(w for doc in questions + contexts for w in doc)
        vocab = sorted([t for t in counter if t in wv_vocab], key=counter.get, reverse=True)
    else:
        counter_q = collections.Counter(w for doc in questions for w in doc)
        counter_c = collections.Counter(w for doc in contexts for w in doc)
        counter = counter_c + counter_q
        vocab = sorted([t for t in counter_q if t in wv_vocab], key=counter_q.get, reverse=True)
        vocab += sorted([t for t in counter_c.keys() - counter_q.keys() if t in wv_vocab],
                        key=counter.get, reverse=True)
    total = sum(counter.values())
    matched = sum(counter[t] for t in vocab)
    log.info('vocab coverage {1}/{0} | OOV occurrence {2}/{3} ({4:.4f}%)'.format(
        len(counter), len(vocab), (total - matched), total, (total - matched) / total * 100))
    vocab.insert(0, "<PAD>")
    vocab.insert(1, "<UNK>")
    with open(join(squad_dir, 'vocab.msgpack'), 'rb') as f:
        voc1 = msgpack.load(f, encoding='utf8')
    if not set(vocab).issubset(set(voc1)):
        print("The checkpoints cannot be used, since the vocabulary has a different set of words")
        print("New dataset %d vs %d (diff in %d), adopting general vocabulary for checkpoints"
              % (len(vocab), len(voc1), len(vocab) - len(set(vocab) & set(voc1))))
        vocab = voc1
    else:
        # to preserve the order of indices due to sorted Counter
        vocab = voc1
    return vocab, voc1, counter
def check_minion_cache(self, preserve_minions=False):
    '''
    Check the minion cache to make sure that old minion data is cleared
    '''
    keys = self.list_keys()
    minions = []
    for key, val in six.iteritems(keys):
        minions.extend(val)

    m_cache = os.path.join(self.opts['cachedir'], 'minions')
    if os.path.isdir(m_cache):
        for minion in os.listdir(m_cache):
            if minion not in minions:
                shutil.rmtree(os.path.join(m_cache, minion))

    kind = self.opts.get('__role', '')  # application kind
    if kind not in kinds.APPL_KINDS:
        emsg = ("Invalid application kind = '{0}'.".format(kind))
        log.error(emsg + '\n')
        raise ValueError(emsg)
    role = self.opts.get('id', '')
    if not role:
        emsg = ("Invalid id.")
        log.error(emsg + "\n")
        raise ValueError(emsg)

    name = "{0}_{1}".format(role, kind)
    road_cache = os.path.join(self.opts['cachedir'], 'raet', name, 'remote')
    if os.path.isdir(road_cache):
        for road in os.listdir(road_cache):
            root, ext = os.path.splitext(road)
            if ext not in ['.json', '.msgpack']:
                continue
            prefix, sep, name = root.partition('.')
            if not name or prefix != 'estate':
                continue
            path = os.path.join(road_cache, road)
            with salt.utils.fopen(path, 'rb') as fp_:
                if ext == '.json':
                    data = json.load(fp_)
                elif ext == '.msgpack':
                    data = msgpack.load(fp_)
            if data['role'] not in minions:
                os.remove(path)
def load_scoops_from_action_index_map(container_dir, header_only=True):
    '''
    Load a msgpack action index map file and return a list of scoop dictionaries
    '''
    indexed_scoop_list = []
    action_index_map_filepath = get_container_action_index_map_filepath(container_dir)
    print(action_index_map_filepath)
    with open(action_index_map_filepath, 'rb') as ff:
        imap = mp.load(ff)
    for packed_indexed_scoop in imap.values():
        unpacked_indexed_scoop = unpack_scoop_header(packed_indexed_scoop,
                                                     container_dir,
                                                     header_only=header_only)
        indexed_scoop_list.append(unpacked_indexed_scoop)
    return indexed_scoop_list
def _read(self):
    '''
    Read in from disk
    '''
    if not HAS_MSGPACK or not os.path.exists(self._path):
        return
    with salt.utils.files.fopen(self._path, 'rb') as fp_:
        cache = msgpack.load(fp_, encoding=__salt_system_encoding__)
    if "CacheDisk_cachetime" in cache:
        # new format
        self._dict = cache["CacheDisk_data"]
        self._key_cache_time = cache["CacheDisk_cachetime"]
    else:
        # old format
        self._dict = cache
        timestamp = os.path.getmtime(self._path)
        for key in self._dict:
            self._key_cache_time[key] = timestamp
    if log.isEnabledFor(logging.DEBUG):
        log.debug('Disk cache retrieved: {0}'.format(cache))
def load(fp, resource=None, encoding='UTF8', full_clean=True, default_to_not_supplied=False):
    """
    Load a resource from a MessagePack encoded file.

    See :py:meth:`loads` for more details of the loading operation.

    :param fp: a file pointer to read MessagePack data from.
    :param resource: A resource instance or a resource name to use as the base for creating a resource.
    :param full_clean: Do a full clean of the object as part of the loading process.
    :returns: A resource object or object graph of resources loaded from file.
    """
    return resources.build_object_graph(msgpack.load(fp, encoding=encoding), resource,
                                        full_clean, default_to_not_supplied)
def load():
    Storage.DATABASE = QFontDatabase()
    if not Storage.USER_PREFERENCES_DIR.exists():
        Storage.USER_PREFERENCES_DIR.mkdir()
    Storage.load_page_data()
    Storage.SOUNDS = {
        "tap": QSound(Storage.resolve_audio("navigation_forward-selection-minimal.wav"))
    }
    if Storage.USER_DATA_FILE.exists():
        # msgpack reads binary data, so open the file in binary mode
        with open(Storage.USER_DATA_FILE, "rb") as file:
            Storage.USER_DATA = msgpack.load(file)
def _load_master(self, cls: Type[ma.MasterAsset]) -> ma.MasterDict:
    name = cls.__name__
    # -1 for self, and -1 for the asset_manager argument.
    # What remains is the number of arguments to keep from the msgpack file itself.
    argument_count = len(inspect.signature(cls.__init__).parameters) - 2
    asset_path = self.path / f'Master/{name}.msgpack'
    with asset_path.open('rb') as f:
        data = msgpack.load(f, strict_map_key=False, use_list=False)
    if self.drop_extra_fields:
        if len(next(iter(data.values()))) != argument_count:
            self.logger.info(f'Dropping extra arguments from {name}.')
        master_dict = ma.MasterDict(
            {k: cls(self, *(v[:argument_count])) for k, v in data.items()},
            name, asset_path)
    else:
        master_dict = ma.MasterDict(
            {k: cls(self, *v) for k, v in data.items()}, name, asset_path)
    self.masters[name] = master_dict
    if cls.db_fields:
        with self.db:
            cur = self.db.cursor()
            fields = [
                cls.__dataclass_fields__[db_field] for db_field in cls.db_fields
            ]
            type_mapping = {
                bool: 'integer',
                int: 'integer',
                float: 'real',
                str: 'text',
                msgpack.Timestamp: 'datetime'
            }
            cur.execute(
                f'CREATE TABLE {name}'
                f'({", ".join(f"{field.name} {type_mapping[field.type]}" for field in fields)})'
            )
            insert_query = f'INSERT INTO {name} VALUES ({f", ".join(["?"] * len(cls.db_fields))})'
            for value in master_dict.values():
                field_dict = dataclasses.asdict(value)
                field_values = [field_dict[name] for name in cls.db_fields]
                cur.execute(insert_query, field_values)
    return master_dict
def cBpack(lang_code, src_path, swear_path, dst_path):
    """
    Works with a cBpack source and builds a combined list.

    Explanation of cBpack:
    https://github.com/LuminosoInsight/wordfreq/blob/7a742499a42a6539be772ab26b6460d7e160ae04/wordfreq/__init__.py#L37-L76

    Frequency calculation adjusted to work with the 15..255 format.
    """
    with gzip.open(src_path, "rb") as f_src:
        data_raw = msgpack.load(f_src, raw=False)
    header = data_raw[0]
    if (not isinstance(header, dict)
            or header.get("format") != "cB"
            or header.get("version") != 1):
        raise ValueError("Unexpected header: {}".format(header))
    data = data_raw[1:]

    data_sanitized = []
    for innerlist in data:
        sanitized_list = []
        for word in innerlist:
            word_sanitized = word.strip()
            if __validate_str(word_sanitized):
                sanitized_list.append(word_sanitized)
        if len(sanitized_list) > 0:
            data_sanitized.append(sanitized_list)

    swear_words = []
    with io.open(swear_path, encoding="utf-8") as f_swear:
        for swear_word in f_swear.readlines():
            swear_word = swear_word.strip()
            if len(swear_word) > 0:
                if swear_word[0] != "#":
                    swear_words.append(swear_word)

    index = 0
    with io.open(dst_path, "w", encoding="utf-8") as f_dst:
        # Write header of combined list first
        f_dst.write(__header(lang_code))
        len_list = len(data_sanitized)
        for innerlist in data_sanitized:
            freq = __freq_for_index(index, len_list)
            for word in innerlist:
                if word in swear_words:
                    adjusted_freq = 0
                else:
                    adjusted_freq = freq
                f_dst.write(" word={},f={}\n".format(word, adjusted_freq))
            index += 1
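# Illustrative cBpack layout assumed by cBpack() above (per the wordfreq link in the
# docstring): element 0 is a header dict, the remaining elements are lists of words
# sharing one centibel frequency bucket, with more frequent buckets first. The example
# words and file name are placeholders, not real data.
_example_cbpack = [
    {"format": "cB", "version": 1},
    ["the", "of"],          # first (most frequent) bucket
    ["example", "words"],   # next bucket
]
# with gzip.open("freqs.msgpack.gz", "wb") as f:
#     msgpack.dump(_example_cbpack, f)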
def _recver(self):
    """
    Receive and process incoming data.
    """
    recv_func = self.sock.recv

    def _read(c):
        d = recv_func(c)
        if d:
            return d
        if self.stoped:
            raise GreenletExit
        self._recver_on_error = True
        self._on_socket_error(None)
        self._recver_on_error = False
        return None

    try:
        sio = BytesIO()
        while not self.stoped:
            d = _read(4)
            if d is None:
                printf('_read(4) return None')
                continue
            dlen = unpack('I', d)[0]
            #rs = []
            sio.seek(0)
            sio.truncate()
            while dlen > 0:
                data = _read(dlen)
                if data is None:
                    continue
                #rs.append(data)
                sio.write(data)
                dlen -= len(data)
            #spawn(self._handle, loads(''.join(rs)))
            sio.seek(0)
            spawn(self._handle, load(sio))
            #self._pool.spawn(self._handle, loads(''.join(rs)))
    except GreenletExit:
        printf('[RpcService._recver GreenletExit]%s', self.sock)
    except Exception as err:
        printf('[RpcService._recver]%s', err)
    finally:
        self.stop()
def load_eval_data(opt, eval_data):
    # can be extended to true test set
    with open(eval_data, 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(data['embedding'])
    assert opt['embedding_dim'] == embedding.size(1)
    assert opt['num_features'] == len(data['premise_features'][0][0])
    eval_set = list(zip(
        data['premise_ids'],
        data['premise_features'],
        data['premise_tags'],
        data['premise_ents'],
        data['hypothesis_ids'],
        data['hypothesis_features'],
        data['hypothesis_tags'],
        data['hypothesis_ents']
    ))
    return eval_set, embedding, [text2class(ans) for ans in data['answers']]
def load_processed_data(f, threshold):
    """Takes a file path containing trajectory data and loads it,
    returning a data frame with additional metadata.

    Arguments:
        f: string
            Path to trajectory data archive file.
        threshold: Classifier threshold to filter the results before
            querying the metadata from the database.

    Returns:
        pandas.DataFrame
    """
    data = None
    try:
        with zipfile.ZipFile(f, "r", zipfile.ZIP_DEFLATED) as zf:
            with zf.open(f.split("/")[-1].replace("zip", "msgpack"), "r") as file:
                data = msgpack.load(file)
    except Exception as e:
        print("Error loading {}".format(f))
        print(str(e))
        return None

    if len(data) == 0:
        return None

    from IPython.display import display

    frame_id, bee_id0, bee_id1, score = zip(*data)
    frame_id = pd.Series(frame_id, dtype=np.uint64)
    bee_id0 = pd.Series(bee_id0, dtype=np.uint16)
    bee_id1 = pd.Series(bee_id1, dtype=np.uint16)
    score = pd.Series(score, dtype=np.float32)

    data = [frame_id, bee_id0, bee_id1, score]
    data = pd.concat(data, axis=1)
    data.columns = ["frame_id", "bee_id0", "bee_id1", "score"]
    data = data[(~pd.isnull(data.score)) & (data.score >= threshold)]

    all_frame_ids = data.frame_id.unique()
    metadata = db.metadata.get_frame_metadata(all_frame_ids)
    metadata.frame_id = metadata.frame_id.astype(np.uint64)
    metadata["datetime"] = pd.to_datetime(metadata.timestamp, unit="s")

    data = data.merge(metadata, on="frame_id", how="inner")
    return data
def main():
    picttree.main(picttree.argument_parser().parse_args([
        "-m", "black-base", "-t", "128", "-s",
        "-T", "picture2d", "-O", "msgpack",
        "-o", "output.p2mt", "input.png"
    ]))
    with open("output.p2mt", "rb") as f:
        treeinfo = msgpack.load(f, encoding="UTF-8")
    show_0th_volumes(treeinfo)
    show_0th_volumes_4_5(treeinfo)
    show_1st_volume(treeinfo)
    show_1st_volume_6_7(treeinfo)
    # image = Image.open("input.png").convert("RGB")
    # print(image)
    picttree.main(picttree.argument_parser().parse_args([
        "-m", "black-base", "-t", "128", "-s",
        "-T", "picture2d", "-O", "json",
        "-o", "output-tree.json", "input.png"
    ]))
def iter_agenda(publications=None, rin_list=None):
    agenda_dir = Path(data_dir) / 'agenda'
    if not publications:
        publications = os.listdir(agenda_dir)
    if rin_list:
        rin_list = set(rin_list)
    for publication in publications:
        for rin_file in os.listdir(agenda_dir / publication):
            if rin_list:
                rin = rin_file.split('.')[0]
                if rin not in rin_list:
                    continue
            with open(agenda_dir / publication / rin_file, 'rb') as f:
                yield load(f)
def load_key(path_or_file, serial='json'):
    '''
    Read in a key from a file and return the applicable key object based on
    the contents of the file
    '''
    if hasattr(path_or_file, 'read'):
        stream = path_or_file
    else:
        if serial == 'json':
            stream = open(path_or_file, 'r')
        else:
            stream = open(path_or_file, 'rb')

    try:
        if serial == 'msgpack':
            import msgpack
            key_data = msgpack.load(stream)
        elif serial == 'json':
            import json
            key_data = json.loads(stream.read(), encoding='UTF-8')
    finally:
        if stream != path_or_file:
            stream.close()

    if 'priv' in key_data and 'sign' in key_data and 'pub' in key_data:
        return libnacl.dual.DualSecret(
            libnacl.encode.hex_decode(key_data['priv']),
            libnacl.encode.hex_decode(key_data['sign']))
    elif 'priv' in key_data and 'pub' in key_data:
        return libnacl.public.SecretKey(
            libnacl.encode.hex_decode(key_data['priv']))
    elif 'sign' in key_data:
        return libnacl.sign.Signer(libnacl.encode.hex_decode(key_data['sign']))
    elif 'pub' in key_data:
        return libnacl.public.PublicKey(
            libnacl.encode.hex_decode(key_data['pub']))
    elif 'verify' in key_data:
        return libnacl.sign.Verifier(key_data['verify'])
    elif 'priv' in key_data:
        return libnacl.secret.SecretBox(
            libnacl.encode.hex_decode(key_data['priv']))
    raise ValueError('Found no key data')
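# Illustrative key-file contents accepted by load_key() (the field names come from the
# branches above; the hex strings are placeholders, not real key material).
_example_pub_only = {"pub": "d0e1f2..."}                      # -> libnacl.public.PublicKey
_example_dual = {"priv": "a1b2...", "sign": "c3d4...",
                 "pub": "d0e1..."}                            # -> libnacl.dual.DualSecret
# json.dump(_example_pub_only, open('key.json', 'w')) would produce a loadable JSON keyfile.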
def unpack_scoop_header(packed_header, container_dir, header_only=True):
    unpacked_header = {
        'id': packed_header[0],
        'scoop': unpack_scoop(packed_header[1]),
        'measured_result_map': packed_header[2],
        'modeled_result_map': packed_header[3]
    }
    if not header_only:
        traj_dir = os.path.join(container_dir, 'get_stuff', 'traj')
        scoop_filepath = os.path.join(traj_dir, unpacked_header['id']) + '.mpac'
        try:
            with open(scoop_filepath, 'rb') as ff:
                scoop_mpac = mp.load(ff)
            unpacked_header['scoop'] = unpack_scoop(scoop_mpac)
        except IOError:
            print('Scoop file', scoop_filepath, 'not found!')
    return unpacked_header
def load_http_response_from_temp_file(filename, remove=True):
    """
    :param filename: The filename that holds the HTTP response as msgpack
    :param remove: Remove the file after reading
    :return: An HTTP response instance
    """
    # Importing here to prevent import cycle
    from w3af.core.data.url.HTTPResponse import HTTPResponse

    try:
        data = msgpack.load(open(filename, 'rb'), raw=False)
        result = HTTPResponse.from_dict(data)
    except:
        if remove:
            remove_file_if_exists(filename)
        raise
    else:
        if remove:
            remove_file_if_exists(filename)
        return result
def main(argv):
    '''
    Command line tool for MindMeld construction/manipulation.
    '''
    p = argparse.ArgumentParser(prog='melder')
    p.add_argument('meldfile', help='meld file path')
    p.add_argument('--add-pypath', dest='pypaths', default=[], action='append',
                   help='add a python path to the meld')
    p.add_argument('--add-datfiles', dest='datfiles', action='store_true',
                   help='when adding pypath, include datfiles')
    p.add_argument('--dump-info', dest='dumpinfo', action='store_true',
                   help='dump the entire meld info dictionary to stdout')
    p.add_argument('--set-name', dest='name', default=None,
                   help='set meld name (ie, "foolib")')
    p.add_argument('--set-version', dest='version', default=None,
                   help='set meld version (ie, 8.2.30)')

    opts = p.parse_args(argv)

    meldinfo = {}
    if os.path.isfile(opts.meldfile):
        with open(opts.meldfile, 'rb') as fd:
            meldinfo = msgpack.load(fd, encoding='utf8')

    meld = s_mindmeld.MindMeld(**meldinfo)

    if opts.version:
        meld.setVersion(vertup(opts.version))

    if opts.name:
        meld.setName(opts.name)

    for pypath in opts.pypaths:
        meld.addPyPath(pypath, datfiles=opts.datfiles)

    meldinfo = meld.getMeldDict()
    if opts.dumpinfo:
        print(repr(meldinfo))

    meldbyts = msgpack.dumps(meld.getMeldDict(), encoding='utf8', use_bin_type=True)
    with open(opts.meldfile, 'wb') as fd:
        fd.write(meldbyts)
def load_msgpack(blob, file_type=None):
    '''
    Load a dict packed with msgpack into kwargs for Trimesh constructor

    Parameters
    ----------
    blob: msgpack packed dict with keys for 'vertices' and 'faces'
    file_type: not used

    Returns
    ----------
    loaded: kwargs for Trimesh constructor (aka mesh=trimesh.Trimesh(**loaded))
    '''
    import msgpack
    if hasattr(blob, 'read'):
        data = msgpack.load(blob)
    else:
        data = msgpack.loads(blob)
    loaded = load_dict(data)
    return loaded
def __init__(self, root, task_root, debug, split="train", data_fraction=1.0):
    self.root = f"{root}/{task_root}"
    self.split = split
    self.name = "clevr"
    with open(f"{self.root}/{split}.msgpack", "rb") as fp:
        self.data = msgpack.load(fp, encoding="utf8")
    if split == "val":
        self.process_family()
    if split == "train":
        partial = int(len(self.data) * data_fraction)
        self.data = self.data[:partial]
    with open(f"{self.root}/dictionary.json", "r") as fp:
        dic = json.load(fp)
    self.qdic = dic["question"]
    self.adic = dic["answer"]
    if debug:
        self.data = self.data[:1000]
def load_data(self):
    print('Load train_meta.msgpack...')
    meta_file_name = os.path.join(self.spacyDir, 'train_meta.msgpack')
    with open(meta_file_name, 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    self.opt['char_vocab_size'] = len(meta['char_vocab'])

    all_embedding = {}
    if 'GLOVE' in self.opt:
        glove_embedding = torch.Tensor(meta['glove_embedding'])
        all_embedding['glove_embedding'] = glove_embedding
        self.opt['vocab_size'] = glove_embedding.size(0)
        self.opt['vocab_dim'] = glove_embedding.size(1)

    if 'FastText' in self.opt:
        fast_embedding = torch.Tensor(meta['fast_embedding'])
        all_embedding['fast_embedding'] = fast_embedding
        self.opt['vocab_size'] = fast_embedding.size(0)
        self.opt['vocab_dim'] = fast_embedding.size(1)

    if 'PHOC' in self.opt:
        phoc_embedding = torch.Tensor(meta['phoc_embedding'])
        all_embedding['phoc_embedding'] = phoc_embedding

    return meta['vocab'], meta['char_vocab'], all_embedding
def load_from_file(self, filename):
    """Load coverage db from a saved covdb file. Requires msgpack"""
    if file_backing_disabled:
        raise Exception(
            "[!] Can't save/load coverage db files without msgpack. Try `pip install msgpack`")
    self.filename = filename
    # msgpack reads binary data, so open the file in binary mode
    with open(filename, "rb") as f:
        object_dict = msgpack.load(f)
    self.module_name = object_dict["module_name"]
    self.module_base = object_dict["module_base"]
    self.module_blocks = object_dict["module_blocks"]
    self.trace_dict = {k: set(v) for k, v in object_dict["trace_dict"].items()}
    self.block_dict = object_dict["block_dict"]
    self.function_stats = object_dict["function_stats"]
    self.coverage_files = object_dict["coverage_files"]
    self.total_coverage = set(object_dict["total_coverage"])
    self.frontier = set(object_dict["frontier"])
def test_minhash(documents, stopwords, labeled):
    with open(stopwords, 'rb') as fp:
        stops = set(msgpack.load(fp))
    filter_processor = simple_preprocess_and_filter_stopwords(stops)
    docs = load_docs(documents,
                     process_text=lambda doc: filter_processor(
                         combine_issue_and_body_filter_labels(doc)))
    labels = load_testset(labeled, docs)

    model = nutai.minhash.Model()
    true = [label for _, _, label in labels]
    pred = [
        approx_jaccard_score(model.calculate_signature(docs[id0]['text']),
                             model.calculate_signature(docs[id1]['text']))
        for id0, id1, _ in labels
    ]
    best_thresh = calculate_best_threshold(pred, labels)
    print("best threshold:", best_thresh)
    print_confusion_matrix(confusion_matrix(true, [p > best_thresh for p in pred]))
def main():
    print("Downloading data...")
    os.system("wget https://www.dropbox.com/s/r33ljlagys0wscb/data.msgpack?dl=1")
    os.system("wget https://www.dropbox.com/s/83txkgiqmdlv1m3/meta.msgpack?dl=1")
    os.system("mv 'meta.msgpack?dl=1' data/meta.msgpack")
    os.system("mv 'data.msgpack?dl=1' data/data.msgpack")
    os.system("python3 -m spacy download en_core_web_sm")
    print("Done.")

    print("Loading data...")
    with open(F_DATA_PATH, 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    print("Done.")

    print("Processing...")
    data = (
        make_comfort_data(data['train']),
        make_comfort_data(data['dev'], is_dev=True)
    )
    print("Done.")

    print("Saving...")
    save_comfort_data(data)
    print("Done.")
def main():
    path = input('Enter file path of your model:')
    with open(RAW_META_PATH, 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = load_pickle(EMB_PATH)
    model = models.AttentionModel(path)
    w2id = {w: i for i, w in enumerate(meta['vocab'])}
    tag2id = {w: i for i, w in enumerate(meta['vocab_tag'])}
    ent2id = {w: i for i, w in enumerate(meta['vocab_ent'])}
    while True:
        id_ = 0
        try:
            while True:
                context = input('Enter context: ')
                if context.strip():
                    break
            while True:
                question = input('Enter question: ')
                if question.strip():
                    break
        except EOFError:
            break
        id_ += 1
        annotated = annotate(('interact-{}'.format(id_), context, question), meta['wv_cased'])
        model_in_raw = to_id(annotated, w2id, tag2id, ent2id)
        model_in = generate_batch(model_in_raw)
        start_probas, end_probas = model.model.predict(model_in)
        answ_pair = get_preds2(start_probas, end_probas, MAX_ANSW_LEN)
        print('Answer:', end=' ')
        for i in range(answ_pair[0][0], answ_pair[0][1] + 1):
            print(model_in_raw[6][model_in_raw[7][i][0]:model_in_raw[7][i][1] + 1], end='')
        print('\n\n\n-------------|||-------------\n\n\n')
def cal_acc(dist_file):
    x = []
    y = []
    same_dist_list, no_same_dist_list = msgpack.load(open(dist_file, 'rb'))
    for dist in same_dist_list:
        x.append(dist)
        y.append(0)
    for dist in no_same_dist_list:
        x.append(dist)
        y.append(1)
    x = np.reshape(np.asarray(x), (len(x), 1))
    y = np.asarray(y)
    train_x, valid_x, train_y, valid_y = train_test_split(x, y, test_size=0.1)

    clf = LinearSVC()
    print len(x), len(y)
    clf.fit(train_x, train_y)
    acc = accuracy_score(valid_y, clf.predict(valid_x))
    print acc

    clf = DecisionTreeClassifier()
    clf.fit(train_x, train_y)
    acc = accuracy_score(valid_y, clf.predict(valid_x))
    print acc
def load_object(path, build_fn, *args, **kwargs):
    """
    Load from serialized form or build an object, saving the built object;
    kwargs provided to `build_fn`.
    """
    save = False
    obj = None
    if path is not None and os.path.isfile(path):
        with open(path, 'rb') as obj_f:
            obj = msgpack.load(obj_f, use_list=False, encoding='utf-8')
    else:
        save = True
    if obj is None:
        obj = build_fn(*args, **kwargs)
        if save and path is not None:
            with open(path, 'wb') as obj_f:
                msgpack.dump(obj, obj_f)
    return obj
def get_or_build(path, build_fn, *args, **kwargs):
    """
    Load from serialized form or build an object, saving the built object.

    Remaining arguments are provided to `build_fn`.
    """
    save = False
    obj = None
    if path is not None and os.path.isfile(path):
        with open(path, 'rb') as obj_f:
            obj = msgpack.load(obj_f, use_list=False, encoding='utf-8')
    else:
        save = True
    if obj is None:
        obj = build_fn(*args, **kwargs)
        if save and path is not None:
            with open(path, 'wb') as obj_f:
                msgpack.dump(obj, obj_f)
    return obj
def load(path):
    """
    Return data read from file path as dict
    file may be either json, msgpack, or cbor given by extension
    .json, .mgpk, or .cbor respectively
    Otherwise raise IOError
    """
    root, ext = os.path.splitext(path)
    if ext == '.json':
        with ocfn(path, "rb") as f:
            it = json.load(f)
    elif ext == '.mgpk':
        with ocfn(path, "rb") as f:
            it = msgpack.load(f)
    elif ext == '.cbor':
        with ocfn(path, "rb") as f:
            it = cbor.load(f)
    else:
        raise IOError(f"Invalid file path ext '{path}' "
                      f"not '.json', '.mgpk', or 'cbor'.")
    return it
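# Hedged usage sketch for load() above: write a small dict with msgpack under the
# '.mgpk' extension and read it back through the extension dispatch. The directory,
# file name, and dict contents are illustrative assumptions.
def _demo_roundtrip(tmpdir):
    path = os.path.join(tmpdir, 'state.mgpk')
    with open(path, 'wb') as f:
        msgpack.dump({"sn": 3, "note": "demo"}, f)
    return load(path)   # -> {'sn': 3, 'note': 'demo'}; a '.yaml' path would raise IOError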
def consolidate(dump):
    pts = {}
    for x in glob.glob(dump + '/*.msgpack'):
        msg = msgpack.load(open(x))
        if msg[0] == 'sensors':
            print(msg)
            for x in msg[3].get('Pupil Point', []):
                if int(x[0][0]) == -1:
                    continue
                pts.setdefault(int(x[0][0]), []).append([x[0][2], x[0][1], x[0][4], x[0][3]])
    params = []
    for k, vs in sorted(pts.items()):
        p = np.array(vs)[:, 2:]
        print(p)
        params.append([np.mean(p, 0).tolist(), np.cov(p.T).tolist(), p.tolist()])
        print(params[-1][0])
        print(params[-1][1])
    open('calib.js', 'w').write(json.dumps(params))
def getGuides(self, gene_exon):
    try:
        filename = gene_exon + ".p"
        if self.scoring_alg == "Doench":
            path = os.path.join(
                os.path.dirname(__file__),
                'static/data/GRCh37_guides_msgpack_Doench/',
                filename)
        elif self.domains_enabled:
            path = os.path.join(
                os.path.dirname(__file__),
                'static/data/GRCh37_guides_msgpack_Azimuth_domains/',
                filename)
        else:
            path = os.path.join(
                os.path.dirname(__file__),
                'static/data/GRCh37_guides_msgpack_Azimuth/',
                filename)
        with open(path) as datafile:
            gRNAs = msgpack.load(datafile)
        return gRNAs
    except IOError:
        gene, exon = gene_exon.split('_')
        raise ExonError(gene, exon)