Example #1
def get_rules(rebuild=False):
    mtime = (publications_file.stat().st_mtime_ns,
             projects_file.stat().st_mtime_ns)
    rules = _cached_rules.get(mtime)
    if rebuild or not rules:
        _cached_rules.clear()
        _cached_rules[mtime] = rules = build_rules(
            json_load(publications_file), json_load(projects_file))
        threading.Thread(target=validate_permissions, args=(rules, )).start()
    return rules
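All of these snippets rely on a project-specific json_load helper rather than calling json.load directly, and its signature varies between codebases: most pass a file path, while others pass an already-open file object (util.json_load(fp)) or a raw string. A minimal path-based sketch, assuming nothing beyond the standard library, could look like this:

import json

def json_load(path):
    # Hypothetical helper (an assumption, not any of the original util modules):
    # read and decode a JSON file given a path-like object or string.
    with open(path, encoding='utf-8') as fp:
        return json.load(fp)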
Example #2
def make_dataframe(path):
    threads = json_load(path)
    all_messages = []
    for i, th in enumerate(threads):
        h = th['head']

        if len(th['tail']) == 0:
            continue

        all_messages.append(
            (h['id'],
             i, h['title'],
             (h['body'] or '') + ' ' + make_label_string(h['labels']),
             h['sender'], h['created_at'])
        )
        for t in th['tail']:
            all_messages.append(
                (t['id'], i,
                 'RE: ' + h['title'],
                 t['body'],
                 t['sender'], t['created_at'])
            )
    return pd.DataFrame(all_messages,
                        columns=['message_id',
                                 KEY_THREAD_ID, 'subject', 'body',
                                 KEY_SENDER_ID, KEY_TIMESTAMP])
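A minimal sketch of the thread structure make_dataframe consumes, inferred only from the fields it reads (the real data likely carries more keys):

threads = [
    {
        'head': {'id': 1, 'title': 'Subject', 'body': 'Hello', 'labels': ['info'],
                 'sender': 'alice', 'created_at': '2020-01-01T00:00:00Z'},
        'tail': [
            {'id': 2, 'body': 'A reply', 'sender': 'bob',
             'created_at': '2020-01-02T00:00:00Z'},
        ],
    },
]

Note that threads whose 'tail' is empty are skipped entirely, so a head message only yields a row when it has at least one reply.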
Example #3
def add_sc_locale_to_history(repo_dir):

    db = get_db()

    source_dir = pathlib.Path(repo_dir) / 'suttacentral/client/localization/elements/'

    docs = {}
    files = list(source_dir.glob('*.json'))
    lang = 'en'
    for file in files:
        entries = json_load(file)

        for k, v in entries.items():
            context = f'{file.name.split("_")[0]}_{k}'

            if context in docs:
                doc = docs[context]
            else:
                doc = {
                    '_key': f'sc_latest_{context}',
                    'context': context,
                    'origin': 'sc_latest',
                    'strings': {}
                }
                docs[context] = doc
            doc['strings'][lang] = v

    errors = db['historic'].import_bulk(docs.values(), on_duplicate='replace', halt_on_error=False)
    
    return files, errors
Example #4
def open_cache(suite, prefix):
    try:
        with open(os.path.join(awfy.path, prefix + '.json')) as fp:
            cache = util.json_load(fp)
            return cache['graph']
    except:
        return {'timelist': [], 'lines': [], 'direction': suite.direction}
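Judging by the fallback returned in the except branch, the cached <prefix>.json file is assumed to wrap the graph in a top-level 'graph' key, roughly:

# Hypothetical cache layout inferred from open_cache's fallback value
cache = {
    'graph': {
        'timelist': [],
        'lines': [],
        'direction': 1,  # placeholder; the real value mirrors suite.direction
    }
}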
Example #5
def update_site(reference=None):
    db = get_db()
    db.collection('historic').truncate()
    temp_dir_old = clone_repo(branch_or_tag='prelocale', reference=reference)
    add_old_sc_locale_to_history(temp_dir_old.name)

    temp_dir_latest = clone_repo('master', reference=reference)
    files, _ = add_sc_locale_to_history(temp_dir_latest.name)

    add_site_to_history()

    # A mapping of pathlib.Path to the data contained within the file
    root_files_and_data = {}
    for file in files:
        stem = file.stem.split('_')[0]
        new_name = f'{stem}_root-en-site.json'
        new_file = pathlib.Path('root/en/site') / new_name
        data = json_load(file)
        root_files_and_data[new_file] = data


    temp_dir_old.cleanup()
    temp_dir_latest.cleanup()

    print("Waiting for ArangoDB to complete view update")
    db.aql.execute(STALL_QUERY)

    print('Restoring site projects from history')
    files_and_data = restore_site_from_history(root_files_and_data)
    files_and_data.update(root_files_and_data)
    rewrite_site_projects(files_and_data)
Example #6
def main():
    opt = json_load('./option.json')

    logging.basicConfig(filename=opt.PREFIX + '.log',
                        level=logging.DEBUG,
                        format='%(asctime)s %(message)s')
    logger = logging.getLogger('default_logger')
    logger.addHandler(logging.StreamHandler())

    with tf.Session() as sess:
        env = Environment(opt.ACTION_REPEAT)
        age = Agent(env, sess, logger)
        sess.run(tf.global_variables_initializer())

        start_episode = 12345  # if load past
        if start_episode > 0:
            age.load(meta_graph='./save/' + opt.PREFIX +
                     '-%s.meta' % start_episode,
                     step=start_episode)  #age.save(True)#
        else:
            age.save(True)
            age.train(start_episode=0)

        for epi in range(opt.TEST_EPISODE_MAX):
            reward = age.play()
            debug(logger, 'Test episode %d got reward %d' % (epi, reward))
Example #7
def open_cache(suite, prefix):
    try:
        with open(os.path.join(awfy.path, prefix + ".json")) as fp:
            cache = util.json_load(fp)
            return cache["graph"]
    except:
        return {"timelist": [], "lines": [], "direction": suite.direction}
Example #8
    def __init__(self):
        self.userName = "******".format(
            studentId=config.get("Joyrun", "StudentID"),
            suffix=config.get("Joyrun", "suffix"))
        self.password = config.get("Joyrun", "Password")

        try:
            cache = json_load(self.Cache_LoginInfo)
        except (FileNotFoundError, JSONDecodeError):
            cache = {}

        if cache.get("userName") == self.userName:
            self.uid = cache.get("uid", 0)
            self.sid = cache.get("sid", '')
        else:  # userName does not match, so do not use the cached info
            self.uid = 0
            self.sid = ''

        self.session = requests.Session()

        self.session.headers.update(self.base_headers)
        self.session.headers.update(self.device_info_headers)

        self.auth = JoyrunAuth(self.uid, self.sid)

        if self.uid and self.sid:  # read the login state straight from the cache
            self.__update_loginInfo()
        else:
            self.login()  # otherwise log in again
Example #9
    def __init__(self):
        # filenames are the key connecting each image with its paired text
        self.train_file_names = u.pickle_load(c.train_filename_path)
        self.test_file_names = u.pickle_load(c.test_filename_path)

        self.train_file_names = sorted(self.train_file_names)
        self.test_file_names = sorted(self.test_file_names)

        self.train_class_id = c.train_class_info
        self.test_class_id = c.test_class_info
        # self.train_class_id = u.pickle_load2(c.train_class_id_txt_path)
        # self.test_class_id = u.pickle_load2(c.test_class_id_txt_path)

        # text
        self.tokenizer = RegexpTokenizer(r'\w+')

        if not os.path.isfile(c.vocab_path):
            train_text = self.__load_all_text(self.train_file_names)
            test_text = self.__load_all_text(self.test_file_names)
            all_text = train_text + test_text

            self.__word_count_statistics(all_text)

            vocab = v.vocab()
            vocab.create(all_text)

        self.word_2_index = u.json_load(c.vocab_path)
        self.index_2_word = u.json_load(c.index_2_word_path)
        # self.index_2_word = {v: k for k, v in self.word_2_index.items()}
        self.vocab_size = len(self.word_2_index)
        print("vocab_size : ", self.vocab_size)

        # image
        self.base_img_size = [64, 128, 256]
        label_image_size = 256
        rate = 76 / 64
        self.image_transform = transforms.Compose([
            transforms.Resize(int(label_image_size * rate)),
            transforms.RandomCrop(label_image_size),
            transforms.RandomHorizontalFlip()
        ])

        self.norm = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        self.filenames_bbox = self.__load_bbox()
Example #10
def load_metadata(prefix):
    try:
        with open(os.path.join(awfy.path, "metadata-" + prefix + ".json"), "r") as fp:
            cache = util.json_load(fp)
    except:
        cache = {"earliest_run_id": 0}

    return cache
Example #11
def load_metadata(prefix):
    try:
        with open(os.path.join(awfy.path, 'metadata-' + prefix + '.json'), 'r') as fp:
            cache = util.json_load(fp)
    except:
        cache = { 'last_stamp': 0 }

    return cache
Example #12
    def __build(self):
        """ 构造记录
        """
        points_per_loop = json_load(self.A_Loop_GPS_JSON)

        self.date = self.__get_date()
        self.duration = int(self.distance * self.Distance_Per_Loop / 0.4 * (self.pace + self.__pace_delta()) * 60) # running time in s
        self.step = int((self.stride_frequncy + self.__stride_frequncy_delta()) * self.duration / 60) # total step count
        self.detail = list(self.__point_generator(points_per_loop))
Example #13
def get_rules():
    mtime = publications_file.stat().st_mtime_ns
    rules = _cached_rules.get(mtime)
    if not rules:
        _cached_rules.clear()
        _cached_rules[mtime] = rules = build_rules(
            json_load(publications_file))
        threading.Thread(target=validate_permissions, args=(rules, )).start()
    return rules
Example #14
def open_cache(suite, prefix):
    try:
        with open(os.path.join(awfy.path, prefix + '.json')) as fp:
            cache = util.json_load(fp)
            return cache['graph']
    except:
        return { 'timelist': [],
                 'lines': [],
                 'direction': suite.direction
               }
Example #15
 def count_strings(self, entry):
     json_file = get_file(entry["path"])
     data = json_load(json_file)
     count = 0
     for k, v in data.items():
         if k == "_meta":
             continue
         if v:
             count += 1
     return count
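A sketch of the kind of segment file being counted, assuming the bilara-style layout suggested by segment IDs elsewhere in these examples (e.g. "dn1:1.1"); only non-empty values outside "_meta" contribute to the count:

# Hypothetical segment file: one key per segment ID, plus an optional "_meta" block
data = {
    "_meta": {"author": "example-author"},
    "dn1:1.1": "Thus have I heard.",
    "dn1:1.2": ""  # empty, so not counted
}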
Example #16
def load_json(entry):
    _meta = entry.get("_meta", {})
    _meta["path"] = entry.get("path")

    json_file = get_file(entry["path"])

    if json_file.exists():
        segments = json_load(json_file)
    else:
        segments = {}
    return {**deepcopy(_meta), "segments": segments}
Example #17
def main():
    import os
    import cPickle as pkl
    import pandas as pd
    from util import json_load
    from max_cover import k_best_trees
    import argparse
    
    parser = argparse.ArgumentParser('Evaluate the events')
    parser.add_argument('-c', '--cand_trees_path', required=True, nargs='+')
    parser.add_argument('--interactions_path', required=True)
    parser.add_argument('--events_path', required=True)
    args = parser.parse_args()

    interactions = json_load(args.interactions_path)
    true_events = json_load(args.events_path)
    methods = [metrics.adjusted_rand_score,
               metrics.adjusted_mutual_info_score,
               metrics.homogeneity_score,
               metrics.completeness_score,
               metrics.v_measure_score]

    K = 10
    indexes = []
    scores = []
    for p in args.cand_trees_path:
        cand_trees = pkl.load(open(p))
        pred_trees = k_best_trees(cand_trees, K)

        indexes.append(os.path.basename(p))
        scores.append(evaluate_meta_tree_result(
            true_events,
            pred_trees,
            [i['message_id'] for i in interactions],
            methods
        ))
    df = pd.DataFrame(scores, index=indexes,
                      columns=[m.__name__ for m in methods] +
                      [m.__name__ + "(all)" for m in methods] +
                      ['precision', 'recall', 'f1'])
    df.to_csv('tmp/evaluation.csv')
Example #18
 def get_topic_meta_graph_from_synthetic(cls,
                                         path,
                                         preprune_secs,
                                         **kwargs):
     return cls.get_topic_meta_graph(json_load(path),
                                     cosine,
                                     preprune_secs=preprune_secs,
                                     decompose_interactions=False,
                                     given_topics=True,
                                     convert_time=False,
                                     **kwargs
                                 )
Example #19
class Memories(PowerDict, Capability):
    personal = json_load(os.path.join(REFERENCE_DIR, 'personal.json'))

    def __init__(self, info={}, person=None):
        PowerDict.__init__(self, info)
        self.update(Memories.calculated(self))
        Capability.__init__(self, person)

    @classmethod
    def calculated(cls, self):
        return {
            'full_name': self['first_name'] + " " + self['last_name'],
            'unique_id': self['first_name'] + "_" + self['last_name']
        }

    def run_phrase(self):
        return ""  # TODO: Alex

    def __json__(self):
        return self

    @classmethod
    def from_json(cls, obj):
        return cls(obj)

    @classmethod
    def random_gender(cls):
        return first_sample(cls.personal['gender']).lower()

    @classmethod
    def names(cls):
        return cls.personal['names']

    @classmethod
    def random_first_name(cls, gender):
        names = cls.names()
        if gender in names:
            pool = names[gender]
        else:
            pool = names['male'] + names['female']
        return first_sample(pool)

    @classmethod
    def random_last_name(cls):
        return first_sample(cls.personal['surnames'])

    @classmethod
    def random(cls):
        mem = cls()
        mem.gender = cls.random_gender()
        mem.first_name = cls.random_first_name(mem.gender)
        mem.last_name = cls.random_last_name()
        return mem
Example #20
def main():
    import os
    import cPickle as pkl
    import pandas as pd
    from util import json_load
    from max_cover import k_best_trees
    import argparse

    parser = argparse.ArgumentParser('Evaluate the events')
    parser.add_argument('-c', '--cand_trees_path', required=True, nargs='+')
    parser.add_argument('--interactions_path', required=True)
    parser.add_argument('--events_path', required=True)
    args = parser.parse_args()

    interactions = json_load(args.interactions_path)
    true_events = json_load(args.events_path)
    methods = [
        metrics.adjusted_rand_score, metrics.adjusted_mutual_info_score,
        metrics.homogeneity_score, metrics.completeness_score,
        metrics.v_measure_score
    ]

    K = 10
    indexes = []
    scores = []
    for p in args.cand_trees_path:
        cand_trees = pkl.load(open(p))
        pred_trees = k_best_trees(cand_trees, K)

        indexes.append(os.path.basename(p))
        scores.append(
            evaluate_meta_tree_result(true_events, pred_trees,
                                      [i['message_id'] for i in interactions],
                                      methods))
    df = pd.DataFrame(scores,
                      index=indexes,
                      columns=[m.__name__ for m in methods] +
                      [m.__name__ + "(all)"
                       for m in methods] + ['precision', 'recall', 'f1'])
    df.to_csv('tmp/evaluation.csv')
Example #21
    def load(cls, unique_id):
        folder = os.path.join(PEOPLE_DIR, unique_id)

        # check if they exist
        if not os.path.exists(folder):
            return None

        caps = {}
        for k, v in Person.cap_classes.iteritems():
            filepath = os.path.join(folder, k + '.json')
            if os.path.exists(filepath):
                caps[k] = json_load(filepath, cls=v)
            else:
                caps[k] = v()
        return cls(caps)
Example #22
    def __point_generator(self):
        """ GPS 坐标 生成器

            无限循环跑圈,并对每个点都引入一个偏差量
        """
        points_per_loop = json_load(self.A_Loop_GPS_JSON)
        points_num_per_loop = int(self.pace * 0.4 * 60 / self.sampleinterval)
        while True:
            for i in range(points_num_per_loop):
                idx = math.floor(i / points_num_per_loop *
                                 len(points_per_loop))
                point = points_per_loop[idx]  # .copy()
                point[0] += self.__point_delta()
                point[1] += self.__point_delta()
                yield point.copy()  # must copy, otherwise the shared point would be mutated in place
Example #23
def main():
    opt = json_load('./option.json')
    logging.basicConfig(filename=opt.PREFIX+'.log', level=logging.DEBUG, format='%(asctime)s %(message)s')
    with tf.Session() as sess:
        env = Environment()
        age = Agent(env, sess)
        sess.run(tf.global_variables_initializer())
        
        start_episode = 0 # if load past
        if start_episode > 0:
            age.load(meta_graph='./save/BREAKOUT_DQN-%s.meta' % start_episode, step=start_episode)#age.save(True)#
            age.train(start_episode=start_episode)#age.train()#
        else:
            age.save(True)
            age.train(start_episode=0)
        
        for epi in range(opt.TEST_EPISODE_MAX):
            reward = age.play()
            print('Test episode %d got reward %d' % (epi, reward))
            debug('Test episode %d got reward %d' % (epi, reward))
Example #24
def find_all_months(cx, prefix, name):
    pattern = prefix + 'raw-' + name + r'-(\d\d\d\d)-(\d+)\.json'

    files = []
    for file in os.listdir(awfy.path):
        m = re.match(pattern, file)
        if not m:
            continue

        year = int(m.group(1))
        month = int(m.group(2))
        files.append(((year, month), file))

    files = sorted(files, key=lambda key: key[0][0] * 12 + key[0][1])
    graphs = []
    for when, file in files:
        with open(os.path.join(awfy.path, file)) as fp:
            cache = util.json_load(fp)
        graphs.append((when, cache['graph']))

    return graphs
Example #25
    def get_token(self, refresh=False):
        """ 如果 token 没有过期,则返回缓存 token ,否则重新登录

            Args:
                refresh    bool    是否立即重新登录并刷新缓存(默认 false)
            Returns:
                token      str     token 字符串
        """
        try:
            if refresh:
                token = self.__login()
            else:
                tokenCache = json_load(self.Cache_AccessToken)
                if tokenCache["expire_in"] < time.time():
                    token = self.__login()
                else:
                    token = tokenCache["token"]
        except (FileNotFoundError, JSONDecodeError):  # no cache file, or the file is empty
            token = self.__login()
        finally:
            return token
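Based on the keys read above, the access-token cache file is assumed to hold just the token string and an absolute expiry timestamp, for example:

# Hypothetical contents of the Cache_AccessToken file (shape inferred from get_token)
tokenCache = {"token": "<access token>", "expire_in": 1700000000.0}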
Example #26
def find_all_months(cx, prefix, name):
    pattern = prefix + 'raw-' + name + r'-(\d\d\d\d)-(\d+)\.json'

    files = []
    for file in os.listdir(awfy.path):
        m = re.match(pattern, file)
        if not m:
            continue

        year = int(m.group(1))
        month = int(m.group(2))
        files.append(((year, month), file))

    files = sorted(files, key=lambda key: key[0][0] * 12 + key[0][1])
    graphs = []
    for when, file in files:
        with open(os.path.join(awfy.path, file)) as fp:
            cache = util.json_load(fp)
        graphs.append((when, cache['graph']))

    return graphs
Example #27
def add_old_sc_locale_to_history(repo_dir):
    """
    
    Historically SuttaCentral contained both roots and translations for the site localization

    """

    db = get_db()

    source_dir = pathlib.Path(repo_dir) / 'suttacentral/client/localization/elements/'

    docs = {}

    for folder in source_dir.glob('*'):
        if not folder.is_dir():
            continue
        for file in folder.glob('*.json'):
            lang = file.stem
            data = json_load(file)
            entries = data[lang]

            for k, v in entries.items():
                context = f'{folder.name}_{k}'

                if context in docs:
                    doc = docs[context]
                else:
                    doc = {
                        '_key': f'sc_old_{context}',
                        'context': context,
                        'origin': 'sc_old',
                        'strings': {}
                    }
                    docs[context] = doc
                doc['strings'][lang] = v

    errors = db['historic'].import_bulk(docs.values(), on_duplicate='replace', halt_on_error=False)
    return errors
Example #28
    def __init__(self):
        self.userName = "******" % config.get("Joyrun", "StudentID")
        self.password = config.get("Joyrun", "Password")

        try:
            cache = json_load(self.Cache_LoginInfo)
        except (FileNotFoundError, JSONDecodeError):
            cache = {}

        self.uid = cache.get("uid", 0)
        self.sid = cache.get("sid", '')

        self.session = requests.Session()

        self.session.headers.update(self.base_headers)
        self.session.headers.update(self.device_info_headers)

        self.auth = JoyrunAuth(self.uid, self.sid)

        if self.uid and self.sid: # read the login state straight from the cache
            self.__update_loginInfo()
        else:
            self.login() # otherwise log in again
Example #29
def retrieve_graph(cx, file):
    with open(os.path.join(awfy.path, file)) as fp:
        cache = util.json_load(fp)
    return cache['graph']
Example #30
def retrieve_graph(cx, file):
    with open(os.path.join(awfy.path, file)) as fp:
        cache = util.json_load(fp)
    return cache['graph']
Example #31
    def recurse(folder, meta_definitions=None):
        subtree = {}
        meta_definitions = meta_definitions.copy()

        metafiles = set(folder.glob("_*.json"))
        if metafiles:
            for metafile in sorted(metafiles, key=humansortkey):
                file_data = json_load(metafile)
                meta_definitions.update(file_data)

                for k, v in file_data.items():
                    if k not in _meta_definitions:
                        _meta_definitions[k] = v

        for file in sorted(folder.glob("*"), key=humansortkey):

            if file.name.startswith("."):
                continue
            if file in metafiles:
                continue
            long_id = file.stem
            meta = {}
            for part in file.parts:
                if part.endswith(".json"):
                    part = part[:-5]
                if part in meta_definitions:
                    meta[part] = meta_definitions[part]
            if file.is_dir():
                subtree[file.name] = recurse(file,
                                             meta_definitions=meta_definitions)
                subtree[file.name]["_meta"] = meta
            elif file.suffix == ".json":
                mtime = file.stat().st_mtime_ns
                path = str(file.relative_to(WORKING_DIR))
                obj = subtree[long_id] = {
                    "path": path,
                    "mtime": mtime,
                    "_meta": meta
                }
                if "_" in long_id:
                    uid, muids = get_uid_and_muids(file)
                else:
                    uid = file.name if file.is_dir() else file.stem
                    muids = None
                obj["uid"] = uid
                if uid not in uid_index:
                    uid_index[uid] = set()
                uid_index[uid].add(long_id)
                if long_id in file_index:
                    logging.error(f"{str(file)} not unique")
                file_index[long_id] = obj
                if muids:
                    for muid in muids:
                        if muid not in muid_index:
                            muid_index[muid] = set()
                        muid_index[muid].add(long_id)

                    # Create Virtual Files
                    if 'translation' in muids:
                        uid, muids = long_id.split('_')
                        muids = muids.replace('translation', 'comment')
                        comment_stem = f"{uid}_{muids}"
                        if comment_stem in uid_index:
                            continue
                        parent = pathlib.Path('comment') / file.relative_to(
                            WORKING_DIR / 'translation').parent
                        virtual_file = parent / (comment_stem + '.json')
                        meta = {
                            part: meta_definitions[part]
                            for part in muids.split('-')
                            if part in meta_definitions
                        }
                        obj = {
                            "uid": uid,
                            "path": str(virtual_file),
                            "mtime": None,
                            "_meta": meta
                        }
                        uid_index[uid].add(comment_stem)
                        file_index[comment_stem] = obj
                        for muid in muids.split('-'):
                            muid_index[muid].add(comment_stem)

        return subtree
Example #32
        def recurse(folder, meta_definitions=None, depth=0):
            subtree = {}
            meta_definitions = meta_definitions.copy()

            metafiles = set(folder.glob("_*.json"))
            if metafiles:
                for metafile in sorted(metafiles, key=humansortkey):
                    file_data = json_load(metafile)
                    if isinstance(file_data, dict):
                        meta_definitions.update(file_data)

                        for k, v in file_data.items():
                            if k not in _meta_definitions:
                                _meta_definitions[k] = v

            for file in sorted(folder.glob("*"), key=humansortkey):

                if file.name.startswith("."):
                    continue
                if file in metafiles:
                    continue
                long_id = file.stem
                meta = {}
                for part in file.parts:
                    if part.endswith(".json"):
                        part = part[:-5]
                    if part in meta_definitions:
                        meta[part] = meta_definitions[part]
                if file.is_dir():
                    subtree[file.name] = recurse(
                        file,
                        meta_definitions=meta_definitions,
                        depth=depth + 1)
                    subtree[file.name]["_meta"] = meta
                elif file.suffix == ".json":
                    mtime = file.stat().st_mtime_ns
                    path = str(file.relative_to(WORKING_DIR))
                    obj = subtree[long_id] = {
                        "path": path,
                        "mtime": mtime,
                        "_meta": meta
                    }
                    if "_" in long_id:
                        uid, muids = get_uid_and_muids(file)
                    else:
                        uid = file.name if file.is_dir() else file.stem
                        muids = None
                    obj["uid"] = uid
                    if uid not in uid_index:
                        uid_index[uid] = set()
                    uid_index[uid].add(long_id)
                    if long_id in file_index:
                        logging.error(f"{str(file)} not unique")
                    file_index[long_id] = obj
                    if muids:
                        for muid in muids:
                            if muid not in muid_index:
                                muid_index[muid] = set()
                            muid_index[muid].add(long_id)

                        # Create Virtual Files
                        if 'translation' in muids:
                            uid, muids = long_id.split('_')
                            _add_virtual_comment_file(uid, muids, file,
                                                      uid_index, muid_index,
                                                      file_index,
                                                      meta_definitions)

            if depth == 0:
                _add_virtual_project_files(uid_index, muid_index, file_index,
                                           subtree, _meta_definitions)
            return subtree
Example #33
def get_interaction_ids(path):
    return [i["message_id"] for i in json_load(path)]
Example #34
	def from_str(cls, s):
		data = json_load(s)
		t = Transaction(**data['transaction'])
		return cls(t, data['data'])
Example #35
import gensim
import networkx as nx
from scipy.spatial.distance import cosine
from datetime import timedelta

from interactions import InteractionsUtil as IU

from util import json_load

interactions = json_load('data/enron/interactions.json')

lda_model = gensim.models.ldamodel.LdaModel.load(
    'data/enron/model-50-50.lda'
)
dictionary = gensim.corpora.dictionary.Dictionary.load(
    'data/enron/dict.pkl'
)

different_weights = [
    {'topics': 0.2,
     'bow': 0.8},
    {'topics': 1.0},
    {'bow': 1.0},
]

for weights in different_weights:
    meta_graph_kws = {
        'distance_weights': weights,       
    }

    g = IU.get_topic_meta_graph(
Example #36
def get_interaction_ids(path):
    return [i['message_id'] for i in json_load(path)]
Example #37
def load_dataset(conf, lang, bert=None):
    if conf.best_vocab_size:
        conf.vocab_size = json_load(conf.best_vocab_size_file)[conf.lang]
    data = datasets[conf.dataset].load(conf, lang, bert=bert)
    data.describe()
    return data
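The best_vocab_size_file loaded above is assumed to map language codes to tuned vocabulary sizes, since the result is indexed by conf.lang; something like:

# Hypothetical contents of conf.best_vocab_size_file
{"en": 8000, "de": 16000}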
Example #38
def test_convert_json():
    for filename in glob.glob(os.path.join(SCRIPT_DIR, '*.json')):
        example = json_load(filename)
        normalize_dict(example)
        yield assert_converts, example
Example #39
    def running_record(self):
        """ 构造一次跑步记录
        """
        points_per_loop = json_load(self.A_Loop_GPS_JSON)  # 一圈的坐标
        distance = config.getfloat("PB", "distance")  # 总距离 km
        pace = config.getfloat("PB", "pace")  # 速度 min/km
        stride_frequncy = config.getint("PB", "stride_frequncy")  # 步频 步/min
        duration = distance * pace * 60  # 用时 s

        cal_per_loop = lambda: 20 + random.random() * (23 - 20)  # 20-23 per lap
        point_delta = lambda: (random.random() - 0.5) * 0.00005  # random coordinate offset
        distance_delta_rate = lambda: 1 + (random.random() - 0.5
                                           ) * 0.1  # random distance multiplier, 0.95 - 1.05
        stride_frequncy_delta = lambda: int(
            (random.random() - 0.5) * 2 * 10)  # random cadence deviation of up to 10 steps/min
        random_alt = lambda: round(42 + random.random() *
                                   (48 - 42), 1)  # altitude 42-48
        random_speed = lambda: round(3.1 + random.random() *
                                     (4.4 - 3.1), 2)  # not sure how speed is actually defined ...

        def locus_generator():
            end_time = int(time.time() * 1000)
            start_time = now_time = end_time - int(duration * 1000)  # start from the start time
            now_stepcount = 0
            now_distance = 0.00
            while now_time <= end_time:
                for point in points_per_loop:
                    per_distance = 0.4 / len(
                        points_per_loop) * distance_delta_rate()  # distance between consecutive points, in km
                    now_stepcount += int(
                        (stride_frequncy + stride_frequncy_delta()) *
                        per_distance * pace)
                    now_distance += per_distance
                    yield {
                        # "id": ??? # 拿不到主键,但是不加主键也可以提交成功,数据库应该设置了主键自增长
                        "alt": random_alt(),
                        "speed": random_speed(),
                        "heartrate": 0,
                        "distance": now_distance,
                        "lat": round(point['lat'] + point_delta(), 8),
                        "lng": round(point['lng'] + point_delta(), 8),
                        "stepcount": now_stepcount,
                        "traintime": now_time,
                    }
                    now_time += int(per_distance * pace * 60 * 1000)  # time step in ms

        locuslist = list(locus_generator())
        distance = locuslist[-1]['distance']  # actual distance in km
        duration = (locuslist[-1]['traintime'] -
                    locuslist[0]['traintime']) / 1000  # actual duration in s

        return json.dumps({
            "biggerId":
            self.biggerId,
            "token":
            self.token,
            "locusrlist": [{
                "cal": int(cal_per_loop() * distance * 1000 / 400),
                "distance": round(distance, 2),
                "duration": int(duration),
                "heartrate": 0,
                "team": 1,
                # "pace": ??? # 此处的 pace 与跑步记录中的 pace 含义不统一
                # "intermittent": ???
                "locuslist": [locuslist],
            }],
        }).encode('utf-8')
Example #40
def update_segment(segment, user):
    """
    segment looks like:
    {
      "segmentId": "dn1:1.1",
      "field": "translation-en-sujato",
      "value": "..", "oldValue": "..."
    }
    """

    segment_id = segment["segmentId"]

    if not is_id_legal(segment_id):
        logging.error(f"Invalid Segment ID: {segment_id}")
        return {"error": "Invalid Segment ID"}

    uid, _ = segment_id.split(":")
    parent_uid = get_parent_uid(uid)

    long_id = f'{parent_uid}_{segment["field"]}'
    try:
        filepath = get_file_path(long_id)
    except KeyError as e:
        logging.exception(e)
        logging.error('f"{long_id}" not found, {segment}')
        return {"error": "file not found"}

    file = get_file(filepath)

    permission = get_permissions(filepath, user['login'])
    if permission != Permission.EDIT:
        logging.error("User not allowed to edit")
        return {"error": "Inadequate Permission"}

    with git_fs._lock:
        try:
            file_data = json_load(file)
        except FileNotFoundError:
            file.parent.mkdir(parents=True, exist_ok=True)
            file_data = {}
        current_value = file_data.get(segment_id)
        result = {}
        if current_value and current_value != segment.get("oldValue"):
            result["clobbered"] = current_value

        if current_value != segment["value"]:
            result["changed"] = True

        file_data[segment_id] = segment["value"]

        sorted_data = dict(sorted(file_data.items(), key=bilarasortkey))

        try:
            json_save(sorted_data, file)
            result["success"] = True
        except Exception:
            logging.exception(f"could not write segment: {segment}")
            return {"error": "could not write file"}

        executor.submit(background_update, filepath, user, segment)
        return result
Example #41
        # filter out articles with single company tag
        if has_multiple_companies(a):
            valid_articles.append(
                transform_article(a)
            )
    return valid_articles


def dump2interactions(db, collection_name, output_path):
    articles = articles_articles(db, collection_name)
    print('# valid articles: ', len(articles))
    json_dump(articles, output_path)
    return articles


def collect_people_info(articles):
    participant_ids = set(
        itertools.chain(*[a['participant_ids'] for a in articles])
        )
    print('# unique participants: ', len(participant_ids))
    return [{'id': p} for p in participant_ids]


if __name__ == '__main__':
    # articles = dump2interactions(MongoClient()['bloomberg'],
    #                              'articles',
    #                              'data/bloomberg/interactions.json')
    articles = json_load('data/bloomberg/interactions.json')
    json_dump(collect_people_info(articles),
              'data/bloomberg/people.json')