Example no. 1
0
    def _check_dir(self,env):
        movs = [(-1, 0), (0, 1), (1, 0), (0, -1)]

        for mov in movs[:]:
            if is_in(self.row + mov[0],self.column + mov[1], env):
                place = env[self.row + mov[0]][self.column + mov[1]]

                if len(place.objects):                    
                    if 3 in place.objects:
                        j = 2
                        while True:
                            if is_in(self.row + mov[0]*j, self.column + mov[1]*j, env):
                                objects = env[self.row + mov[0]*j][self.column + mov[1]*j].objects
                                if len(objects) and 3 in objects:
                                    j += 1
                                    continue
                                elif not len(objects):
                                    break
                            
                            movs.remove(mov)
                            break        
                    else:
                        movs.remove(mov)
            else:
                movs.remove(mov)
       
        return movs
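In this grid-agent example (and in examples no. 4 and no. 13 below), is_in is called with a row, a column and the environment grid, so it acts as a bounds check. The helper itself is not part of the snippet; a minimal sketch, assuming env is a rectangular list of lists of cells, could be:

def is_in(row, column, env):
    # Hypothetical bounds check: True when (row, column) lies inside the
    # rectangular grid `env` (a list of equally long rows).
    return 0 <= row < len(env) and 0 <= column < len(env[0])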
Example no. 2
0
def _filter_synset(term, not_terms, synsets):
    for synset, gloss, lemmas in synsets:
        if (
                is_in(term, gloss) or
                is_in(term, lemmas)
        ) and not (
                is_in(not_terms, gloss) or
                is_in(not_terms, lemmas)
        ):
            yield synset, gloss, lemmas
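Here is_in is applied to a term (or a list of excluded terms) and a gloss or lemma list, so it acts as a textual containment test rather than a bounds check; the same call shape recurs in the synset- and gender-coding examples below. A plausible sketch, assuming the helper accepts a single string or a list of strings and matches against either a token list or a plain string:

def is_in(terms, text):
    # Hypothetical containment test: True if any of `terms` occurs in `text`.
    # `text` may be a token list (membership test) or a plain string
    # (substring test); a single string is treated as a one-element list.
    if isinstance(terms, str):
        terms = [terms]
    return any(term in text for term in terms)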
Example no. 3
0
    def load_events_file(self):
        # The QWidget widget is the base class of all user interface objects in PyQt4.
        w = QtGui.QWidget()
        # Set window size.
        w.resize(320, 240)
        # Set window title
        w.setWindowTitle("Hello World!")
        filename = QtGui.QFileDialog.getOpenFileName(
            w, 'Open File', '/', "Text Files (*.txt *.csv)")
        print(filename)

        try:
            df = pd.read_csv(str(filename))
            if not is_in(REQUIRED_COLS, df.columns):
                # display error message
                show_message("Dataframe must contain columns:\n\n{0}".format(
                    ", ".join(REQUIRED_COLS)))
            else:
                folder = os_path.dirname(str(filename))
                self.events_df = df
                self.events_df[
                    "next_img_name"] = folder + self.events_df["next_img_name"]
                self.images_names = df.next_img_name.unique()
                self.count = -1
                self.update_image(foward=True)
        except Exception as e:
            print(e)
            show_message("Not a valid dataframe")
Example no. 4
0
    def _path_to_closest_object(self, env, obj):
        if self.have_child:
            checked = [[
                True
                if 3 in env[i][j].objects or 2 in env[i][j].objects else False
                for j in range(len(env[0]))
            ] for i in range(len(env))]
        else:
            checked = [[
                True if 3 in env[i][j].objects else False
                for j in range(len(env[0]))
            ] for i in range(len(env))]

        moves = [(-1, 0), (0, 1), (1, 0), (0, -1)]
        cola = [[(self.row, self.column)]]

        while cola:
            path = cola.pop(0)
            row, col = path[-1]

            if len(env[row][col].objects) and env[row][col].objects[0] == obj:
                return path

            for direct in moves:
                new_row = row + direct[0]
                new_col = col + direct[1]

                if is_in(new_row, new_col,
                         checked) and not checked[new_row][new_col]:
                    new_path = list(path)
                    new_path.append((new_row, new_col))
                    cola.append(new_path)

            checked[row][col] = True
Example no. 5
0
 def goal_test(self, state):
     """Return True if the state is a goal. The default method compares the
     state to self.goal or checks for state in self.goal if it is a list, as specified in the constructor. Override this
     method if checking against a single self.goal is not enough."""
     if isinstance(self.goal, list):
         return is_in(state, self.goal)
     else:
         return state == self.goal
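Examples no. 5, 6, 7, 16 and 22 are variants of the Problem.goal_test method from the aima-python code base, whose utility is_in checks membership by identity ('is') rather than equality. A sketch consistent with that utility:

def is_in(elt, seq):
    """Similar to (elt in seq), but compares with 'is', not '=='."""
    return any(x is elt for x in seq)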
Example no. 6
0
    def goal_test(self, state):
        """Retornar verdadeiro se o estado for uma meta. O método padrão compara o
         estado para self.goal ou verifica o estado em self.goal se é uma
         lista, conforme especificado no construtor."""
        if isinstance(self.goal, list):
            return is_in(state, self.goal)
        else:
            return state == self.goal
Example no. 7
0
 def goal_test(self, state):
     """Return True if the state is a goal. The default method compares the
     state to self.goal or checks for state in self.goal if it is a list, as specified in the constructor. Override this
     method if checking against a single self.goal is not enough."""
     if isinstance(self.goal, list):
         return is_in(state, self.goal)
     else:
         return state == self.goal
Example no. 8
0
def _filter(file_path):
    index, terms = Index(), ['allegori', 'symboli']

    for notation, data in parse(file_path):
        if notation.depth > 1:
            if is_in(terms, data['desc'].lower()) or \
                    notation.code[-1] == '0':
                index.add(notation, data, select=True)
                continue

        index.add(notation, data, select=False)

    return index
Example no. 9
0
def _query_synset(synset_name):
    synset = wordnet.synset(synset_name)

    for synset in _get_hypernyms(synset):
        gloss = word_tokenize(synset.definition().lower())

        if not is_in(['informal', 'slang', 'gossip'], gloss):
            lemmas = [
                lemma.replace('_', ' ').lower()
                for lemma in synset.lemma_names()
            ]

            yield synset, gloss, lemmas
Example no. 10
0
def count_prescript(series_two, list_name, group_list_word):
    """
    Count how many times each symptom group appears in the prescription data.
    :param series_two:
    :param list_name:
    :param group_list_word:
    :return:
    """
    group_list = utils.word_2_num(list_name, group_list_word)
    series_list,index_list = series_2_list(series_two, list_name)
    # utils.save_pickle('data.txt',series_list)
    count_list = []
    id_list = []
    for group in group_list:
        count = 0
        item_li = []
        for i,item in enumerate(series_list):
            if (utils.is_in(group, item)):
                # this item contains the whole cluster group
                item_li.append(index_list[i])
                count += 1
        count_list.append(count)
        id_list.append(item_li)
    '''Sort the prescriptions corresponding to each symptom group'''
    for list_index,item_li in enumerate(id_list):
        # query_rows = series_two.query('序号==' + str(li))
        if len(item_li)<3: continue
        query_rows = series_two.loc[series_two['序号'].isin(item_li)]
        med_all = []
        '''Collect all drugs from this group's prescriptions'''
        for index,row in query_rows.iterrows():
            med = row['处方'].split('、')
            med_all.extend(med)
        count_result = Counter(med_all)
        med_list = [x for x in count_result if count_result[x]>1]      # drugs occurring more than once
        count_dict = {}
        for index,row in query_rows.iterrows():
            med = row['处方'].split('、')
            id = row['序号']
            count_dict[id] = len([x for x in med if x in med_list])     # number of important drugs in each prescription
        sorted_id = sorted(count_dict.items(),key=lambda x:x[1],reverse = True)    # sort the prescriptions
        id_list[list_index] = [x[0] for x in sorted_id]
    data = utils.write_csv(['聚类', '数字', '数量','id'], 'data/count.csv', group_list_word, group_list, count_list,id_list)  # lists cannot be recovered from the saved csv
    utils.save_pickle('group_count.pkl',data)
    return data
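In this prescription-mining code, utils.is_in(group, item) asks whether a whole symptom cluster is contained in one prescription's symptom list (see the comment inside the loop); the same subset semantics explains group_clean in the next example, which drops groups already covered by a larger one. A hypothetical sketch of such a helper:

def is_in(group, item):
    # Hypothetical subset test: True when every member of `group`
    # also occurs in `item`.
    return all(member in item for member in group)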
Example no. 11
0
def group_clean(pkl_file):
    """
    Merge the groups of different member counts into a single list.
    :param pkl_file:
    :return:
    """
    group = utils.load_pickle(pkl_file)
    all_list = []
    for i in range(len(group) - 1, 0, -1):
        item = list(group[i])
        member_num = len(item[0])
        new_item = copy.deepcopy(item)  # copy used for the removal operations
        for item_li in item:
            for li in all_list:
                if (utils.is_in(item_li, li) and item_li in new_item):
                    new_item.remove(item_li)
                    break
        all_list.extend(new_item)
    return all_list
Example no. 12
0
def calculate(series, list_name, group_list_word):
    """
    Count and display the symptom groups matched by each prescription.
    :param series:
    :param list_name:
    :param group_list_word:
    :return:
    """
    group_list = utils.word_2_num(list_name, group_list_word)
    series_list,index_list = series_2_list(series, list_name)
    pattern_list = []
    for item in series_list:
        pattern = []
        for group in group_list:
            if (utils.is_in(group, item)):
                pattern.append(group)
        pattern_list.append(pattern)
    series_list = utils.num_2_word(list_name, series_list)
    pattern_list = utils.num_2_word(list_name, pattern_list)
    utils.write_csv(['主治', '功能团'], 'data/pattern.csv', series_list, pattern_list)
Example no. 13
0
    def _put_trash(self, env, row, column):
        movs = [(-1, 0), (-1, 1), (0, 1), (1, 1), (1, 0), (1, -1), (0, -1), (-1, -1), (0, 0)]

        for mov in movs[:]:
            if is_in(row + mov[0], column + mov[1], env):
                place = env[row + mov[0]][column + mov[1]]

                if len(place.objects):                    
                    movs.remove(mov)

            else:
                movs.remove(mov)
        
        if not len(movs):
            return env, False

        dirt_mov = choice(movs)
        
        env[row + dirt_mov[0]][column + dirt_mov[1]].add_object(4)
        return env, True
Example no. 14
0
def test_configure():
    from metrique.utils import configure

    assert configure() == {}

    config = dict(debug=100, OK='OK')

    defaults = dict(debug=False, log2file=False)

    options = dict(debug=20, log2file=None)  # when None, should be ignored

    config_file = os.path.join(etc, 'test_conf.json')
    # contents:
    #{   "file": true
    #    "debug": true,
    #    "log2file": true   }

    # first, only defaults
    x = configure(defaults=defaults)
    assert is_in(x, 'debug', False)
    assert is_in(x, 'log2file', False)

    # then, where opt is not None, override
    x = configure(defaults=defaults, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', False)  # ignored options:None value

    # update acts as 'template config' in place of {}
    # but options will override values set already...
    # so, except that we have a new key, this should
    # be same as the one above
    x = configure(update=config, defaults=defaults, options=options)

    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', False)  # ignored options:None value
    assert is_in(x, 'OK', 'OK')  # only in the template config

    # first thing loaded is values from disk, then updated
    # with 'update' config template
    # since log2file is set in config_file to True, it will
    # take that value
    x = configure(config_file=config_file,
                  update=config,
                  defaults=defaults,
                  options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'OK', 'OK')  # only in the template config
    assert is_in(x, 'file', True)  # only in the config_file config

    # cf is loaded first and update config template applied on top
    x = configure(config_file=config_file, update=config)
    assert is_in(x, 'debug', 100)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'OK', 'OK')  # only in the template config
    assert is_in(x, 'file', True)  # only in the config_file config

    # cf is loaded first and update config template applied on top
    x = configure(config_file=config_file, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'file', True)  # only in the config_file config

    # cf is loaded first and where key:values aren't set or set to
    # None defaults will be applied
    x = configure(config_file=config_file, defaults=defaults)
    assert is_in(x, 'debug', True)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'file', True)  # only in the config_file config

    config_file = os.path.join(etc, 'test_conf_nested.json')
    # Contents are same, but one level nested under key 'metrique'
    x = configure(config_file=config_file,
                  defaults=defaults,
                  section_key='metrique',
                  section_only=True)
    assert is_in(x, 'debug', True)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'file', True)  # only in the config_file config

    _x = x.copy()
    config_file = os.path.join(etc, 'test_conf_nested.json')
    # Contents are same, but one level nested under key 'metrique'
    x = configure(config_file=config_file,
                  defaults=defaults,
                  section_key='metrique')
    assert is_in(x, 'metrique', _x)

    try:  # should fail
        x = configure(config_file='I_DO_NOT_EXIST')
    except IOError:
        pass
    else:
        assert False

    # shouldn't fail, but instead, returns empty dict
    x = configure(config_file=config_file,
                  section_key='I_DO_NOT_EXIST',
                  section_only=True)
    assert x == {}

    for arg in ('update', 'options', 'defaults'):
        try:
            x = configure(**{arg: 'I_SHOULD_BE_A_DICT'})
        except AttributeError:
            pass
        else:
            assert False
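The assertions in this test pass a dict, a key and an expected value, so here is_in behaves as a 'key is present and holds this value' check. The real helper lives in metrique's test utilities and is not shown; a hypothetical sketch of the semantics the assertions rely on:

def is_in(container, key, value):
    # Hypothetical test helper: the key must exist in the mapping and its
    # value must equal the expected one.
    return key in container and container[key] == value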
Example no. 15
0
 def gt(item):
     ret = is_in(keywords, item)
     return 1 if ret else 0
Example no. 16
0
 def goal_test(self, state):
     if isinstance(self.goal, list):
         return is_in(state, self.goal)
     else:
         return state == self.goal
Example no. 17
0
    def _is_male(self):
        if is_in(self.terms, self._males) and not \
                is_in(self.terms, TERMS_FEMALE):
            return True

        return False
Example no. 18
0
 def _get_code(self):
     if not is_in(self.terms, EXCLUDE_TERMS):
         if self._is_female():
             return CODE_FEMALE
         elif self._is_male():
             return CODE_MALE
Example no. 19
0
def recomment_main(insert):
    """
    Main recommendation routine: combines the clustering results with a whoosh
    search to produce prescription recommendations.
    :param insert:
    :return:
    """
    # insert = ['带下赤白', '四肢乏力']  # fully contained
    # insert = ['头痛', '赤白带下', '腹痛', '饮食减少', '经闭', '风寒']  # a symptom group
    # insert = ['经闭', '头晕', '食症','恶心','风寒']  # not contained

    list_name = utils.load_pickle('list_name.txt')
    series = utils.load_pickle('series.pkl')  # symptom series
    group_list = utils.group_clean2('data/group10.csv.pkl')
    group_count = clus4.count_prescript(series, list_name, group_list)
    # folder where the search index is stored
    indexfile = 'index/'
    # path to the synonym dictionary
    dictfile = 'data/clean2.txt'
    # path to the prescription dataset
    # datasetfile = 'data/symptom_entity.csv'
    # build the index
    # who.get_index(datasetfile, indexfile)
    # user query

    # AND query
    my_query = who.add_synonym(dictfile, insert)
    result = who.get_recommend(indexfile, my_query)
    num_4_group = 2
    num_4_other = 2
    if (result):  # the query can be matched directly
        print('direct match found')
        flag=0      # flag marking a symptom-group match
        # use the symptom groups for inference
        for index, group in enumerate(group_list):
            if (utils.is_in(insert,group) and len(insert)!=len(group)):
                flag=1
                ids = group_count.loc[index, 'id']
                print('symptom group',group)
                print_info(ids,4)
        if not flag:        # no symptom group matched
            print_info(result,1)
        return 1, result
    else:
        # initialise the synonym map from file
        word_map = utils.gene_dic_2('data/clean2.txt')
        # normalise the insert terms to their canonical synonym forms
        insert = [word_map[x] for x in insert]
        insert = utils.delete_duplicate(insert)  # deduplicate; term weights are ignored
        group_new = []
        # find the groups contained in insert
        match_symptom = []
        for index, group in enumerate(group_list):
            if (utils.is_in(group, insert)):
                match_symptom.extend(group)
                group_new.append(index)  # list of matched group indices
        group_new.sort(reverse=True)
        other_symptom = list(set(insert)-set(match_symptom))
        series_ids = []  # prescription ids matched by the symptom groups
        group_clean = []  # cleaned groups, dropping ones already covered by another
        for index in group_new:
            group = group_count.loc[index, '聚类']
            if (utils.is_in2(group, group_clean)):
                continue
            group_clean.append(group)
            ids = group_count.loc[index, 'id']
            print('drug group', group)
            # series_ids.append((group,ids))
            for i,id in enumerate(ids):
                if i>num_4_group-1:
                    break
                series_ids.append(str(id))
        # matching prescriptions exist
        if (len(series_ids) > 0):
            # use whoosh to search for prescriptions covering the remaining symptoms
            my_query = who.add_synonym2(dictfile, insert)  # OR query
            recommend_result = who.get_recommend(indexfile, my_query)
            recommend_result = recommend_result[:num_4_other]
            series_ids.extend(recommend_result)
            # series_ids = list(set(series_ids)) # may change the order
            series_ids = utils.delete_duplicate(series_ids)  # preserves order
            print('symptom group match found')
            print_info(series_ids,2)
            return 2, series_ids
        # no matching prescriptions
        else:
            print('no symptom group matched, falling back to OR search')
            my_query = who.add_synonym2(dictfile, insert)  # OR query
            result = who.get_recommend(indexfile, my_query)
            print_info(result,3)
            return 3, result
Example no. 20
0
def test_configure():
    from metrique.utils import configure

    assert configure() == {}

    config = dict(
        debug=100,
        OK='OK')

    defaults = dict(
        debug=False,
        log2file=False)

    options = dict(
        debug=20,
        log2file=None)  # when None, should be ignored

    config_file = os.path.join(etc, 'test_conf.json')
    # contents:
        #{   "file": true
        #    "debug": true,
        #    "log2file": true   }

    # first, only defaults
    x = configure(defaults=defaults)
    assert is_in(x, 'debug', False)
    assert is_in(x, 'log2file', False)

    # then, where opt is not None, override
    x = configure(defaults=defaults, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', False)  # ignored options:None value

    # update acts as 'template config' in place of {}
    # but options will override values set already...
    # so, except that we have a new key, this should
    # be same as the one above
    x = configure(update=config, defaults=defaults,
                  options=options)

    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', False)  # ignored options:None value
    assert is_in(x, 'OK', 'OK')  # only in the template config

    # first thing loaded is values from disk, then updated
    # with 'update' config template
    # since log2file is set in config_file to True, it will
    # take that value
    x = configure(config_file=config_file, update=config,
                  defaults=defaults, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'OK', 'OK')  # only in the template config
    assert is_in(x, 'file', True)  # only in the config_file config

    # cf is loaded first and update config template applied on top
    x = configure(config_file=config_file, update=config)
    assert is_in(x, 'debug', 100)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'OK', 'OK')  # only in the template config
    assert is_in(x, 'file', True)  # only in the config_file config

    # cf is loaded first and update config template applied on top
    x = configure(config_file=config_file, options=options)
    assert is_in(x, 'debug', 20)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'file', True)  # only in the config_file config

    # cf is loaded first and where key:values aren't set or set to
    # None defaults will be applied
    x = configure(config_file=config_file, defaults=defaults)
    assert is_in(x, 'debug', True)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'file', True)  # only in the config_file config

    config_file = os.path.join(etc, 'test_conf_nested.json')
    # Contents are same, but one level nested under key 'metrique'
    x = configure(config_file=config_file, defaults=defaults,
                  section_key='metrique', section_only=True)
    assert is_in(x, 'debug', True)
    assert is_in(x, 'log2file', True)  # ignored options:None value
    assert is_in(x, 'file', True)  # only in the config_file config

    _x = x.copy()
    config_file = os.path.join(etc, 'test_conf_nested.json')
    # Contents are same, but one level nested under key 'metrique'
    x = configure(config_file=config_file, defaults=defaults,
                  section_key='metrique')
    assert is_in(x, 'metrique', _x)

    try:  # should fail
        x = configure(config_file='I_DO_NOT_EXIST')
    except IOError:
        pass
    else:
        assert False

    # shouldn't fail, but instead, returns empty dict
    x = configure(config_file=config_file, section_key='I_DO_NOT_EXIST',
                  section_only=True)
    assert x == {}

    for arg in ('update', 'options', 'defaults'):
        try:
            x = configure(**{arg: 'I_SHOULD_BE_A_DICT'})
        except AttributeError:
            pass
        else:
            assert False
Example no. 21
0
def main_(params):
    # can be called using dictobj.DictionaryObject({'metric': 'euclidean'}) or
    # str_to_params('--output_folder result3 --metric euclidean --input_folder ../codes_short/ --files_limit 100 --max_features 2000')
    # for construction of params object
    list_of_tokens = get_vocab(params.select_top_tokens,
                               'short_sorted_freq_list.txt')

    vector_path = join(params.output_folder, 'vectors/vectors.npz')
    tfidf_path = join(params.output_folder, 'tfidf.npz')
    distances_path = join(params.output_folder, 'distances.npz')
    vectors_all_path = os.path.join(params.output_folder, 'vectors_all.npz')
    vectors_folder = join(params.output_folder)

    if params.cores_to_use == -1:
        params.cores_to_use = multiprocessing.cpu_count()
    true_cores = params.cores_to_use
    params.cores_to_use = 1
    s = params.files_limit_start
    e = min([
        params.files_limit_end, 111000,
        len(os.listdir(os.path.join(params.input_folder, 'tokenized1')))
    ])
    params.select_functions_limit = min(params.select_functions_limit, e)
    q = Queue()

    if 'vectors' in params.stages_to_run or (
            not os.path.exists(vectors_all_path) and is_in(
                ['tfidf', 'distances', 'clustering'], params.stages_to_run)):
        count = s
        q_len = 0
        while count < e:
            if not os.path.exists(vector_path[:-4] + str(count) + '.npz'):
                q.put(count)
                q_len += 1
            count += params.files_limit_step
        if q_len > 0:
            multi_process_run(UserProcessTask(params, list_of_tokens, q),
                              true_cores)
        bow_matrix, lists, all_ends_raw, gt_values, filenames_list, all_vulnerabilities, all_start_raw, vocab, idf, all_functions_count = load_vectors_iter_folder(
            e,
            s,
            params.files_limit_step,
            vector_path,
            params.output_folder,
            params.security_keywords,
            indices=params.select_functions_limit)
        np.savez_compressed(vectors_all_path,
                            bow_matrix=bow_matrix,
                            lists=lists,
                            all_start_ends=all_ends_raw,
                            gt_values=gt_values,
                            filenames_list=filenames_list,
                            all_vulnerabilities=all_vulnerabilities,
                            all_start_raw=all_start_raw,
                            idf=idf,
                            all_functions_count=all_functions_count)
        upload_to_gcp(params)
    q.close()

    if 'tfidf' in params.stages_to_run or (
            not os.path.exists(tfidf_path)
            and is_in(['distances', 'clustering'], params.stages_to_run)):
        if 'vectors' not in params.stages_to_run:
            # data = load_vectors(vector_path)
            # bow_matrix, lists, raw_lists, gt_values, filenames_list,\
            #    all_vulnerabilities, all_start_raw, vocab = data
            bow_matrix, lists, all_ends_raw, gt_values, filenames_list, all_vulnerabilities, all_start_raw, vocab, idf, all_functions_count = load_vectors_iter(
                vectors_folder
            )  # load_vectors_iter_folder(e,s, params.files_limit_step, vector_path, indices=params.select_functions_limit)
        # intersting_indices = np.array(list(range(len(lists))))
        # if scipy.sparse.issparse(bow_matrix):
        #    matrix = bow_matrix.toarray()
        if params.vectorizer == 'count' and params.matrix_form == 'tfidf':
            matrix = count_to_tfidf(bow_matrix, idf, all_functions_count)
        np.savez_compressed(tfidf_path, matrix=matrix)
        upload_to_gcp(params)

    if 'distances' in params.stages_to_run or (
            not os.path.exists(distances_path)
            and is_in(['clustering'], params.stages_to_run)):
        if 'tfidf' not in params.stages_to_run:
            matrix = np.load(tfidf_path)['matrix']
        distances = pdist(matrix, metric=params.metric)
        np.savez_compressed(distances_path, distances=distances)
        upload_to_gcp(params)

    if 'clustering' in params.stages_to_run:
        if 'vectors' not in params.stages_to_run and 'tfidf' not in params.stages_to_run:
            bow_matrix, lists, all_ends_raw, gt_values, filenames_list, all_vulnerabilities, all_start_raw, vocab, idf, all_functions_count = load_vectors_iter(
                vectors_folder)
        if 'tfidf' not in params.stages_to_run:
            matrix = np.load(tfidf_path)['matrix']
        if 'distances' not in params.stages_to_run:
            distances = np.load(distances_path)['distances']
        analyze_functions2(distances, matrix, lists, all_ends_raw, params,
                           gt_values, filenames_list, all_vulnerabilities,
                           all_start_raw)
        upload_to_gcp(params)
    print('finished')
    if params.shutdown:
        subprocess.run('sudo shutdown',
                       shell=True)  # sudo shutdown 0 on aws machines
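In this pipeline, is_in(stage_names, params.stages_to_run) gates whether a stage's inputs need to be rebuilt, apparently asking whether any of the named downstream stages was requested. A plausible sketch of that overlap test:

def is_in(candidates, selected):
    # Hypothetical overlap test: True if at least one candidate stage
    # was selected for this run.
    return any(stage in selected for stage in candidates)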
Example no. 22
0
 def goal_test(self, state):
     """Return True if the state is a goal."""
     if isinstance(self.goal, list):
         return is_in(state, self.goal)
     else:
         return state == self.goal