Code Example #1
def correct_tags(info, context):
    # Normalize info["tags"]: split on whitespace, strip prohibited trailing
    # symbols, drop empty entries, deduplicate, and re-join with spaces.
    info["tags"] = ' '.join(
        utils.deduplicate(
            filter(None, [
                tag.strip(PROHIBITED_TRAILING_SYMBOLS)
                for tag in info["tags"].split()
            ])))
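Every example on this page relies on utils.deduplicate, whose implementation is not shown. Judging from how it is called here (a single iterable in, a list out, order apparently preserved), a minimal sketch could look like the following; the behavior is an assumption, not the project's actual code.

def deduplicate(items):
    # Keep only the first occurrence of each element, preserving order.
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result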
Code Example #2
def get_info_from_sbsar_xml(xml_file):
    # Read the <graph> element attributes from the given XML and build an
    # info dict (id, name, tags, dimensions, raw attributes).
    with open(xml_file, 'r', encoding="utf-8") as xml_text:
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(xml_text.read(), "html.parser")
        graph = soup.find("graph")
        attrs = graph.attrs  # type: dict

        tags = []
        keywords = attrs.get("keywords")
        if keywords:
            tags = re.split(r" |;|,", keywords.strip("; ").lower())

        category = attrs.get("category")
        if category:
            tags.extend(re.split(r" |/|,", category.lower()))

        tags = utils.deduplicate(tags)
        tags = list(filter(None, tags))

        id = None
        pkgurl = attrs.get("pkgurl")
        if pkgurl:
            match = re.search(r"(?<=pkg:\/\/).+", pkgurl)
            if match:
                id = match.group(0)

        if id:
            name = id
        else:
            name = os.path.splitext(os.path.basename(xml_file))[0]
        label = attrs.get("label")
        if label:
            name = label.strip(" ")

        dimensions = {}
        physicalsize = attrs.get("physicalsize")
        if physicalsize:
            for letter, dimension in zip('xyz', physicalsize.split(",")):
                dimensions[letter] = float(dimension) / 100.0

        info = {
            "id": id,
            "name": name,
            # "url": "",
            "author": attrs.get("author", ""),
            "author_url": attrs.get("authorurl", ""),
            # "licence": "",
            # "licence_url": "",
            "tags": tags,
            # "preview_url": "",
            "description": attrs.get("description", ""),
            "dimensions": dimensions,
            "xml_attrs": attrs
        }

        utils.remove_empty(info)
        return info
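For reference, a short usage sketch of the function above; the path is illustrative, and exactly which keys survive depends on what utils.remove_empty strips out.

info = get_info_from_sbsar_xml("material.sbsar.xml")  # illustrative path
print(info["name"], info.get("tags", []), info.get("dimensions", {}))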
Code Example #3
def update_list_info(property_name, id, info, context):
    try:
        asset_to_update = context.window_manager.at_asset_data[
            id]  # type: Asset
    except KeyError:
        # Asset id not found in the library: clear the field and bail out.
        info[property_name] = ""
        return
    tag_list = utils.deduplicate(
        filter(None, [
            tag.strip(PROHIBITED_TRAILING_SYMBOLS).lower()
            for tag in info[property_name].split()
        ]))
    info[property_name] = ' '.join(tag_list)
    if set(asset_to_update.info[property_name]) != set(tag_list):
        asset_to_update.info[property_name] = tag_list
        asset_to_update.update_info()
        global current_search_query
        current_search_query = None
        update_search(context.window_manager, None)
Code Example #4
File: works.py  Project: cuhk-lambda/rzlinkhelper
def do_process(data, settings):
    # Preparing directories
    utils.checkDir(utils.GET("object_dir"), "Object")
    if utils.GET("toposort_verbose_logging_dir") is not None and utils.GET(
            "toposort_verbose_logging_dir") != "":
        utils.checkDir(utils.GET("toposort_verbose_logging_dir"),
                       "Toposort verbose logging")
    originalCXX = utils.GET("original_cxx_executable")
    originalCC = utils.GET("original_cc_executable")

    finishedList = Manager().list()

    totalLength = len(data["compile"])
    compileTaskPool = Pool()
    console.log("Compiling .o (total: {})".format(totalLength))
    for r in range(totalLength):
        i = data["compile"][r]
        execname = "(unknown)"
        cmdline = list(filter(lambda x: x != "", i.split(" ")))
        filehashpath = "0" * 40  # placeholder SHA-1 used when no "-o" output path is seen
        for argnum in range(len(cmdline)):
            if cmdline[argnum] == originalCXX:
                cmdline[argnum] = utils.GET("targeted_cxx_executable")
                cmdline[argnum] += " -emit-llvm"
            elif cmdline[argnum] == originalCC:
                cmdline[argnum] = utils.GET("targeted_cc_executable")
                cmdline[argnum] += " -emit-llvm"
            elif cmdline[argnum] == "-o":
                filepath = realpath(cmdline[argnum + 1])
                filehashpath = utils.sha1sum(filepath)
                sha1Table[filehashpath] = filepath
                cmdline[argnum + 1] = realpath(
                    utils.GET("object_dir") + "/" + filehashpath)
                execname = utils.findName(filepath)
            elif cmdline[argnum] == "-c":
                cmdline[argnum] = "-S"
            elif cmdline[argnum] == "-g":
                cmdline[argnum] = ""
        command = " ".join(cmdline)
        compileTaskPool.apply_async(single_compile,
                                    args=(command, filehashpath, execname, r,
                                          totalLength, finishedList,
                                          settings.clean),
                                    error_callback=console_error_and_exit)
    compileTaskPool.close()
    compileTaskPool.join()

    # Construct the graph
    console.success("All object files are compiled.")

    console.info("Preparing linking relationships")
    graphData = data["scripts"]

    for i in graphData:
        itemPath = i["target"]["abs_path"]
        hashedItemPath = utils.sha1sum(itemPath)
        sha1Table[hashedItemPath] = itemPath
        itemDependencies = i["target"]["dependencies"]
        dependencyList[hashedItemPath] = utils.deduplicate(
            utils.pathToSha1(itemDependencies, sha1Table))
        if hashedItemPath in dependencyList[hashedItemPath]:
            console.warn("Self-circle found. Ignoring.")
            dependencyList[hashedItemPath].remove(hashedItemPath)

    preserveProcess = utils.GET("preserve_process")
    if preserveProcess is not None and preserveProcess != "":
        console.info("Saving metadata")
        sha1FilePath = utils.GET("object_dir") + "/" + preserveProcess
        try:
            json.dump(sha1Table, open(sha1FilePath, "w"))
            console.success("Metadata saved.")
        except PermissionError:
            console.warn(
                "Process file {} is not writable, while preseve_process is on."
                .format(sha1FilePath))

    console.info("Calculating linking sequence")
    try:
        currList = utils.topoSort(dependencyList, finishedList, sha1Table)
    except ValueError:
        console.error("Topo sort failed to complete. Please check your data.")
        sys.exit(1)
    console.success("Linking sequence calculated.")

    if settings.clean or settings.clean_linking:
        console.info("Cleaning linking targets")
        for i in dependencyList.keys():
            if os.access(utils.GET("object_dir") + "/" + i, os.W_OK):
                os.unlink(utils.GET("object_dir") + "/" + i)
        console.success("Linking targets cleaned.")

    if len(currList) != len(graphData):
        console.warn("Bad consistance on linking recipe")
    console.debug("Linking sequence:", currList, "or",
                  list(map(lambda x: sha1Table[x], currList)))
    console.info("Start linking")
    ctrLen = len(currList)
    p = Pool()
    for idx, obj in enumerate(currList):
        console.info("Linking {} ({})  [{}/{}]".format(sha1Table[obj], obj,
                                                       idx + 1, ctrLen))
        p.apply_async(single_linking,
                      args=(obj, finishedList),
                      error_callback=console_error_and_exit)
    p.close()
    p.join()
    console.success("All targets are linked.")
    console.success("Finished.")
Code Example #5
def get_web_texturehaven_info(url, content_folder):
    # https://texturehaven.com/tex/?t=brick_wall_003
    url = url.split("#")[0]

    if not "texturehaven.com/tex/" in url:
        return False, "Not valid Texture Haven url."

    match = re.search(r"(?<=t=)[a-zA-Z0-9_]+", url)
    id = match.group(0)

    import requests
    response = requests.get(url)
    if response.status_code != 200:
        return False, response.text

    from bs4 import BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    dimensions = {}
    tags = []
    author = ""  # filled in below from the "Author" item, if present
    author_url = ""

    for item in soup.find(name="div", id="item-info").findAll("li"):
        title = item.get("title")

        if not title:
            b = item.find('b')
            if b:
                title = b.string

        if title:

            if title.startswith("Author"):
                author = title.split(":")[1].strip()
                author_url = f"https://texturehaven.com/textures/?a={author}"

            elif title.startswith("Real-world"):
                dimensions_title = title.split(":")[1].strip()
                number_pattern = re.compile(r"\d+\.?\d*")
                for letter, number in zip(
                        'xyz', number_pattern.findall(dimensions_title)):
                    dimensions[letter] = float(number)

            elif title.startswith(("Categories", "Tags")):
                tags.extend(
                    [a.string.lower().strip() for a in item.findAll("a")])

    preview_url = "https://texturehaven.com" + soup.find(
        name="div", id="item-preview").find("img")["src"]

    info = {
        "id": id,
        "name": id,
        "url": url,
        "author": author,
        "author_url": author_url,
        "licence": "CC0",
        "licence_url": "https://texturehaven.com/p/license.php",
        "tags": tags,
        "preview_url": preview_url,
        # "description": "",
        "dimensions": dimensions,
    }

    utils.remove_empty(info)

    if content_folder:
        downloads = []

        # for a in soup.findAll("a"):
        #     if a.get("download"):
        #         href = a["href"]
        #         if "/png/4k/" in href:
        #             name = href.split("/")[-1].lower()
        #             type = get_type(name)
        #             if type and len(type) == 1 and type[0] in ('diffuse', 'albedo', 'displacement', 'normal', 'roughness', 'ambient_occlusion'):
        #                 downloads.append("https://texturehaven.com" + href)

        for a in soup.findAll("a"):
            if a.get("download"):
                href = a["href"]
                if "/4k/" in href:
                    name = href.split("/")[-1].lower()
                    type = type_definer.get_type(
                        name, config={"is_rgb_plus_alpha": True})
                    if not type or len(type) != 1:
                        continue
                    type = type[0]
                    if ("/jpg/4k/" in href and type in
                        ('diffuse', 'albedo', 'normal', 'roughness',
                         'ambient_occlusion')) or ("/png/4k/" in href and type
                                                   in ('displacement', )):
                        downloads.append("https://texturehaven.com" + href)

        # Prefer OpenGL normal maps: drop DirectX variants when a GL one exists.
        if any("gl_normal" in d.lower() for d in downloads):
            downloads = [d for d in downloads if "dx_normal" not in d.lower()]

        downloads = utils.deduplicate(downloads)
        info["downloads"] = downloads

    return True, info
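The function returns a (success, payload) pair: an error string on failure, the info dict on success (with a "downloads" list when content_folder is truthy). A short usage sketch using the URL from the comment above:

ok, result = get_web_texturehaven_info(
    "https://texturehaven.com/tex/?t=brick_wall_003", content_folder=None)
if ok:
    print(result["name"], result.get("tags", []))
else:
    print("Error:", result)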
Code Example #6
File: search_eval.py  Project: zjwind/Lancer
                    extend_token_len]  # used for debug
                ## mode2, it seems that mode2 slightly outperforms mode 1 in T3+
                extend_query = extend_query_builder.build_query(
                    text_tokens, inferred_text_tokens, max_size * 10)
                search_results = retriever.search_snippets(extend_query)

                # ## mode 1
                # basic_query = basic_query_builder.build_query(text_tokens, max_size * 5)
                # extend_query = extend_query_builder.build_query(text_tokens, inferred_text_tokens, max_size * 5)
                # search_results = retriever.search_snippets(basic_query) + retriever.search_snippets(extend_query)

            else:
                basic_query = basic_query_builder.build_query(
                    text_tokens, max_size * 10)
                search_results = retriever.search_snippets(basic_query)
            search_results = deduplicate(snippet, search_results)

            if user_bert:
                if short_mode:
                    ## short-bert mode
                    query_snippet_text = build_short_mode_text(snippet)
                    candidate_texts = [
                        build_short_mode_text(res) for res in search_results
                    ]
                    scores = short_bert_manager.rank(query_snippet_text,
                                                     candidate_texts)
                else:
                    ## full-bert mode
                    query_snippet_text = " ".join(
                        ParserUtil.extractNLwords(text_tokens))
                    candidate_texts = [
Code Example #7
File: remote_server.py  Project: zjwind/Lancer
    def search_codes(self):
        rawbody = cherrypy.request.body.read(
            int(cherrypy.request.headers['Content-Length']))
        jsonbody = json.loads(rawbody)
        code_context_tokens = jsonbody['codeContextTokens']
        snippet = jsonbody['snippet']
        user_bert = jsonbody['useBert']

        text_tokens = snippet['tokenSequence']
        if self.do_extend:
            inferred_text_tokens = self.lm_infer.infer(code_context_tokens,
                                                       text_tokens,
                                                       self.extend_token_len)
            extend_query = self.extend_query_builder.build_query(
                text_tokens, inferred_text_tokens, self.max_size * 10)
            search_results = self.retriever.search_snippets(extend_query,
                                                            with_score=True)
        else:
            basic_query = self.basic_query_builder.build_query(
                text_tokens, self.max_size * 10)
            search_results = self.retriever.search_snippets(basic_query,
                                                            with_score=True)

        distinct_results = deduplicate(snippet,
                                       search_results,
                                       with_score=True)
        if user_bert and self.args.use_bert:
            if len(snippet['lineCodes']) <= 2:
                ## short-bert mode
                # query_snippet_text = build_short_mode_text(snippet)
                # candidate_texts = [build_short_mode_text(res) for res, _ in search_results]
                query_snippet_text = " | ".join([
                    " ".join(ParserUtil.extractNLwords([snippet['className']
                                                        ])),
                    " ".join(ParserUtil.extractNLwords([snippet['methodName']
                                                        ]))
                ])
                candidate_texts = [
                    " | ".join([
                        " ".join(ParserUtil.extractNLwords([res['className']
                                                            ])),
                        " ".join(ParserUtil.extractNLwords([res['methodName']
                                                            ]))
                    ]) for res, _ in distinct_results
                ]
                scores = self.short_bert_manager.rank(query_snippet_text,
                                                      candidate_texts)
            else:
                ## full-bert mode
                query_snippet_text = " ".join(
                    ParserUtil.extractNLwords(snippet['tokenSequence']))
                candidate_texts = [
                    " ".join(ParserUtil.extractNLwords(res['tokenSequence']))
                    for res, _ in distinct_results
                ]
                scores = self.full_bert_manager.rank(query_snippet_text,
                                                     candidate_texts)
            sorted_scores = sorted([(i, score)
                                    for i, score in enumerate(scores)],
                                   key=lambda d: d[1],
                                   reverse=True)

            tmp_indices = []
            for i, score in sorted_scores[:self.max_size]:
                if score >= 0.0:
                    tmp_indices.append(i)
                else:
                    tmp_index_set = set(tmp_indices)
                    for idx in range(min(self.max_size, len(sorted_scores))):
                        if idx not in tmp_index_set:
                            tmp_indices.append(idx)
                    break
            distinct_results = [distinct_results[idx] for idx in tmp_indices]

        distinct_results = distinct_results[:self.max_size]
        distinct_results = [{
            'methodInfo': res[0],
            'score': float(res[1])
        } for res in distinct_results]

        response = json.dumps(distinct_results)

        print(" ".join(text_tokens))
        print("res size:", len(distinct_results))
        method_ids = [(i + 1, res['methodInfo']['methodId'])
                      for i, res in enumerate(distinct_results)]
        print(method_ids)
        print('=' * 80)

        return response
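Examples #6 and #7 call a two-argument deduplicate(snippet, search_results, ...) that differs from the utils.deduplicate helper used earlier: it appears to drop retrieved snippets that duplicate the query or one another. Its implementation is not shown; a minimal sketch, assuming snippets are dicts carrying a 'tokenSequence' key (as in search_codes) and that comparison is by token sequence:

def deduplicate(query_snippet, search_results, with_score=False):
    # Remove results identical to the query snippet and duplicates among
    # the results themselves, comparing snippets by their token sequences.
    def key(snippet):
        return tuple(snippet['tokenSequence'])

    seen = {key(query_snippet)}
    distinct = []
    for item in search_results:
        snippet = item[0] if with_score else item
        k = key(snippet)
        if k not in seen:
            seen.add(k)
            distinct.append(item)
    return distinct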