예제 #1
0
def inner_loop(artifact_name):
    """Add one artifact's symbols to the per-top-level symbol tables.

    The artifact's symbols are grouped by the lowercased text before the
    first ``"."``. For every group the existing symbol table is downloaded,
    the artifact is appended to the entry of each symbol in that group, the
    metadata is updated, and the table is pushed back to the server.

    Parameters
    ----------
    artifact_name
        Name of the artifact whose symbols are indexed; passed to
        ``WebDB.get_artifact_symbols`` and recorded in the tables.

    Returns
    -------
    dict
        Maps each processed top-level name to its updated symbol table.
    """

    def top_level_of(symbol):
        # Grouping key: everything before the first dot, case-insensitive.
        return symbol.partition(".")[0].lower()

    web_interface = WebDB()
    symbols = web_interface.get_artifact_symbols(artifact_name)
    all_symbol_tables = {}
    # groupby only merges *adjacent* items, so the input has to be ordered by
    # the grouping key itself. A plain sorted() orders "Foo.a" before "bar.b"
    # before "foo.c", which would split "foo" into two separate groups.
    ordered_symbols = sorted(symbols, key=lambda s: (top_level_of(s), s))
    for top_level_name, keys in groupby(ordered_symbols, top_level_of):
        print(top_level_name)
        # carve out for star imports which don't have dots
        if top_level_name == "*":
            continue
        # download the existing symbol table
        symbol_table_with_metadata = web_interface.get_symbol_table(
            top_level_name)
        symbol_table = symbol_table_with_metadata.get("symbol table", {})
        metadata = symbol_table_with_metadata.get("metadata", {})
        # update the symbol table with the symbols of this group only;
        # iterating every symbol of the artifact here would write symbols of
        # other top-level names into this table
        for k in keys:
            symbol_table.setdefault(k, []).append(artifact_name)
        # add artifacts to metadata
        # NOTE(review): `version` is not defined in this function; it is
        # presumably a module-level name - confirm.
        metadata["version"] = version
        metadata.setdefault("indexed artifacts", []).append(artifact_name)
        # push back to server
        web_interface.push_symbol_table(top_level_name, {
            "symbol table": symbol_table,
            "metadata": metadata
        })
        all_symbol_tables[top_level_name] = symbol_table
    return all_symbol_tables
def inner_loop(artifact_name):
    """Add one artifact's symbols to the per-top-level symbol tables.

    The artifact's symbols are grouped by the lowercased text before the
    first ``"."``. A group is skipped when the table's metadata already lists
    the artifact under ``"indexed artifacts"``. Otherwise the table is
    downloaded, one entry per symbol of the group is appended, the metadata is
    updated, and the table is pushed back to the server.

    Parameters
    ----------
    artifact_name
        Name of the artifact whose symbols are indexed; passed to
        ``WebDB.get_artifact_symbols`` and recorded in the tables.

    Returns
    -------
    dict
        Maps each processed top-level name to its updated symbol table. A
        table is included even when pushing it raised
        ``requests.RequestException`` (the error is printed, not re-raised).
    """

    def top_level_of(symbol):
        # Grouping key: everything before the first dot, case-insensitive.
        return symbol.partition(".")[0].lower()

    web_interface = WebDB()
    symbols = web_interface.get_artifact_symbols(artifact_name)
    all_symbol_tables = {}
    # groupby only merges *adjacent* items, so the input has to be ordered by
    # the grouping key itself. With a plain sorted(), mixed-case names can
    # split one top-level name into several groups; the later groups would
    # then hit the "already indexed" check below and their symbols would be
    # dropped silently.
    ordered_symbols = sorted(symbols, key=lambda s: (top_level_of(s), s))
    for top_level_name, keys in groupby(ordered_symbols, top_level_of):
        print(top_level_name)
        # carve out for star imports which don't have dots
        if top_level_name == "*":
            continue
        # download the existing symbol table metadata
        metadata = web_interface.get_symbol_table_metadata(
            top_level_name=top_level_name)
        # nothing to do when this artifact is already recorded for the table
        if artifact_name in metadata.get("indexed artifacts", []):
            continue
        # download the existing symbol table
        symbol_table_with_metadata = web_interface.get_symbol_table(
            top_level_name)
        symbol_table = symbol_table_with_metadata.get("symbol table", {})
        # update the symbol table
        for k in keys:
            symbol_table_entry_value = {"artifact name": artifact_name}
            # carry the "shadows" payload over only when it is non-empty
            shadows = symbols[k].get("data", {}).get("shadows")
            if shadows:
                symbol_table_entry_value.update(shadows=shadows)
            symbol_table.setdefault(k, []).append(symbol_table_entry_value)
        # add artifacts to metadata
        # NOTE(review): `version` is not defined in this function; it is
        # presumably a module-level name - confirm.
        metadata["version"] = version
        metadata.setdefault("indexed artifacts", []).append(artifact_name)
        # push back to server; a failed request is reported and the loop
        # carries on with the next top-level name
        try:
            web_interface.push_symbol_table(top_level_name, {
                "symbol table": symbol_table,
                "metadata": metadata
            })
        except requests.RequestException as e:
            print(e)
        all_symbol_tables[top_level_name] = symbol_table
    return all_symbol_tables
예제 #3
0
"""tools for matching the volumes with artifacts that supply the symbols"""
from concurrent.futures._base import as_completed
from concurrent.futures.thread import ThreadPoolExecutor
from itertools import groupby

from symbol_exporter.ast_symbol_extractor import builtin_symbols
from symbol_exporter.db_access_model import WebDB

# Module-level WebDB client; get_supply() binds its get_symbol_table method as
# the default symbol-table lookup.
web_interface = WebDB()


def get_supply(top_level_import,
               v_symbols,
               get_symbol_table_func=web_interface.get_symbol_table):
    """Find the suppliers common to all resolvable symbols of one import.

    Parameters
    ----------
    top_level_import
        Key handed to ``get_symbol_table_func`` to fetch the symbol table.
    v_symbols
        Iterable of symbols to look up in that table.
    get_symbol_table_func
        Callable returning a mapping from symbol to an iterable of suppliers.
        Defaults to the module-level client's ``get_symbol_table``.

    Returns
    -------
    tuple
        ``(supplies, bad_symbols)``: the intersection of the supplier sets of
        every symbol that had a non-empty table entry (an empty set when there
        is none), and the set of symbols with a missing or empty entry.
    """
    table = get_symbol_table_func(top_level_import)
    missing = set()
    common = None
    # TODO: handle star imports recursion here?
    for name in v_symbols:
        providers = table.get(name)
        if providers:
            provider_set = set(providers)
            # the first resolvable symbol seeds the running intersection
            common = provider_set if common is None else common & provider_set
        else:
            missing.add(name)
    if not common:
        common = set()
    return common, missing


def find_supplying_version_set(
예제 #4
0
        for k in list(symbols):
            symbol_table.setdefault(k, []).append(artifact_name)
        # add artifacts to metadata
        metadata["version"] = version
        metadata.setdefault("indexed artifacts", []).append(artifact_name)
        # push back to server
        web_interface.push_symbol_table(top_level_name, {
            "symbol table": symbol_table,
            "metadata": metadata
        })
        all_symbol_tables[top_level_name] = symbol_table
    return all_symbol_tables


if __name__ == "__main__":
    web_interface = WebDB()
    extracted_artifacts = web_interface.get_current_symbol_table_artifacts()
    all_artifacts = web_interface.get_current_extracted_pkgs().values()

    artifacts_to_index = list(set(all_artifacts) - set(extracted_artifacts))
    print(f"Number of artifacts to index: {len(artifacts_to_index)}")

    # The shuffle here is to try to not have two threads running on the same symbol table json at once if possible
    shuffle(artifacts_to_index)
    pool = ThreadPoolExecutor()
    # Note that this is a race condition here, two threads could try to write to the same symbol table
    # however one of those will win so next round there will be one added safely and this continues
    # until none are left to be added
    print("issuing futures")
    futures = {
        pool.submit(inner_loop, artifact_name): artifact_name
        except requests.RequestException as e:
            print(e)
        all_symbol_tables[top_level_name] = symbol_table
    return all_symbol_tables


def invert_dict(d: dict):
    """Invert a mapping of key -> iterable of values.

    Returns a plain dict that maps every value found in any of the iterables
    to the set of keys whose iterable contained it.
    """
    inverted = {}
    for key, values in d.items():
        for value in values:
            inverted.setdefault(value, set()).add(key)
    return inverted


# Script entry point: work out which extracted artifacts still have to be
# added to the symbol tables.
if __name__ == "__main__":
    web_interface = WebDB()
    # Looked up below by lowercased symbol and subtracted from a set of
    # artifacts, so this is used as a mapping of top-level symbol -> set of
    # already-indexed artifacts.
    indexed_artifacts_by_top_symbol = web_interface.get_current_symbol_table_artifacts_by_top_level(
    )
    all_artifacts = web_interface.get_all_extracted_artifacts()
    # NOTE(review): `Client` and `db` are not imported in the visible part of
    # the file; presumably dask.distributed.Client and dask.bag - confirm.
    with Client(threads_per_worker=100):
        # one get_top_level_symbols call per artifact
        compute = db.from_sequence(all_artifacts).map(
            web_interface.get_top_level_symbols).compute()
    # pair each artifact with the result at the same position
    all_symbols_by_artifact = {k: v for k, v in zip(all_artifacts, compute)}
    # flip to symbol -> set of artifacts that contain it
    all_artifacts_by_symbol = invert_dict(all_symbols_by_artifact)

    # An artifact needs indexing when, for at least one of its symbols, it is
    # not yet listed among the artifacts indexed under that (lowercased)
    # symbol.
    artifacts_to_index = set()
    for symbol, artifacts_set in all_artifacts_by_symbol.items():
        artifacts_to_index.update(
            artifacts_set -
            indexed_artifacts_by_top_symbol.get(symbol.lower(), set()))
    artifacts_to_index = list(artifacts_to_index)