コード例 #1
0
ファイル: translator_kgx.py プロジェクト: vemonet/kgx
def validate(config: dict, path: str, output: str, output_dir: str,
             format: str):
    """
    Run KGX validation on an input file to check for BioLink Model compliance.
    \f

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli
    path: str
        Path to input file
    output: str
        Path to output file
    output_dir: str
        Path to a directory
    format: str
        The input format

    """
    # Pick the transformer from the explicit format, falling back to the
    # type inferred from the input path.
    if format:
        t = get_transformer(format)()
    else:
        t = get_transformer(get_type(path))()
    t.parse(path, input_format=format)
    validator = Validator()
    errors = validator.validate(t.graph)
    # Context manager guarantees the report handle is closed; the original
    # leaked the file object returned by open().
    with open(output, 'w') as report:
        validator.write_report(errors, report)
コード例 #2
0
def main(input_path, output_path, biolink_model_only):
    """
    Uses ontobio to load ontologies and choose the best biolink model term
    for a node category or edge label.
    """
    reader = get_transformer(get_type(input_path))()
    writer = get_transformer(get_type(output_path))()
    reader.parse(input_path)
    graph = reader.graph

    def register_prefix(term):
        # Record the lower-cased CURIE prefix so its ontology gets loaded.
        prefix = make_curie(term).lower().rsplit(':', 1)[0]
        ontologies[prefix] = None

    # First pass: collect every ontology prefix mentioned by node categories.
    for _, attrs in graph.nodes(data=True):
        categories = attrs.get('category')
        if isinstance(categories, (tuple, list, set)):
            for category in categories:
                if ':' in category:
                    register_prefix(category)

    # ... and by edge labels.
    for _, _, attrs in graph.edges(data=True):
        if 'edge_label' in attrs and ':' in attrs['edge_label']:
            register_prefix(attrs['edge_label'])

    print(ontologies)

    for prefix in ontologies.keys():
        print(prefix)
        ontologies[prefix] = get_ontology(prefix)

    # Second pass: map each node's categories to biolink terms.
    with click.progressbar(graph.nodes(data=True)) as bar:
        for _, attrs in bar:
            if 'category' not in attrs:
                attrs['category'] = ['named thing']
            elif isinstance(attrs['category'], (list, set, tuple)):
                mapped = [
                    get_term(make_curie(c), biolink_model_only)
                    for c in attrs['category'] if ':' in c
                ]
                mapped += [c for c in attrs['category'] if ':' not in c]
                attrs['category'] = [
                    term.replace('_', ' ') for term in mapped
                    if term is not None
                ]

    # Map edge labels, defaulting to 'related_to' when missing/unmappable.
    with click.progressbar(graph.edges(data=True)) as bar:
        for _, _, attrs in bar:
            if 'edge_label' in attrs and ':' in attrs['edge_label']:
                attrs['edge_label'] = get_term(
                    make_curie(attrs['edge_label']), biolink_model_only)
                attrs['valid_edge_label'] = bmt.get_predicate(
                    attrs['edge_label']) is not None
            if attrs.get('edge_label') is None:
                attrs['edge_label'] = 'related_to'
            attrs['edge_label'] = attrs['edge_label'].replace(' ', '_')

    writer.graph = graph
    print('Saving to {}'.format(output_path))
    writer.save(output_path)
コード例 #3
0
ファイル: merge_kg.py プロジェクト: shenfcStanley/kg-covid-19
def load_and_merge(yaml_file: str) -> Transformer:
    """Load and merge sources defined in the config YAML.

    Args:
        yaml_file: A string pointing to a KGX compatible config YAML.

    Returns:
        kgx.Transformer: The merged transformer that contains the merged graph.

    """
    config = parse_load_config(yaml_file)
    transformers: List = []

    # Load every source listed under 'target' in the YAML.
    for key, target in config['target'].items():
        logging.info("Loading {}".format(key))
        source_type = target['type']
        if source_type in get_file_types():
            # File-backed source: parse each listed file into one transformer.
            transformer = get_transformer(source_type)()
            for filename in target['filename']:
                transformer.parse(filename, input_format='tsv')
            transformers.append(transformer)
        elif source_type == 'neo4j':
            # Neo4j-backed source: stream the graph from the database.
            transformer = NeoTransformer(None, target['uri'],
                                         target['username'],
                                         target['password'])
            transformer.load()
            transformers.append(transformer)
        else:
            logging.error("type {} not yet supported".format(source_type))

    # Combine all loaded subgraphs into a single graph.
    merged_transformer = Transformer()
    merged_transformer.merge_graphs([t.graph for t in transformers])
    merged_transformer.report()

    # Optionally serialize the merged graph to the configured destination.
    if 'destination' in config:
        destination = config['destination']
        dest_type = destination['type']
        if dest_type in ['csv', 'tsv', 'ttl', 'json', 'tar']:
            writer = get_transformer(dest_type)(merged_transformer.graph)
            writer.save(destination['filename'], extension=dest_type)
        elif dest_type == 'neo4j':
            writer = NeoTransformer(
                merged_transformer.graph,
                uri=destination['uri'],
                username=destination['username'],
                password=destination['password'])
            writer.save_with_unwind()
        else:
            logging.error(
                "type {} not yet supported for KGX load-and-merge operation.".
                format(dest_type))

    return merged_transformer
コード例 #4
0
def load_and_merge(config: dict, load_config):
    """
    Load nodes and edges from files and KGs, as defined in a config YAML, and merge them into a single graph.
    The merge happens in-memory. This merged graph can then be written to a local/remote Neo4j instance
    OR be serialized into a file.
    \f

    .. note::
        Everything here is driven by the ``load_config`` YAML.

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli
    load_config:
        Path to a KGX-compatible YAML that defines 'target' sources and a
        'destination'

    """

    with open(load_config, 'r') as YML:
        cfg = yaml.load(YML, Loader=yaml.FullLoader)

    transformers = []
    for key in cfg['target']:
        target = cfg['target'][key]
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            transformer = get_transformer(target['type'])()
            transformer.parse(target['filename'])
            transformers.append(transformer)
        elif target['type'] == 'neo4j':
            transformer = kgx.NeoTransformer(None, target['uri'],
                                             target['username'],
                                             target['password'])
            # TODO: support filters
            transformer.load()
            transformers.append(transformer)
        else:
            logging.error(
                "type {} not yet supported for KGX load-and-merge operation.".
                format(target['type']))

    merged_transformer = Transformer()
    merged_transformer.merge_graphs([x.graph for x in transformers])

    destination = cfg['destination']
    if destination['type'] in ['csv', 'tsv', 'ttl', 'json', 'tar']:
        # Pass the merged graph into the constructor: the original created
        # an empty transformer here and therefore saved an empty graph.
        destination_transformer = get_transformer(destination['type'])(
            merged_transformer.graph)
        destination_transformer.save(destination['filename'])
    elif destination['type'] == 'neo4j':
        destination_transformer = kgx.NeoTransformer(
            merged_transformer.graph,
            uri=destination['uri'],
            username=destination['username'],
            password=destination['password'])
        destination_transformer.save_with_unwind()
    else:
        logging.error(
            "type {} not yet supported for KGX load-and-merge operation.".
            format(destination['type']))
コード例 #5
0
ファイル: translator_kgx.py プロジェクト: cnbennett3/kgx
def load_transformer(input_paths:List[str], input_type:str=None) -> Transformer:
    """
    Creates a transformer for the appropriate file type and loads the data into
    it from file.

    Parameters
    ----------
    input_paths: List[str]
        Paths to the input files; all must share a single file type
    input_type: str
        Explicit input type; inferred from the file extensions when omitted

    Returns
    -------
    Transformer
        A transformer with every input parsed into its graph
    """
    if input_type is None:
        input_types = [get_type(i) for i in input_paths]
        if input_types:
            # All inputs must agree on one type when none is given explicitly.
            if any(t != input_types[0] for t in input_types):
                error(
                """
                Each input file must have the same file type.
                Try setting the --input-type parameter to enforce a single
                type.
                """
                )
            # Hoisted out of the loop: the original reassigned this on
            # every iteration.
            input_type = input_types[0]

    transformer_constructor = get_transformer(input_type)

    if transformer_constructor is None:
        error('Inputs do not have a recognized type: ' + str(get_file_types()))

    t = transformer_constructor()
    for i in input_paths:
        t.parse(i, input_type)

    t.report()

    return t
コード例 #6
0
ファイル: translator_kgx.py プロジェクト: cnbennett3/kgx
def transform_and_save(t:Transformer, output_path:str, output_type:str=None):
    """
    Creates a transformer with the appropriate file type from the given
    transformer, and applies that new transformation and saves to file.

    Parameters
    ----------
    t: Transformer
        The source transformer whose graph is re-serialized
    output_path: str
        Where to write the output
    output_type: str
        Output format; inferred from output_path's extension when omitted
    """
    if output_type is None:
        output_type = get_type(output_path)

    output_transformer = get_transformer(output_type)

    if output_transformer is None:
        error('Output does not have a recognized type: ' + str(get_file_types()))

    # 'extension' fixed from the misspelled 'extention', which save() would
    # not have recognized (sibling call sites use extension=).
    kwargs = {
        'extension' : output_type
    }

    w = output_transformer(t.graph)
    result_path = w.save(output_path, **kwargs)

    if result_path is not None and os.path.isfile(result_path):
        click.echo("File created at: " + result_path)
    elif os.path.isfile(output_path):
        click.echo("File created at: " + output_path)
    else:
        error("Could not create file.")
コード例 #7
0
ファイル: translator_kgx.py プロジェクト: cnbennett3/kgx
def build_transformer(path:str, input_type:str=None) -> Transformer:
    """Return a new Transformer instance suited to *path*'s file type."""
    resolved_type = get_type(path) if input_type is None else input_type
    constructor = get_transformer(resolved_type)
    if constructor is None:
        error('File does not have a recognized type: ' + str(get_file_types()))
    return constructor()
コード例 #8
0
ファイル: translator_kgx.py プロジェクト: justaddcoffee/kgx
def validate(config, path, input_type, output_dir, record_size):
    """
    Validate the graph parsed from *path*, writing one log file per error
    type into *output_dir* and echoing a per-error-type summary.
    """
    os.makedirs(output_dir, exist_ok=True)

    validator = Validator(record_size)

    t = get_transformer(get_type(path))()
    t.parse(path)
    validator.validate(t.graph)

    for error_type, failures in validator.error_dict.items():
        filename = error_type.replace(' ', '_') + '.log'
        with click.open_file(os.path.join(output_dir, filename), 'a+') as f:
            f.write('--- {} ---\n'.format(datetime.now()))
            # 'failure' renamed from 't', which shadowed the transformer above
            for failure in failures:
                # 2-tuples describe node failures, 3-tuples edge failures
                if len(failure) == 2:
                    n, message = failure
                    if message is not None:
                        f.write('node({}):\t{}\n'.format(n, message))
                    else:
                        f.write('node({})\n'.format(n))
                elif len(failure) == 3:
                    u, v, message = failure
                    if message is not None:
                        f.write('edge({}, {}):\t{}\n'.format(u, v, message))
                    else:
                        f.write('edge({}, {})\n'.format(u, v))

    if validator.error_dict == {}:
        click.echo('No errors found')
    else:
        for key, value in validator.error_dict.items():
            click.echo('{} - {}'.format(key, len(value)))
コード例 #9
0
ファイル: translator_kgx.py プロジェクト: cnbennett3/kgx
def merge(inputs, output):
    """
    Loads a series of knowledge graphs and merges cliques using `same_as` edges
    as well as `same_as` node properties. The resulting graph will not have any
    `same_as` edges, and the remaining clique leader nodes will have all
    equivalent identifiers in their `same_as` property.
    """
    output_transformer = get_transformer(get_type(output))()

    # Build all input transformers up front so a bad path fails fast.
    loaders = []
    for path in inputs:
        constructor = get_transformer(get_type(path))
        if constructor is None:
            raise Exception('No transformer for {}'.format(path))
        loaders.append(constructor())

    # Parse every input into one shared graph: the first loader's graph is
    # the accumulator, and each later loader is pointed at it before parsing.
    shared_graph = None
    for loader, path in zip(loaders, inputs):
        if shared_graph is None:
            shared_graph = loader.graph
        else:
            loader.graph = shared_graph
        loader.parse(path)

    output_transformer.graph = shared_graph
    output_transformer.graph = clique_merge(output_transformer.graph)
    output_transformer.save(output)
コード例 #10
0
ファイル: translator_kgx.py プロジェクト: cnbennett3/kgx
def validate(config, path, output, output_dir):
    """Validate the graph at *path*, appending any errors found to *output*
    (and, when given, per-error files under *output_dir*)."""
    transformer = get_transformer(get_type(path))()
    transformer.parse(path)

    validator = Validator()
    validator.validate(transformer.graph)

    timestamp = datetime.now()

    if len(validator.errors) == 0:
        click.echo('No errors found')
        return

    append_errors_to_file(output, validator.errors, timestamp)
    if output_dir is not None:
        append_errors_to_files(output_dir, validator.errors, timestamp)
コード例 #11
0
def transform(config: dict, inputs: List[str], input_type: str, output: str,
              output_type: str, mapping: str, preserve: bool):
    """
    Transform a Knowledge Graph from one serialization form to another.
    \f

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli
    inputs: List[str]
        A list of files that contains nodes/edges
    input_type: str
        The input type
    output: str
        The output file
    output_type: str
        The output type
    mapping: str
        A mapping file (TSV) for remapping node identifiers
    preserve: bool
        Whether to preserve old identifiers before remapping

    """
    # load
    input_transformer = load_transformer(inputs, input_type)

    if mapping is not None:
        # remap node identifiers; each mapping line is old_id<TAB>new_id
        mapping_dictionary = {}
        with open(mapping) as M:
            for line in M:
                element = line.rstrip().split('\t')
                mapping_dictionary[element[0]] = element[1]
        logging.info('Performing remapping based on {}'.format(mapping))
        map_graph(input_transformer.graph,
                  mapping=mapping_dictionary,
                  preserve=preserve)

    # save
    output_transformer = get_transformer(output_type)
    if output_transformer is None:
        logging.error('Output does not have a recognized type: ' +
                      str(get_file_types()))
        # Abort here: the original fell through and called None below,
        # raising an unrelated TypeError that masked the real problem.
        return
    w = output_transformer(input_transformer.graph)
    w.save(output, extension=output_type)
コード例 #12
0
ファイル: translator_kgx.py プロジェクト: vemonet/kgx
def _apply_target_filters(target, transformer):
    """Apply any node/edge filters declared on a YAML target to a transformer.

    No-op when the target declares no 'filters' section. Extracted because
    the file and Neo4j branches previously duplicated this logic verbatim.
    """
    if 'filters' not in target:
        return
    filters = target['filters']
    node_filters = filters['node_filters'] if 'node_filters' in filters else {}
    edge_filters = filters['edge_filters'] if 'edge_filters' in filters else {}
    for k, v in node_filters.items():
        transformer.set_node_filter(k, set(v))
    for k, v in edge_filters.items():
        transformer.set_edge_filter(k, set(v))
    logging.info(f"with node filters: {node_filters}")
    logging.info(f"with edge filters: {edge_filters}")


def load_and_merge(config: dict, load_config):
    """
    Load nodes and edges from files and KGs, as defined in a config YAML, and merge them into a single graph.
    The merge happens in-memory. This merged graph can then be written to a local/remote Neo4j instance
    OR be serialized into a file.
    \f

    .. note::
        Everything here is driven by the ``load_config`` YAML.

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli
    load_config:
        Path to a KGX-compatible YAML that defines 'target' sources and a
        'destination'

    """
    with open(load_config, 'r') as YML:
        cfg = yaml.load(YML, Loader=yaml.FullLoader)

    transformers = []
    for key in cfg['target']:
        target = cfg['target'][key]
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            transformer = get_transformer(target['type'])()
            if target['type'] in {'tsv', 'neo4j'}:
                # currently supporting filters only for TSV and Neo4j
                _apply_target_filters(target, transformer)
            for f in target['filename']:
                transformer.parse(f, input_format=target['type'])
            transformers.append(transformer)
        elif target['type'] == 'neo4j':
            transformer = kgx.NeoTransformer(None, target['uri'],
                                             target['username'],
                                             target['password'])
            _apply_target_filters(target, transformer)
            transformer.load()
            transformers.append(transformer)
        else:
            logging.error(
                "type {} not yet supported for KGX load-and-merge operation.".
                format(target['type']))

    merged_graph = merge_all_graphs([x.graph for x in transformers])

    destination = cfg['destination']
    if destination['type'] in ['csv', 'tsv', 'ttl', 'json', 'tar']:
        destination_transformer = get_transformer(
            destination['type'])(merged_graph)
        destination_transformer.save(destination['filename'])
    elif destination['type'] == 'neo4j':
        destination_transformer = kgx.NeoTransformer(
            merged_graph,
            uri=destination['uri'],
            username=destination['username'],
            password=destination['password'])
        destination_transformer.save()
    else:
        logging.error(
            "type {} not yet supported for KGX load-and-merge operation.".
            format(destination['type']))
コード例 #13
0
ファイル: translator_kgx.py プロジェクト: vemonet/kgx
def neo4j_download(config: dict, address: str, username: str, password: str,
                   output: str, output_type: str, subject_label: str,
                   object_label: str, edge_label: str, directed: bool,
                   page_size: int, stop_after: int):
    """
    Download nodes and edges from Neo4j database.
    \f

    Parameters
    ----------
    config: dict
        A dictionary containing the configuration for kgx.cli
    address: str
        The full HTTP address for Neo4j database
    username: str
        Username for authentication
    password: str
        Password for authentication
    output: str
        Where to write the output (stdout, by default)
    output_type: str
        The output type (``csv``, by default)
    subject_label: str
        The label for subject node in an association
    object_label: str
        The label for object node in an association
    edge_label: str
        The label for the edge in an association
    directed: bool
        Whether or not the edge is supposed to be directed (``true``, by default)
    stop_after: int
        The max number of edges to fetch
    page_size: int
        The page size to use while fetching associations from Neo4j (``10000``, by default)

    """
    if not is_writable(output):
        try:
            with open(output, 'w+') as f:
                pass
        except OSError:
            # Narrowed from a bare except: only file-system failures
            # indicate an unwritable output.
            error(f'Cannot write to {output}')

    output_transformer = get_transformer(output_type)()
    G = output_transformer.graph

    driver = http_gdb(address, username=username, password=password)

    # Wrap labels in backticks for cypher; empty string when not provided.
    subject_label = ':`{}`'.format(subject_label) if isinstance(
        subject_label, str) else ''
    object_label = ':`{}`'.format(object_label) if isinstance(
        object_label, str) else ''
    edge_label = ':`{}`'.format(edge_label) if isinstance(edge_label,
                                                          str) else ''

    if directed:
        query = 'match (n{})-[e{}]->(m{})'.format(subject_label, edge_label,
                                                  object_label)
    else:
        query = 'match (n{})-[e{}]-(m{})'.format(subject_label, edge_label,
                                                 object_label)

    results = driver.query('{} return count(*)'.format(query))
    size = [x[0] for x in results][0]
    print("SIZE: {}".format(size))

    if size == 0:
        click.echo('No records found.')
        return

    click.echo('Using cypher query: {} return n, e, m'.format(query))

    # Honor the caller's page_size; the original unconditionally overwrote
    # it with 1_000, making the documented parameter dead.
    if not page_size:
        page_size = 10_000
    skip_flag = False

    with click.progressbar(
            list(range(0, size, page_size)),
            label='Downloading {} many edges'.format(size)) as bar:
        for i in bar:
            q = '{} return n, e, m skip {} limit {}'.format(
                query, i, page_size)
            results = driver.query(q)
            for n, e, m in results:
                subject_attr = n['data']
                object_attr = m['data']
                edge_attr = e['data']

                # Records without ids cannot be added as graph nodes.
                if 'id' not in subject_attr or 'id' not in object_attr:
                    if not skip_flag:
                        click.echo(
                            'Skipping records that have no id attribute')
                        skip_flag = True
                    continue

                s = subject_attr['id']
                o = object_attr['id']

                # Fall back to Neo4j metadata when explicit properties
                # are missing.
                if 'edge_label' not in edge_attr:
                    edge_attr['edge_label'] = e['metadata']['type']

                if 'category' not in subject_attr:
                    subject_attr['category'] = n['metadata']['labels']

                if 'category' not in object_attr:
                    object_attr['category'] = m['metadata']['labels']

                if s not in G:
                    G.add_node(s, **subject_attr)
                if o not in G:
                    G.add_node(o, **object_attr)

                G.add_edge(s, o, key=edge_attr['edge_label'], **edge_attr)

            if stop_after is not None and G.number_of_edges() > stop_after:
                break

    output_transformer.save(output, extension=output_type)
コード例 #14
0
def load_and_merge(yaml_file: str) -> nx.MultiDiGraph:
    """Load and merge sources defined in the config YAML.

    Args:
        yaml_file: A string pointing to a KGX compatible config YAML.

    Returns:
        networkx.MultiDiGraph: The merged graph.

    """
    gm = GraphMerge()
    config = parse_load_config(yaml_file)
    transformers: List = []

    # make sure all files exist before we start load
    for key in config['target']:
        target = config['target'][key]
        logging.info("Checking that file exist for {}".format(key))
        if target['type'] in get_file_types():
            for f in target['filename']:
                if not os.path.exists(f) or not os.path.isfile(f):
                    # format() the message: the original passed f/key/yaml_file
                    # as extra exception args, so the {} were never filled in
                    raise FileNotFoundError(
                        "File {} for transform {} in yaml file {} "
                        "doesn't exist! Dying.".format(f, key, yaml_file))

    # read all the sources defined in the YAML
    for key in config['target']:
        target = config['target'][key]
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            transformer = get_transformer(target['type'])()
            for f in target['filename']:
                transformer.parse(f, input_format='tsv')
                transformer.graph.name = key
            transformers.append(transformer)
        elif target['type'] == 'neo4j':
            transformer = NeoTransformer(None, target['uri'],
                                         target['username'],
                                         target['password'])
            transformer.load()
            transformers.append(transformer)
            transformer.graph.name = key
        else:
            # Skip stats generation: 'transformer' would be undefined
            # (first iteration) or stale (left over from a prior source).
            logging.error("type {} not yet supported".format(target['type']))
            continue
        stats_filename = f"{key}_stats.yaml"
        generate_graph_stats(transformer.graph, key, stats_filename)

    # merge all subgraphs into a single graph
    merged_graph = gm.merge_all_graphs([x.graph for x in transformers])
    merged_graph.name = 'merged_graph'
    generate_graph_stats(merged_graph, merged_graph.name,
                         "merged_graph_stats.yaml")

    # write the merged graph
    if 'destination' in config:
        for _, destination in config['destination'].items():
            if destination['type'] == 'neo4j':
                destination_transformer = NeoTransformer(
                    merged_graph,
                    uri=destination['uri'],
                    username=destination['username'],
                    password=destination['password'])
                destination_transformer.save_with_unwind()
            elif destination['type'] in get_file_types():
                destination_transformer = get_transformer(
                    destination['type'])(merged_graph)
                destination_transformer.save(destination['filename'],
                                             extension=destination['type'])
            else:
                logging.error(
                    "type {} not yet supported for KGX load-and-merge operation."
                    .format(destination['type']))

    return merged_graph
コード例 #15
0
ファイル: translator_kgx.py プロジェクト: cnbennett3/kgx
def neo4j_download(config, page_size, stop_after, subject_label, object_label, edge_type, address, username, password, output, output_type):
    """
    Download nodes and edges from a Neo4j database into a graph and save it
    to *output*. Edges are fetched in pages of *page_size*; fetching stops
    early once more than *stop_after* edges have been collected.
    """
    if not is_writable(output):
        try:
            with open(output, 'w+') as f:
                pass
        except OSError:
            # Narrowed from a bare except: only file-system failures
            # indicate an unwritable output.
            error(f'Cannot write to {output}')

    output_transformer = get_transformer(get_type(output))()
    G = output_transformer.graph

    driver = http_gdb(address, username=username, password=password)

    # Wrap labels in backticks for cypher; empty string when not provided.
    subject_label = ':`{}`'.format(subject_label) if isinstance(subject_label, str) else ''
    object_label = ':`{}`'.format(object_label) if isinstance(object_label, str) else ''
    edge_type = ':`{}`'.format(edge_type) if isinstance(edge_type, str) else ''

    match = 'match (n{})-[e{}]->(m{})'.format(subject_label, edge_type, object_label)

    results = driver.query('{} return count(*)'.format(match))

    click.echo('Using cypher query: {} return n, e, m'.format(match))

    # Initialize size so an empty result set does not raise NameError below.
    size = 0
    for a, in results:
        size = a
        break

    if size == 0:
        click.echo('No data available')
        return

    # Honor the caller's page_size; the original unconditionally overwrote
    # it with 1_000, making the parameter dead.
    if not page_size:
        page_size = 1_000

    skip_flag = False

    with click.progressbar(list(range(0, size, page_size)), label='Downloading {} many edges'.format(size)) as bar:
        for i in bar:
            q = '{} return n, e, m skip {} limit {}'.format(match, i, page_size)
            results = driver.query(q)

            for n, e, m in results:
                subject_attr = n['data']
                object_attr = m['data']
                edge_attr = e['data']

                # Records without ids cannot be added as graph nodes.
                if 'id' not in subject_attr or 'id' not in object_attr:
                    if not skip_flag:
                        click.echo('Skipping records that have no id attribute')
                        skip_flag = True
                    continue

                s = subject_attr['id']
                o = object_attr['id']

                # Fall back to Neo4j metadata for missing properties.
                if 'edge_label' not in edge_attr:
                    edge_attr['edge_label'] = e['metadata']['type']

                if 'category' not in subject_attr:
                    subject_attr['category'] = n['metadata']['labels']

                if 'category' not in object_attr:
                    object_attr['category'] = m['metadata']['labels']

                if s not in G:
                    G.add_node(s, **subject_attr)
                if o not in G:
                    G.add_node(o, **object_attr)

                G.add_edge(s, o, key=edge_attr['edge_label'], **edge_attr)

            if stop_after is not None and G.number_of_edges() > stop_after:
                break

    output_transformer.save(output)
コード例 #16
0
ファイル: merge_kg.py プロジェクト: CoronaWhy/kg-covid-19
def load_and_merge(yaml_file: str) -> nx.MultiDiGraph:
    """Load and merge sources defined in the config YAML.

    Args:
        yaml_file: A string pointing to a KGX compatible config YAML.

    Returns:
        networkx.MultiDiGraph: The merged graph.

    """
    config = parse_load_config(yaml_file)
    transformers: List = []

    # make sure all files exist before we start load
    for key in config['target']:
        target = config['target'][key]
        logging.info("Checking that file exist for {}".format(key))
        if target['type'] in get_file_types():
            for f in target['filename']:
                if not os.path.exists(f) or not os.path.isfile(f):
                    # format() the message: the original passed f/key/yaml_file
                    # as extra exception args, so the {} were never filled in
                    raise FileNotFoundError(
                        "File {} for transform {} in yaml file {} "
                        "doesn't exist! Dying.".format(f, key, yaml_file))

    # read all the sources defined in the YAML
    for key in config['target']:
        target = config['target'][key]
        logging.info("Loading {}".format(key))
        if target['type'] in get_file_types():
            # loading from a file
            try:
                transformer = get_transformer(target['type'])()
                if target['type'] in {'tsv', 'neo4j'}:
                    if 'filters' in target:
                        apply_filters(target, transformer)
                for f in target['filename']:
                    transformer.parse(f, input_format='tsv')
                    transformer.graph.name = key
                if 'operations' in target:
                    apply_operations(target, transformer)
                transformers.append(transformer)
            except Exception:
                # Narrowed from a bare except (which also swallowed
                # KeyboardInterrupt/SystemExit). Log the traceback, keyed by
                # the target (the original logged 'f', which could be unbound
                # or stale), and skip stats for the failed source.
                logging.exception("Failed loading {}".format(key))
                continue
        elif target['type'] == 'neo4j':
            transformer = NeoTransformer(None, target['uri'],
                                         target['username'],
                                         target['password'])
            if 'filters' in target:
                apply_filters(target, transformer)
            transformer.load()
            if 'operations' in target:
                apply_operations(target, transformer)
            transformers.append(transformer)
            transformer.graph.name = key
        else:
            # Skip stats generation: 'transformer' would be undefined
            # (first iteration) or stale (left over from a prior source).
            logging.error("type {} not yet supported".format(target['type']))
            continue
        stats_filename = f"{key}_stats.yaml"
        generate_graph_stats(transformer.graph, key, stats_filename)

    # merge all subgraphs into a single graph
    merged_graph = merge_all_graphs([x.graph for x in transformers])
    merged_graph.name = 'merged_graph'
    generate_graph_stats(merged_graph, merged_graph.name,
                         "merged_graph_stats.yaml", ['provided_by'],
                         ['provided_by'])

    # write the merged graph
    if 'destination' in config:
        for _, destination in config['destination'].items():
            if destination['type'] == 'neo4j':
                destination_transformer = NeoTransformer(
                    merged_graph,
                    uri=destination['uri'],
                    username=destination['username'],
                    password=destination['password'])
                destination_transformer.save()
            elif destination['type'] in get_file_types():
                destination_transformer = get_transformer(
                    destination['type'])(merged_graph)
                mode = 'w:gz' if destination['type'] in {'tsv'} else None
                if destination['type'] in {'nt', 'nt.gz', 'ttl'}:
                    destination_transformer.set_property_types(PROPERTY_TYPES)
                destination_transformer.save(destination['filename'],
                                             output_format=destination['type'],
                                             mode=mode)
            else:
                logging.error(
                    "type {} not yet supported for KGX load-and-merge operation."
                    .format(destination['type']))

    return merged_graph