def main():
    """Imports according to the given arguments.
    """
    args = main_parse_args()
    merge_keys = ['node', 'node_meta', 'edge2line', 'status', 'edge',
                  'edge_meta', 'raw_line', 'table', 'log']
    if args.importfile == 'log':
        args.importfile = merge_logs(args)
    elif args.importfile in merge_keys:
        args.importfile = merge(args.importfile, args)
    table = ''
    ld_cmd = ''
    dup_cmd = ''
    for key in args.importfile.split('.'):
        if key in merge_keys:
            table = key
            break
    if not table:
        raise ValueError("ERROR: 'importfile' must contain one of " +
                         ','.join(merge_keys))
    import_file(args.importfile, table, ld_cmd, dup_cmd, args)
    if table == 'node_meta':
        filename = args.importfile.replace("node_meta", "node_meta_table")
        mu.get_database("KnowNet", args).dump_table(table, filename)
        ru.import_node_meta(filename, args)
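
# A self-contained sketch of the table-detection convention above: the import
# file name is dot-delimited and must contain one of the merge keys. The
# helper name and the sample file name are hypothetical, not from the source.
def _detect_table(importfile, merge_keys):
    return next((key for key in importfile.split('.') if key in merge_keys), '')

# _detect_table('biogrid.9606.node_meta.txt', ['node', 'node_meta']) == 'node_meta'
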
def main():
    """Parses arguments and then exports the specified subnetworks.
    """
    parser = ArgumentParser()
    parser = cf.add_config_args(parser)
    parser = su.add_config_args(parser)
    parser.add_argument("-e", "--edge_type", help="Edge type")
    parser.add_argument("-s", "--species", help="Species")
    args = parser.parse_args()

    db = mu.get_database(args=args)
    db.use_db("KnowNet")

    cls, bidir = figure_out_class(db, args.edge_type)
    edges_fn = '{}.{}.edge'.format(args.species, args.edge_type)
    nodes_fn = '{}.{}.node_map'.format(args.species, args.edge_type)
    meta_fn = '{}.{}.metadata'.format(args.species, args.edge_type)
    bucket_dir = os.path.join(cls, args.species, args.edge_type)
    sync_dir = os.path.join(args.bucket, bucket_dir)
    sync_edges = os.path.join(sync_dir, edges_fn)
    sync_nodes = os.path.join(sync_dir, nodes_fn)
    sync_meta = os.path.join(sync_dir, meta_fn)

    if not args.force_fetch and all(
            map(os.path.exists, [sync_edges, sync_nodes, sync_meta])):
        print("Files already exist.  Skipping.")
        return

    get = get_gg if cls == 'Gene' else get_pg
    res = get(db, args.edge_type, args.species)

    print("ProductionLines: " + str(len(res)))
    if not args.force_fetch and should_skip(cls, res):
        print('Skipping {}.{}'.format(args.species, args.edge_type))
        return
    res, lines = norm_edges(res, args)

    n1des = list(set(i[0] for i in res))
    n2des = list(set(i[1] for i in res))

    n1des_desc = convert_nodes(args, n1des)
    n2des_desc = convert_nodes(args, n2des)
    nodes_desc = set(n1des_desc) | set(n2des_desc)

    metadata = get_metadata(db, res, nodes_desc, lines, args.species,
                            args.edge_type, args)
    db.close()

    os.makedirs(sync_dir, exist_ok=True)
    with open(sync_edges, 'w', newline='') as file:
        csvw = csv.writer(file, delimiter='\t')
        csvw.writerows(res)
    with open(sync_nodes, 'w', encoding='utf-8', newline='') as file:
        csvw = csv.writer(file, delimiter='\t')
        csvw.writerows(nodes_desc)
    with open(sync_meta, 'w') as file:
        yaml.dump(metadata, file, default_flow_style=False)
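
# Sketch of the export layout built above, using a hypothetical helper and
# hypothetical argument values; the real class comes from figure_out_class.
def _export_paths(bucket, cls, species, edge_type):
    base = '{}.{}'.format(species, edge_type)
    sync_dir = os.path.join(bucket, cls, species, edge_type)
    return [os.path.join(sync_dir, base + ext)
            for ext in ('.edge', '.node_map', '.metadata')]

# _export_paths('/mnt/bucket', 'Gene', '9606', 'STRING_textmining')[0]
# == '/mnt/bucket/Gene/9606/STRING_textmining/9606.STRING_textmining.edge'
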
def species_import(alias_dict, args=cf.config_args()):
    """Produces the species.txt file and imports it into the database. Also
    creates a species.json file.

    This takes the alias dictionary and creates the species table:
    taxon   sp_abbrev   sp_sciname  representative
    and imports the table into the database. It also produces a species.json
    file of the form species:taxid.

    Args:
        alias_dict (dict): alias dictionary describing the source

    Returns:
    """
    src_data_dir = os.path.join(args.working_dir, args.data_path,
                                cf.DEFAULT_MAP_PATH)
    table_dir = os.path.join(src_data_dir, 'species')
    os.makedirs(table_dir, exist_ok=True)
    table_file = os.path.join(table_dir, 'species.txt')
    species_file = table_file.replace('.txt', '.json')
    species_dict = dict()
    if os.path.isfile(species_file):
        os.remove(species_file)
    #    previous_species = json.load(open(species_file))
    #    species_dict.update(previous_species)
    with open(table_file, 'a') as sp_file:
        for species in alias_dict:
            taxid = alias_dict[species].split('::')[0]
            species = species.capitalize().replace('_', ' ')
            species_dict[species] = taxid
            sp_abbrev = species[0] + species.split(' ')[1][:3]
            sp_file.write('\t'.join([taxid, sp_abbrev, species, species]) +
                          '\n')
    db.get_database(None, args).import_table('KnowNet', table_file, '--ignore')
    with open(species_file, 'w') as outfile:
        json.dump(species_dict, outfile, indent=4, sort_keys=True)
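
# Self-contained sketch of the row-building logic above; the helper name and
# the sample alias value are hypothetical, not from the source.
def _species_row(species_key, alias_value):
    taxid = alias_value.split('::')[0]
    name = species_key.capitalize().replace('_', ' ')
    abbrev = name[0] + name.split(' ')[1][:3]
    return '\t'.join([taxid, abbrev, name, name])

# _species_row('homo_sapiens', '9606::ens') == '9606\tHsap\tHomo sapiens\tHomo sapiens'
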
def enable_keys(args=None):
    """Imports the provided  file into the KnowEnG MySQL database using optimal
    settings.

    Starts a transaction and changes some MySQL settings for optimization, which
    disables the keys. It then loads the data into the provided table in MySQL.
    Note that the keys are not re-enabled after import. To do this call
    mysql_utilities.get_database('KnowNet', args).enable_keys().

    Args:
        file_name (str): path to the file to be imported
        table (str): name of the permanent table to import to
        ld_cmd (str): optional additional command for loading data
        args (Namespace): args as populated namespace or 'None' for defaults
    """
    if args is None:
        args = cf.config_args()
    db = mu.get_database('KnowNet', args)
    db.enable_keys()
    db.close()
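
# Usage sketch: assuming the package's bulk loader leaves keys disabled for
# speed, enable_keys restores them afterwards. The file and table names here
# are hypothetical:
#   args = cf.config_args()
#   import_file('edge.txt', 'edge', args=args)
#   enable_keys(args)
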
def import_production_edges(args=None):
    """Query production edges from status table into the edge table.

    Queries the KnowNet status table and copies all distinct production edges
    to the edge table. If a duplication occurs during the query, it updates to
    the maximum edge score and keeps the edge hash for that edge.

    Args:
        args (Namespace): args as populated namespace or 'None' for defaults
    """

    if args is None:
        args = cf.config_args()
    db = mu.get_database('KnowNet', args)
    cmd = ('SELECT DISTINCT n1_id, n2_id, et_name, weight, edge_hash '
           'FROM KnowNet.status WHERE status.status="production" '
           'ON DUPLICATE KEY UPDATE edge.weight = '
           'IF(edge.weight > status.weight, edge.weight, status.weight)')
    tablename = 'KnowNet.edge'
    db.insert(tablename, cmd)
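
# Assuming db.insert(table, cmd) composes 'INSERT INTO <table> <cmd>' (an
# assumption about mysql_utilities), the call above expands to an
# INSERT ... SELECT whose ON DUPLICATE KEY UPDATE clause keeps the larger of
# the existing and incoming weights and leaves the stored edge_hash in place.
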
def import_file(file_name, table, ld_cmd='', dup_cmd='', args=None):
    """Imports the provided  file into the KnowEnG MySQL database.

    Loads the data into a temporary table in MySQL. It then queries from the
    temporary table into the corresponding permanent table. If a duplication
    occurs during the query, it uses the provided behavior to handle. If no
    behavior is provided, it replaces into the table.

    Args:
        file_name (str): path to the file to be imported
        table (str): name of the permanent table to import to
        ld_cmd (str): optional additional command for loading data
        dup_cmd (str): command for handling duplicates
        args (Namespace): args as populated namespace or 'None' for defaults
    """
    if args is None:
        args = cf.config_args()
    db = mu.get_database('KnowNet', args)
    print('Inserting data from ' + file_name + ' into ' + table)
    db.load_data(file_name, table, ld_cmd)
    db.close()
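
# Usage sketch with a hypothetical file name; ld_cmd is passed straight to
# the loader, so the option shown is an assumption about the accepted syntax:
#   import_file('raw_line.txt', 'raw_line', ld_cmd='IGNORE 1 LINES',
#               args=cf.config_args())
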
def db_import(version_dict, args=cf.config_args()):
    """Imports the data into the database and saves local id mapping
    dictionaries.

    This takes the version dictionary (source.alias.json) and imports all
    relevant tables into the database. It then combines all the relevant tables
    for gene id mapping, and saves local copies of the mapping dictionaries.

    Args:
        version_dict (dict): version dictionary describing the
            source:alias

    Returns:
    """
    db.import_ensembl(version_dict['alias'], args)
    db.combine_tables(version_dict['alias'], args)
    db.query_all_mappings(version_dict, args)
    node_table = db.import_nodes(version_dict, args)
    ru.import_gene_nodes(node_table, args)
    ru.import_ensembl(version_dict['alias'], args)
    db_name = 'ensembl_' + version_dict['alias']
    mysql_db = db.get_database(db_name, args)
    mysql_db.drop_db(db_name)
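
# Minimal hypothetical version_dict for the calls above: with
# {'source': 'ensembl', 'alias': 'homo_sapiens', ...}, db_name becomes
# 'ensembl_homo_sapiens', and that staging database is dropped once the node
# table and mapping dictionaries have been saved.
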
def import_filemeta(version_dict, args=None):
    """Imports the provided version_dict into the KnowEnG MySQL database.

    Loads the data from a version dictionary into the raw_file table.

    Args:
        version_dict (dict): version dictionary describing a downloaded file
        args (Namespace): args as populated namespace or 'None' for defaults
    """
    if args is None:
        args = cf.config_args()
    db = mu.get_database('KnowNet', args)
    values = [
        version_dict["source"] + '.' + version_dict["alias"],
        version_dict["remote_url"], version_dict["remote_date"],
        version_dict["remote_version"], version_dict["remote_size"],
        version_dict["source_url"], version_dict["image"],
        version_dict["reference"], version_dict["pmid"],
        version_dict["license"], 'CURRENT_TIMESTAMP',
        version_dict["local_file_name"], 'NULL'
    ]
    cmd = 'VALUES( ' + ','.join('%s' for i in values) + ')'
    db.replace_safe('raw_file', cmd, values)
    db.close()
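
# Self-contained sketch of the placeholder string built above, with the
# thirteen values listed:
#   'VALUES( ' + ','.join('%s' for i in range(13)) + ')'
#   == 'VALUES( %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
# replace_safe is assumed to bind these as a parameterized REPLACE INTO
# raw_file statement.
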