def main(): """Imports according to the given arguments. """ args = main_parse_args() merge_keys = ['node', 'node_meta', 'edge2line', 'status', 'edge', \ 'edge_meta', 'raw_line', 'table', 'log'] if args.importfile == 'log': args.importfile = merge_logs(args) elif args.importfile in merge_keys: args.importfile = merge(args.importfile, args) table = '' ld_cmd = '' dup_cmd = '' for key in args.importfile.split('.'): if key in merge_keys: table = key break if not table: raise ValueError("ERROR: 'importfile' must contain one of "+\ ','.join(merge_keys)) import_file(args.importfile, table, ld_cmd, dup_cmd, args) if table == 'node_meta': filename = args.importfile.replace("node_meta", "node_meta_table") mu.get_database("KnowNet", args).dump_table(table, filename) ru.import_node_meta(filename, args)
def main(): """Parses arguments and then exports the specified subnetworks. """ parser = ArgumentParser() parser = cf.add_config_args(parser) parser = su.add_config_args(parser) parser.add_argument("-e", "--edge_type", help="Edge type") parser.add_argument("-s", "--species", help="Species") args = parser.parse_args() db = mu.get_database(args=args) db.use_db("KnowNet") cls, bidir = figure_out_class(db, args.edge_type) edges_fn = '{}.{}.edge'.format(args.species, args.edge_type) nodes_fn = '{}.{}.node_map'.format(args.species, args.edge_type) meta_fn = '{}.{}.metadata'.format(args.species, args.edge_type) bucket_dir = os.path.join(cls, args.species, args.edge_type) sync_dir = os.path.join(args.bucket, bucket_dir) sync_edges = os.path.join(sync_dir, edges_fn) sync_nodes = os.path.join(sync_dir, nodes_fn) sync_meta = os.path.join(sync_dir, meta_fn) if not args.force_fetch and all( map(os.path.exists, [sync_edges, sync_nodes, sync_meta])): print("Files already exist. Skipping.") return get = get_gg if cls == 'Gene' else get_pg res = get(db, args.edge_type, args.species) print("ProductionLines: " + str(len(res))) if not args.force_fetch and should_skip(cls, res): print('Skipping {}.{}'.format(args.species, args.edge_type)) return res, lines = norm_edges(res, args) n1des = list(set(i[0] for i in res)) n2des = list(set(i[1] for i in res)) n1des_desc = convert_nodes(args, n1des) n2des_desc = convert_nodes(args, n2des) nodes_desc = set(n1des_desc) | set(n2des_desc) metadata = get_metadata(db, res, nodes_desc, lines, args.species, args.edge_type, args) db.close() os.makedirs(sync_dir, exist_ok=True) with open(sync_edges, 'w') as file: csvw = csv.writer(file, delimiter='\t') csvw.writerows(res) with open(sync_nodes, 'w', encoding='utf-8') as file: csvw = csv.writer(file, delimiter='\t') csvw.writerows(nodes_desc) with open(sync_meta, 'w') as file: yaml.dump(metadata, file, default_flow_style=False)
def species_import(alias_dict, args=cf.config_args()):
    """Produces the species.txt file and imports it into the database.

    Also creates a species.json file. This takes the alias dictionary and
    creates the species table:
        taxon   sp_abbrev   sp_sciname  representative
    and imports the table into the database. It also produces a species.json
    file of the form species:taxid.

    Args:
        alias_dict (dict): alias dictionary describing the source
        args (Namespace): args as populated namespace or 'None' for defaults

    Returns:
    """
    src_data_dir = os.path.join(args.working_dir, args.data_path,
                                cf.DEFAULT_MAP_PATH)
    table_dir = os.path.join(src_data_dir, 'species')
    os.makedirs(table_dir, exist_ok=True)
    table_file = os.path.join(table_dir, 'species.txt')
    species_file = table_file.replace('txt', 'json')
    species_dict = dict()
    if os.path.isfile(species_file):
        os.remove(species_file)
        # previous_species = json.load(open(species_file))
        # species_dict.update(previous_species)
    with open(table_file, 'a') as sp_file:
        for species in alias_dict:
            taxid = alias_dict[species].split('::')[0]
            species = species.capitalize().replace('_', ' ')
            species_dict[species] = taxid
            sp_abbrev = species[0] + species.split(' ')[1][:3]
            sp_file.write('\t'.join([taxid, sp_abbrev, species, species]) + '\n')
    db.get_database(None, args).import_table('KnowNet', table_file, '--ignore')
    with open(species_file, 'w') as outfile:
        json.dump(species_dict, outfile, indent=4, sort_keys=True)
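
# Sketch (hypothetical alias value): the species-row derivation performed for
# each entry of alias_dict; keys are species names and values are assumed to
# start with the NCBI taxon id followed by '::'.
def _example_species_row():
    alias_dict = {'homo_sapiens': '9606::ENSEMBL_CORE'}  # illustrative value
    species = 'homo_sapiens'.capitalize().replace('_', ' ')  # 'Homo sapiens'
    taxid = alias_dict['homo_sapiens'].split('::')[0]  # '9606'
    sp_abbrev = species[0] + species.split(' ')[1][:3]  # 'Hsap'
    return '\t'.join([taxid, sp_abbrev, species, species])
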
def enable_keys(args=None):
    """Re-enables the keys on the KnowNet MySQL database.

    Imports leave the MySQL keys disabled for speed and do not re-enable them
    when they finish. This connects to the KnowNet database and calls
    enable_keys() to turn the keys back on, then closes the connection.

    Args:
        args (Namespace): args as populated namespace or 'None' for defaults
    """
    if args is None:
        args = cf.config_args()
    db = mu.get_database('KnowNet', args)
    db.enable_keys()
    db.close()
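
# Sketch (assumed workflow): a driver might batch several import_file calls
# (defined below) and then re-enable the keys once at the end; the chunk
# names are hypothetical.
def _example_bulk_edge_import(chunks=('stringdb.9606.1.edge.txt',
                                      'stringdb.9606.2.edge.txt')):
    example_args = cf.config_args()
    for chunk in chunks:
        import_file(chunk, 'edge', args=example_args)
    enable_keys(example_args)  # keys stay disabled until this call
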
def import_production_edges(args=None):
    """Query production edges from status table into the edge table.

    Queries the KnowNet status table and copies all distinct production edges
    to the edge table. If a duplication occurs during the query, it updates
    to the maximum edge score and keeps the edge hash for that edge.

    Args:
        args (Namespace): args as populated namespace or 'None' for defaults
    """
    if args is None:
        args = cf.config_args()
    db = mu.get_database('KnowNet', args)
    cmd = ('SELECT DISTINCT n1_id, n2_id, et_name, weight, edge_hash '
           'FROM KnowNet.status WHERE status.status="production" '
           'ON DUPLICATE KEY UPDATE edge.weight = '
           'IF(edge.weight > status.weight, edge.weight, status.weight)')
    tablename = 'KnowNet.edge'
    db.insert(tablename, cmd)
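
# Assumption: db.insert(table, cmd) is taken to prepend "INSERT INTO <table> "
# to cmd, so the call above amounts to an INSERT ... SELECT from status into
# edge whose ON DUPLICATE KEY UPDATE keeps the larger weight. Typical usage
# with default configuration:
def _example_promote_production_edges():
    import_production_edges(cf.config_args())
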
def import_file(file_name, table, ld_cmd='', dup_cmd='', args=None):
    """Imports the provided file into the KnowEnG MySQL database.

    Loads the data into a temporary table in MySQL. It then queries from the
    temporary table into the corresponding permanent table. If a duplication
    occurs during the query, it uses the provided behavior to handle it. If
    no behavior is provided, it replaces into the table.

    Args:
        file_name (str): path to the file to be imported
        table (str): name of the permanent table to import to
        ld_cmd (str): optional additional command for loading data
        dup_cmd (str): command for handling duplicates
        args (Namespace): args as populated namespace or 'None' for defaults
    """
    if args is None:
        args = cf.config_args()
    db = mu.get_database('KnowNet', args)
    print('Inserting data from ' + file_name + ' into ' + table)
    db.load_data(file_name, table, ld_cmd)
    db.close()
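
# Sketch (hypothetical file path): importing a single processed chunk into the
# permanent node table, relying on the default ld_cmd and dup_cmd behavior.
def _example_import_node_chunk():
    example_args = cf.config_args()
    import_file('chunks/stringdb.9606.1.node.txt', 'node', args=example_args)
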
def db_import(version_dict, args=cf.config_args()):
    """Imports the data into the database and saves local id mapping
    dictionaries.

    This takes the version dictionary (source.alias.json) and imports all
    relevant tables into the database. It then combines all the relevant
    tables for gene id mapping, and saves local copies of the mapping
    dictionaries.

    Args:
        version_dict (dict): version dictionary describing the source:alias
        args (Namespace): args as populated namespace or 'None' for defaults

    Returns:
    """
    db.import_ensembl(version_dict['alias'], args)
    db.combine_tables(version_dict['alias'], args)
    db.query_all_mappings(version_dict, args)
    node_table = db.import_nodes(version_dict, args)
    ru.import_gene_nodes(node_table, args)
    ru.import_ensembl(version_dict['alias'], args)
    db_name = 'ensembl_' + version_dict['alias']
    mysql_db = db.get_database(db_name, args)
    mysql_db.drop_db(db_name)
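
# Sketch (illustrative values): db_import itself only reads the 'alias' field
# directly; the full source.alias.json dictionary (see import_filemeta below
# for more of its fields) is passed through to the db and ru helpers.
def _example_db_import():
    version_dict = {'source': 'ensembl', 'alias': 'homo_sapiens_core'}
    db_import(version_dict, cf.config_args())
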
def import_filemeta(version_dict, args=None):
    """Imports the provided version_dict into the KnowEnG MySQL database.

    Loads the data from a version dictionary into the raw_file table.

    Args:
        version_dict (dict): version dictionary describing a downloaded file
        args (Namespace): args as populated namespace or 'None' for defaults
    """
    if args is None:
        args = cf.config_args()
    db = mu.get_database('KnowNet', args)
    values = [version_dict["source"] + '.' + version_dict["alias"],
              version_dict["remote_url"], version_dict["remote_date"],
              version_dict["remote_version"], version_dict["remote_size"],
              version_dict["source_url"], version_dict["image"],
              version_dict["reference"], version_dict["pmid"],
              version_dict["license"], 'CURRENT_TIMESTAMP',
              version_dict["local_file_name"], 'NULL']
    cmd = 'VALUES( ' + ','.join('%s' for i in values) + ')'
    db.replace_safe('raw_file', cmd, values)
    db.close()
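
# Sketch (hypothetical values): a version dictionary carrying every key that
# import_filemeta reads when assembling the raw_file row.
def _example_import_filemeta():
    version_dict = {'source': 'stringdb', 'alias': '9606',
                    'remote_url': 'http://example.org/9606.links.txt.gz',
                    'remote_date': '2017-01-01', 'remote_version': 'v10.5',
                    'remote_size': 123456789,
                    'source_url': 'http://example.org/',
                    'image': '', 'reference': '', 'pmid': '', 'license': '',
                    'local_file_name': 'stringdb.9606.txt'}
    import_filemeta(version_dict, cf.config_args())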