def data_clear(inputs):
    """
    Clears the database.

    Dispatches a 'clear' job through run_neo4j, then re-reads the
    settings file so the current database PID and a completion message
    can be published to the GUI over pubsub.

    :param inputs: Dictionary of inputs; must contain 'fp' (file path).
    :return: None
    """
    inputs['job'] = 'clear'
    run_neo4j(inputs, publish=True)
    # run_neo4j may (re)start the database; pick up the PID it recorded
    stored = read_settings(inputs['fp'] + '/settings.json')
    pub.sendMessage('pid', msg=stored['pid'])
    pub.sendMessage('update', msg='Completed database operations!')
def data_starter(inputs):
    """
    Starts up database and uploads specified files.

    Dispatches an 'upload' job through run_neo4j, then re-reads the
    settings file so the current database PID and a completion message
    can be published to the GUI over pubsub.

    :param inputs: Dictionary of inputs; must contain 'fp' (file path).
    :return: None
    """
    # first check if process already exists
    inputs['job'] = 'upload'
    run_neo4j(inputs, publish=True)
    # run_neo4j records the database PID in the settings file
    stored = read_settings(inputs['fp'] + '/settings.json')
    pub.sendMessage('pid', msg=stored['pid'])
    pub.sendMessage('update', msg='Completed database operations!')
def load_settings(self, event):
    """
    Publisher function that loads a dictionary of settings
    and updates the GUI to show these.

    Source: wxpython FileDialog docs

    :param event: wx event that triggered this handler (unused).
    :return: None
    """
    self.settings = dict()
    with wx.FileDialog(self, "Open settings file",
                       wildcard="json files (*.json)|*.json",
                       style=wx.FD_OPEN | wx.FD_FILE_MUST_EXIST) as fileDialog:
        if fileDialog.ShowModal() == wx.ID_CANCEL:
            return  # the user changed their mind
        # Proceed loading the file chosen by the user
        pathname = fileDialog.GetPath()
        try:
            self.settings = read_settings(pathname)
        except IOError:
            wx.LogError("Cannot open file '%s'." % pathname)
            logger.error("Cannot open file. ", exc_info=True)
            # Bug fix: without this return, self.settings stays an empty
            # dict and the lookups below raise KeyError on top of the
            # already-reported I/O failure.
            return
    self.currentDirectory = self.settings['fp']
    self.dir_txt.SetValue(self.settings['fp'])
    self.biom_file = self.settings['biom_file']
    self.biom_txt.SetValue('')
    self.network_path = self.settings['network']
    # each optional settings entry updates its matching text control
    if self.settings['biom_file'] is not None:
        self.checkfiles('biom')
        self.biom_txt.SetValue('\n'.join(self.settings['biom_file']))
    if self.settings['otu_table'] is not None:
        self.count_file = self.settings['otu_table']
        self.checkfiles('count')
        self.count_txt.SetValue('\n'.join(self.settings['otu_table']))
    if self.settings['tax_table'] is not None:
        self.tax_file = self.settings['tax_table']
        self.checkfiles('tax')
        self.tax_txt.SetValue('\n'.join(self.settings['tax_table']))
    if self.settings['split'] is not None:
        self.split = self.settings['split']
    if self.settings['sample_data'] is not None:
        self.sample_file = self.settings['sample_data']
        self.checkfiles('meta')
        self.meta_txt.SetValue('\n'.join(self.settings['sample_data']))
    # selection 1 = network supplied, 0 = no network
    if self.settings['network'] is not None:
        self.net_choice.SetSelection(1)
    else:
        self.net_choice.SetSelection(0)
    pub.sendMessage('load_settings', msg=self.settings)
    self.send_settings()
def data_viewer(inputs):
    """
    Gets metadata variables and network names from database.

    Merges the stored settings with the supplied inputs, queries the
    database for Property types and Network names, publishes both lists
    over pubsub and returns them.

    :param inputs: Dictionary of inputs; must contain 'fp', 'username',
        'password' and 'address'.
    :return: Tuple of (metadata variable list, network name list).
    """
    merged = read_settings(inputs['fp'] + '/settings.json')
    merged.update(inputs)
    inputs = merged
    driver = ImportDriver(user=inputs['username'],
                          password=inputs['password'],
                          uri=inputs['address'],
                          filepath=inputs['fp'])

    def _collect(records):
        # flatten the query records into a set of unique values
        return {record[key] for record in records for key in record}

    meta = _collect(driver.custom_query(query="MATCH (n:Property)--(Sample) RETURN n.type"))
    networks = _collect(driver.custom_query(query="MATCH (n:Network) RETURN n.name"))
    driver.close()
    meta, networks = list(meta), list(networks)
    pub.sendMessage('view', msg=(meta, networks))
    pub.sendMessage('update', msg='Completed database operations!')
    return meta, networks
def run_network(inputs, publish=False):
    """
    Pipes functions from the different massoc modules to run complete
    network inference.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return: None
    """
    _create_logger(inputs['fp'])
    old_inputs = read_settings(inputs['fp'] + '/settings.json')
    old_inputs.update(inputs)
    inputs = old_inputs
    # handler to file
    filestore = read_bioms(inputs['procbioms'])
    bioms = Batch(filestore, inputs)
    bioms = Nets(bioms)
    if inputs['tools'] is not None:
        logger.info('Tools to run with default settings: ' + str(inputs['tools']) + ' ')
    # precompute the output filename for every tool / taxonomic level / study
    bioms.inputs['network'] = list()
    network_names = list()
    for tool in bioms.inputs['tools']:
        for level in bioms.inputs['levels']:
            for name in bioms.inputs['name']:
                filename = bioms.inputs['fp'] + '/' + tool + '_' + level + '_' + name + '.txt'
                network_names.append(filename)
    bioms.inputs['network'] = network_names
    if publish:
        pub.sendMessage('update', msg='Starting network inference. This may take some time!')
    try:
        logger.info('Running network inference... ')
        networks = run_parallel(bioms)
        networks.write_networks()
    except Exception:
        # Bug fix: previously execution fell through to
        # write_settings(networks.inputs) with 'networks' unbound,
        # raising a NameError on top of the original failure.
        logger.warning('Failed to complete network inference. ', exc_info=True)
        return
    write_settings(networks.inputs)
    if publish:
        pub.sendMessage('update', msg="Finished running network inference!")
    logger.info('Finished running network inference. ')
def run_metastats(inputs, publish=False):
    """
    Module that carries out analysis of metadata on the database.
    This module also interfaces with external APIs to pull in
    additional metadata.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return: None
    """
    old_inputs = read_settings(inputs['fp'] + '/settings.json')
    old_inputs.update(inputs)
    inputs = old_inputs
    # handler to file
    _create_logger(inputs['fp'])
    checks = str()
    try:
        if publish:
            pub.sendMessage('update', msg='Starting database drivers.')
        # sys.stdout.write('Starting database drivers.')
        metadriver = MetaDriver(user=inputs['username'],
                                password=inputs['password'],
                                uri=inputs['address'],
                                filepath=inputs['fp'])
        importdriver = ImportDriver(user=inputs['username'],
                                    password=inputs['password'],
                                    uri=inputs['address'],
                                    filepath=inputs['fp'])
    except Exception:
        # Bug fix: without this return, the drivers are unbound and every
        # use below raises NameError on top of the logged failure.
        logger.warning("Failed to start database worker. ", exc_info=True)
        return
    if inputs['sequence']:
        try:
            logger.info('Uploading sequences to database...')
            if publish:
                pub.sendMessage('update', msg='Uploading sequences to database...')
            importdriver.include_sequences(inputs['sequence'])
        except Exception:
            logger.warning("Failed to upload sequences to database. ", exc_info=True)
    if inputs['add']:
        try:
            logger.info('Uploading additional properties... \n')
            if publish:
                pub.sendMessage('update', msg='Uploading files to database...')
            # create dictionary from file
            # first check if this is an abundance table
            for k in range(len(inputs['add'])):
                filepath = inputs['add'][k]
                with open(filepath, 'r') as file:
                    # Second column name is type
                    # Newline is cutoff
                    colnames = file.readline().split(sep="\t")
                    # NOTE(review): readline() already consumed the header, so
                    # the [1:] below also drops the first data row — confirm
                    # against the expected file format.
                    lines = file.readlines()[1:]
                if not inputs['type']:
                    label = colnames[0].rstrip()
                else:
                    label = inputs['type']
                # if the supplied file is a dataframe,
                # treat first column as source and rest as target
                logger.info('Found ' + str(len(colnames)) + ' properties.')
                for i in range(1, len(colnames)):
                    # give a logger update every 5th property
                    node_dict = dict()
                    name = colnames[i].rstrip()
                    if i % 5 == 0:
                        logger.info('Working on the ' + str(i) + 'th property.')
                    for line in lines:
                        source = line.split(sep="\t")[0].rstrip()
                        weight = None
                        if inputs['abundance']:
                            target = colnames[i].rstrip()
                            name = inputs['abundance'][k]
                            weight = line.split(sep="\t")[i].rstrip()
                        else:
                            target = line.split(sep="\t")[i].rstrip()
                        # Bug fix: 'weight' is a string read from the file, so
                        # the original test (weight != 0) was always True and
                        # zero-weight entries were never skipped. Parse the
                        # value numerically; non-numeric weights are kept.
                        zero_weight = False
                        if weight is not None:
                            try:
                                zero_weight = float(weight) == 0
                            except ValueError:
                                zero_weight = False
                        if not zero_weight:
                            node_dict[source] = {'target': target, 'weight': weight}
                    importdriver.include_nodes(nodes=node_dict, name=name, label=label)
        except Exception:
            logger.warning("Failed to upload properties to database. ", exc_info=True)
    inputs['add'] = None
    inputs['type'] = None
    # prevents reuploading
    try:
        # write operations here
        if inputs['agglom']:
            tax_list = ['Species', 'Genus', 'Family', 'Order', 'Class', 'Phylum', 'Kingdom']
            level_id = tax_list.index(inputs['agglom'].capitalize())
            if inputs['weight']:
                mode = inputs['weight']
            else:
                mode = 'Ignore weight'
            # agglomerate from species level up to the requested level
            for level in range(0, level_id+1):
                # pub.sendMessage('update', msg="Agglomerating edges...")
                logger.info("Agglomerating edges...")
                metadriver.agglomerate_network(level=tax_list[level], mode=mode)
            checks += 'Successfully agglomerated edges. \n'
    except Exception:
        logger.warning("Failed to carry out edge agglomeration. ", exc_info=True)
        checks += 'Failed to carry out edge agglomeration. \n'
    try:
        if inputs['variable']:
            logger.info("Associating samples... ")
            pub.sendMessage('update', msg="Associating samples...")
            # sys.stdout.write("Associating samples...")
            if inputs['variable'][0] == 'all':
                # associate every Property type found in the database
                properties = set([x[y] for x in
                                  metadriver.custom_query("MATCH (n:Property) RETURN n.type")
                                  for y in x])
                for prop in properties:
                    metadriver.associate_samples(label=prop)
            else:
                for var in inputs['variable']:
                    metadriver.associate_samples(label=var)
            checks += 'Completed associations. \n'
    except Exception:
        logger.warning("Failed to compute metadata associations. ", exc_info=True)
        checks += 'Failed to compute metadata associations. \n'
    if publish:
        pub.sendMessage('database_log', msg=checks)
    # functions to include:
    # include_sequences
    metadriver.close()
    importdriver.close()
    logger.info('Completed metastats operations! ')
    write_settings(inputs)
def run_netstats(inputs, publish=False):
    """
    Runs statistical analyses on the Neo4j database,
    as well as logic operations.
    To do: null models.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return: None
    """
    old_inputs = read_settings(inputs['fp'] + '/settings.json')
    old_inputs.update(inputs)
    inputs = old_inputs
    # handler to file
    _create_logger(inputs['fp'])
    checks = str()
    if 'pid' in inputs:
        existing_pid = pid_exists(inputs['pid'])
    else:
        existing_pid = False
    if not existing_pid:
        start_database(inputs, publish)
        existing_pid = True
    try:
        if publish:
            pub.sendMessage('update', msg='Starting database drivers.')
        # sys.stdout.write('Starting database drivers.')
        netdriver = NetDriver(user=inputs['username'],
                              password=inputs['password'],
                              uri=inputs['address'],
                              filepath=inputs['fp'])
        importdriver = ImportDriver(user=inputs['username'],
                                    password=inputs['password'],
                                    uri=inputs['address'],
                                    filepath=inputs['fp'])
    except Exception:
        # Bug fix: without this return, the drivers are unbound and every
        # use below (including the close() calls) raises NameError.
        logger.warning("Failed to start database worker. ", exc_info=True)
        return
    try:
        # write operations here
        if inputs['logic']:
            if not inputs['networks']:
                # no explicit network list: operate on all stored networks
                networks = list()
                hits = importdriver.custom_query("MATCH (n:Network) RETURN n")
                for hit in hits:
                    networks.append(hit['n'].get('name'))
            else:
                networks = inputs['networks']
            if 'union' in inputs['logic']:
                netdriver.graph_union(networks=networks)
            if 'intersection' in inputs['logic']:
                for n in inputs['num']:
                    netdriver.graph_intersection(networks=networks,
                                                 weight=inputs['weight'], n=int(n))
            if 'difference' in inputs['logic']:
                netdriver.graph_difference(networks=networks,
                                           weight=inputs['weight'])
            checks += 'Logic operations completed. \n'
            if publish:
                pub.sendMessage('update', msg="Exporting network...")
            if inputs['networks'] is not None:
                names = [x.split('.')[0] for x in inputs['networks']]
                importdriver.export_network(path=inputs['fp'] + '/' + "_".join(names) + '.graphml')
                logger.info("Exporting networks to: " + inputs['fp'] + '/' + "_".join(names) + '.graphml')
                checks += "Exporting networks to: " + inputs['fp'] + '/' + \
                          "_".join(names) + '.graphml' "\n"
            else:
                importdriver.export_network(path=inputs['fp'] + '/' + '_complete.graphml')
                logger.info("Exporting networks to: " + inputs['fp'] + '/' + '_complete.graphml')
                checks += "Exporting networks to: " + inputs['fp'] + '/' + \
                          '_complete.graphml' "\n"
        else:
            logger.warning("No logic operation specified!")
        if publish:
            pub.sendMessage('update', msg="Completed database operations!")
        # sys.stdout.write("Completed database operations!")
        checks += 'Completed database operations! \n'
    except Exception:
        logger.warning("Failed to run database worker. ", exc_info=True)
        checks += 'Failed to run database worker. \n'
    if publish:
        pub.sendMessage('database_log', msg=checks)
    importdriver.close()
    netdriver.close()
    logger.info('Completed netstats operations! ')
    write_settings(inputs)
def run_neo4j(inputs, publish=False):
    """
    Starts and carries out operations on the Neo4j database.

    The operation is selected through inputs['job']: 'start', 'quit',
    'clear', 'write', 'cyto', or (any other value) a full file upload.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return:
    """
    _create_logger(inputs['fp'])
    # overwritten settings should be retained
    old_inputs = read_settings(inputs['fp'] + '/settings.json')
    # handler to file
    # check if password etc is already there
    if 'username' in old_inputs:
        logins = dict((k, old_inputs[k]) for k in ('username', 'password', 'address', 'neo4j'))
    old_inputs.update(inputs)
    inputs = old_inputs
    if 'pid' in inputs:
        existing_pid = pid_exists(inputs['pid'])
    else:
        existing_pid = False
    # NOTE(review): if 'username' was not in old_inputs and inputs['neo4j']
    # is falsy, 'logins' is unbound here and this raises NameError — verify
    # whether callers guarantee stored credentials in that case.
    if not inputs['neo4j']:
        inputs.update(logins)
    # accumulates a human-readable log that is published to the GUI
    checks = str()
    if inputs['job'] == 'start':
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        else:
            logger.info("Database is already running. ")
    elif inputs['job'] == 'quit':
        if not existing_pid:
            logger.info("No database open. ")
        else:
            try:
                if publish:
                    pub.sendMessage('update', msg='Getting PID...')
                # there is a lingering Java process that places a lock on the database.
                # terminating the subprocess does NOT terminate the Java process,
                # so the store lock has to be deleted manually.
                # This is different for Linux & Windows machines and may not be trivial
                # however, PID solution may be platform-independent
                # CURRENT SOLUTION:
                # get parent PID of subprocess
                # use psutil to get child PIDs
                # kill child PIDs too
                parent_pid = inputs['pid']
                parent = Process(parent_pid)
                children = parent.children(recursive=True)
                for child in children:
                    child.kill()
                # apparently killing the children also kills the parent
            except Exception:
                logger.warning("Failed to close database. ", exc_info=True)
    elif inputs['job'] == 'clear':
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        try:
            if publish:
                pub.sendMessage('update', msg='Clearing database...')
            importdriver = ImportDriver(user=inputs['username'],
                                        password=inputs['password'],
                                        uri=inputs['address'],
                                        filepath=inputs['fp'])
            importdriver.clear_database()
            importdriver.close()
        except Exception:
            logger.warning("Failed to clear database. ", exc_info=True)
    elif inputs['job'] == 'write':
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        try:
            if publish:
                pub.sendMessage('update', msg='Accessing database...')
            importdriver = ImportDriver(user=inputs['username'],
                                        password=inputs['password'],
                                        uri=inputs['address'],
                                        filepath=inputs['fp'])
            # exports the stored networks to a graphml file under 'fp'
            importdriver.export_network(path=inputs['fp'])
            importdriver.close()
        except Exception:
            logger.warning("Failed to write database to graphml file. ", exc_info=True)
    elif inputs['job'] == 'cyto':
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        try:
            if publish:
                pub.sendMessage('update', msg='Accessing database...')
            importdriver = ImportDriver(user=inputs['username'],
                                        password=inputs['password'],
                                        uri=inputs['address'],
                                        filepath=inputs['fp'])
            importdriver.export_cyto()
            importdriver.close()
        except Exception:
            logger.warning("Failed to export networks to Cytoscape. ", exc_info=True)
    else:
        # default job: upload BIOM files and networks to the database
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        if publish:
            pub.sendMessage('update', msg='Uploading files to database...')
        filestore = None
        if inputs['procbioms']:
            filestore = read_bioms(inputs['procbioms'])
        # ask users for additional input
        bioms = Batch(filestore, inputs)
        bioms = Nets(bioms)
        for file in inputs['network']:
            network = _read_network(file)
            bioms.add_networks(network, file)
        importdriver = None
        # presumably gives the freshly started database time to accept
        # connections — TODO confirm the 12 s delay is required
        sleep(12)
        importdriver = ImportDriver(user=inputs['username'],
                                    password=inputs['password'],
                                    uri=inputs['address'],
                                    filepath=inputs['fp'])
        # importdriver.clear_database()
        try:
            # pub.sendMessage('update', msg='Uploading BIOM files...')
            logger.info("Uploading BIOM files...")
            itemlist = list()
            for level in inputs['procbioms']:
                for item in inputs['procbioms'][level]:
                    name = inputs['procbioms'][level][item]
                    biomfile = load_table(name)
                    importdriver.convert_biom(biomfile=biomfile, exp_id=name)
                    itemlist.append(name)
            checks += 'Successfully uploaded the following items and networks to the database: \n'
            for item in itemlist:
                checks += (item + '\n')
            checks += '\n'
            logger.info(checks)
        except Exception:
            # NOTE(review): if this fails before 'itemlist = list()' runs,
            # the network-upload loop below references an unbound 'itemlist'.
            logger.warning("Failed to upload BIOM files to Neo4j database. ", exc_info=True)
        try:
            # pub.sendMessage('update', msg='Uploading network files...')
            logger.info('Uploading network files... ')
            for item in bioms.networks:
                network = bioms.networks[item]
                # try to split filename to make a nicer network id
                subnames = item.split('/')
                if len(subnames) == 1:
                    subnames = item.split('\\')
                name = subnames[-1].split('.')[0]
                importdriver.convert_networkx(network=network, network_id=name, mode='weight')
                itemlist.append(item)
        except Exception:
            logger.warning('Unable to upload network files to Neo4j database. ', exc_info=True)
            checks += 'Unable to upload network files to Neo4j database.\n'
        if publish:
            pub.sendMessage('database_log', msg=checks)
        importdriver.close()
        logger.info('Completed database operations! ')
        write_settings(inputs)