Example #1
0
def data_clear(inputs):
    """Wipe the Neo4j database by dispatching a 'clear' job.

    Once the job completes, the PID recorded in the settings file is
    broadcast so subscribed GUI components stay in sync.

    :param inputs: Dictionary of inputs; must contain 'fp' (file path).
    :return: None
    """
    inputs['job'] = 'clear'
    run_neo4j(inputs, publish=True)
    # Re-read the settings file to pick up the PID written by run_neo4j.
    stored = read_settings(inputs['fp'] + '/settings.json')
    pub.sendMessage('pid', msg=stored['pid'])
    pub.sendMessage('update', msg='Completed database operations!')
Example #2
0
def data_starter(inputs):
    """Launch the database and upload the specified files.

    Dispatches an 'upload' job, then broadcasts the PID recorded in the
    settings file so subscribed GUI components can track the process.

    :param inputs: Dictionary of inputs; must contain 'fp' (file path).
    :return: None
    """
    inputs['job'] = 'upload'
    run_neo4j(inputs, publish=True)
    # Re-read the settings file to pick up the PID written by run_neo4j.
    stored = read_settings(inputs['fp'] + '/settings.json')
    pub.sendMessage('pid', msg=stored['pid'])
    pub.sendMessage('update', msg='Completed database operations!')
Example #3
0
    def load_settings(self, event):
        """
        Publisher function that loads a dictionary of settings
        and updates the GUI to show these.
        Source: wxpython FileDialog docs

        :param event: wx event object that triggered this handler (unused).
        """
        # Reset settings so a cancelled dialog does not keep stale values.
        self.settings = dict()
        with wx.FileDialog(self,
                           "Open settings file",
                           wildcard="json files (*.json)|*.json",
                           style=wx.FD_OPEN
                           | wx.FD_FILE_MUST_EXIST) as fileDialog:

            if fileDialog.ShowModal() == wx.ID_CANCEL:
                return  # the user changed their mind

            # Proceed loading the file chosen by the user
            pathname = fileDialog.GetPath()
            try:
                self.settings = read_settings(pathname)
            except IOError:
                wx.LogError("Cannot open file '%s'." % pathname)
                logger.error("Cannot open file. ", exc_info=True)
        # NOTE(review): if read_settings failed above, self.settings is still
        # empty and the 'fp' lookup below raises KeyError — confirm intended.
        self.currentDirectory = self.settings['fp']
        self.dir_txt.SetValue(self.settings['fp'])
        self.biom_file = self.settings['biom_file']
        self.biom_txt.SetValue('')
        self.network_path = self.settings['network']
        # Each optional setting updates its matching attribute, runs the file
        # check, and fills the corresponding text widget (one path per line).
        if self.settings['biom_file'] is not None:
            self.checkfiles('biom')
            self.biom_txt.SetValue('\n'.join(self.settings['biom_file']))
        if self.settings['otu_table'] is not None:
            self.count_file = self.settings['otu_table']
            self.checkfiles('count')
            self.count_txt.SetValue('\n'.join(self.settings['otu_table']))
        if self.settings['tax_table'] is not None:
            self.tax_file = self.settings['tax_table']
            self.checkfiles('tax')
            self.tax_txt.SetValue('\n'.join(self.settings['tax_table']))
        if self.settings['split'] is not None:
            self.split = self.settings['split']
        if self.settings['sample_data'] is not None:
            self.sample_file = self.settings['sample_data']
            self.checkfiles('meta')
            self.meta_txt.SetValue('\n'.join(self.settings['sample_data']))
        # Selection 1 = a network is configured, 0 = none.
        if self.settings['network'] is not None:
            self.net_choice.SetSelection(1)
        else:
            self.net_choice.SetSelection(0)
        # Broadcast the loaded settings to any subscribed components.
        pub.sendMessage('load_settings', msg=self.settings)
        self.send_settings()
Example #4
0
def data_viewer(inputs):
    """
    Gets metadata variables and network names from database.

    :param inputs: Dictionary of inputs; must contain 'fp' and credentials.
    :return: Tuple of (metadata variable list, network name list).
    """
    # Stored settings form the baseline; caller-supplied values win.
    merged = read_settings(inputs['fp'] + '/settings.json')
    merged.update(inputs)
    inputs = merged
    netdriver = ImportDriver(user=inputs['username'],
                             password=inputs['password'],
                             uri=inputs['address'],
                             filepath=inputs['fp'])
    # Flatten the query records into unique values with set comprehensions.
    meta_hits = netdriver.custom_query(query="MATCH (n:Property)--(Sample) RETURN n.type")
    meta = {record[key] for record in meta_hits for key in record}
    network_hits = netdriver.custom_query(query="MATCH (n:Network) RETURN n.name")
    networks = {record[key] for record in network_hits for key in record}
    netdriver.close()
    pub.sendMessage('view', msg=(list(meta), list(networks)))
    pub.sendMessage('update', msg='Completed database operations!')
    return list(meta), list(networks)
Example #5
0
def run_network(inputs, publish=False):
    """
    Pipes functions from the different massoc modules to run complete network inference.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return: None
    """
    _create_logger(inputs['fp'])
    # Stored settings form the baseline; caller-supplied values take precedence.
    old_inputs = read_settings(inputs['fp'] + '/settings.json')
    old_inputs.update(inputs)
    inputs = old_inputs
    filestore = read_bioms(inputs['procbioms'])
    bioms = Batch(filestore, inputs)
    bioms = Nets(bioms)
    if inputs['tools'] is not None:
        logger.info('Tools to run with default settings: ' + str(inputs['tools']) + ' ')
    # Precompute one output filename per (tool, level, name) combination.
    bioms.inputs['network'] = [
        bioms.inputs['fp'] + '/' + tool + '_' + level + '_' + name + '.txt'
        for tool in bioms.inputs['tools']
        for level in bioms.inputs['levels']
        for name in bioms.inputs['name']
    ]
    if publish:
        pub.sendMessage('update', msg='Starting network inference. This may take some time!')
    try:
        logger.info('Running network inference...  ')
        networks = run_parallel(bioms)
        networks.write_networks()
    except Exception:
        logger.warning('Failed to complete network inference.  ', exc_info=True)
        # Bug fix: 'networks' is unbound here, so writing settings or
        # publishing success below would raise NameError. Bail out instead.
        return
    write_settings(networks.inputs)
    if publish:
        pub.sendMessage('update', msg="Finished running network inference!")
    logger.info('Finished running network inference.  ')
Example #6
0
def _is_zero_weight(weight):
    """Return True when *weight* is a numeric string equal to zero.

    None (no weight supplied) and non-numeric strings are NOT treated
    as zero, so those entries are still stored.
    """
    if weight is None:
        return False
    try:
        return float(weight) == 0
    except ValueError:
        return False


def run_metastats(inputs, publish=False):
    """
    Module that carries out analysis of metadata on the database.
    This module also interfaces with external APIs to pull in additional metadata.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return: None
    """
    # Stored settings form the baseline; caller-supplied values take precedence.
    old_inputs = read_settings(inputs['fp'] + '/settings.json')
    old_inputs.update(inputs)
    inputs = old_inputs
    _create_logger(inputs['fp'])
    checks = str()
    try:
        if publish:
            pub.sendMessage('update', msg='Starting database drivers.')
        metadriver = MetaDriver(user=inputs['username'],
                                password=inputs['password'],
                                uri=inputs['address'],
                                filepath=inputs['fp'])
        importdriver = ImportDriver(user=inputs['username'],
                                    password=inputs['password'],
                                    uri=inputs['address'],
                                    filepath=inputs['fp'])
    except Exception:
        logger.warning("Failed to start database worker.  ", exc_info=True)
        # Bug fix: without the drivers every later step (and the close()
        # calls at the end) would raise NameError, so abort here instead.
        return
    if inputs['sequence']:
        try:
            logger.info('Uploading sequences to database...')
            if publish:
                pub.sendMessage('update', msg='Uploading sequences to database...')
            importdriver.include_sequences(inputs['sequence'])
        except Exception:
            logger.warning("Failed to upload sequences to database.  ", exc_info=True)
    if inputs['add']:
        try:
            logger.info('Uploading additional properties...  ')
            if publish:
                pub.sendMessage('update', msg='Uploading files to database...')
            # Each file is a tab-delimited table: first column is the source
            # node, remaining columns are targets (or abundances).
            for k in range(len(inputs['add'])):
                filepath = inputs['add'][k]
                with open(filepath, 'r') as file:
                    # Second column name is type
                    # Newline is cutoff
                    colnames = file.readline().split(sep="\t")
                    # NOTE(review): readlines()[1:] skips the first row after
                    # the header — confirm input files have a two-line header.
                    lines = file.readlines()[1:]
                    if not inputs['type']:
                        label = colnames[0].rstrip()
                    else:
                        label = inputs['type']
                    # if the supplied file is a dataframe,
                    # treat first column as source and rest as target
                    logger.info('Found ' + str(len(colnames)) + ' properties.')
                    for i in range(1, len(colnames)):
                        # give a logger update every 5th property
                        node_dict = dict()
                        name = colnames[i].rstrip()
                        if i % 5 == 0:
                            logger.info('Working on the ' + str(i) + 'th property.')
                        for line in lines:
                            source = line.split(sep="\t")[0].rstrip()
                            weight = None
                            if inputs['abundance']:
                                target = colnames[i].rstrip()
                                name = inputs['abundance'][k]
                                weight = line.split(sep="\t")[i].rstrip()
                            else:
                                target = line.split(sep="\t")[i].rstrip()
                            # Bug fix: weight is a string (or None), so the
                            # original 'weight != 0' was always True and zero
                            # abundances were never skipped.
                            if not _is_zero_weight(weight):
                                node_dict[source] = {'target': target, 'weight': weight}
                        importdriver.include_nodes(nodes=node_dict, name=name, label=label)
        except Exception:
            logger.warning("Failed to upload properties to database.  ", exc_info=True)
    inputs['add'] = None
    inputs['type'] = None
    # prevents reuploading
    try:
        # write operations here
        if inputs['agglom']:
            tax_list = ['Species', 'Genus', 'Family', 'Order', 'Class', 'Phylum', 'Kingdom']
            level_id = tax_list.index(inputs['agglom'].capitalize())
            if inputs['weight']:
                mode = inputs['weight']
            else:
                mode = 'Ignore weight'
            # Agglomerate bottom-up through every level up to the requested one.
            for level in range(0, level_id+1):
                logger.info("Agglomerating edges...")
                metadriver.agglomerate_network(level=tax_list[level], mode=mode)
            checks += 'Successfully agglomerated edges. \n'
    except Exception:
        logger.warning("Failed to carry out edge agglomeration.  ", exc_info=True)
        checks += 'Failed to carry out edge agglomeration. \n'
    try:
        if inputs['variable']:
            logger.info("Associating samples...  ")
            # Guarded by publish for consistency with the other GUI updates.
            if publish:
                pub.sendMessage('update', msg="Associating samples...")
            # 'all' associates every property type found in the database.
            if inputs['variable'][0] == 'all':
                properties = set([x[y] for x in metadriver.custom_query("MATCH (n:Property) RETURN n.type") for y in x])
                for prop in properties:
                    metadriver.associate_samples(label=prop)
            else:
                for var in inputs['variable']:
                    metadriver.associate_samples(label=var)
            checks += 'Completed associations. \n'
    except Exception:
        logger.warning("Failed to compute metadata associations.  ", exc_info=True)
        checks += 'Failed to compute metadata associations. \n'
    if publish:
        pub.sendMessage('database_log', msg=checks)
    metadriver.close()
    importdriver.close()
    logger.info('Completed metastats operations!  ')
    write_settings(inputs)
Example #7
0
def run_netstats(inputs, publish=False):
    """
    Runs statistical analyses on the Neo4j database, as well as logic operations.
    To do: null models.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return: None
    """
    # Stored settings form the baseline; caller-supplied values take precedence.
    old_inputs = read_settings(inputs['fp'] + '/settings.json')
    old_inputs.update(inputs)
    inputs = old_inputs
    _create_logger(inputs['fp'])
    checks = str()
    # Make sure a database process is running before connecting.
    if 'pid' in inputs:
        existing_pid = pid_exists(inputs['pid'])
    else:
        existing_pid = False
    if not existing_pid:
        start_database(inputs, publish)
    try:
        if publish:
            pub.sendMessage('update', msg='Starting database drivers.')
        netdriver = NetDriver(user=inputs['username'],
                              password=inputs['password'],
                              uri=inputs['address'], filepath=inputs['fp'])
        importdriver = ImportDriver(user=inputs['username'],
                                    password=inputs['password'],
                                    uri=inputs['address'], filepath=inputs['fp'])
    except Exception:
        logger.warning("Failed to start database worker.  ", exc_info=True)
        # Bug fix: without the drivers the logic block and the close()
        # calls below would raise NameError, so abort here instead.
        return
    try:
        # write operations here
        if inputs['logic']:
            # Default to every network in the database when none are named.
            if not inputs['networks']:
                networks = list()
                hits = importdriver.custom_query("MATCH (n:Network) RETURN n")
                for hit in hits:
                    networks.append(hit['n'].get('name'))
            else:
                networks = inputs['networks']
            if 'union' in inputs['logic']:
                netdriver.graph_union(networks=networks)
            if 'intersection' in inputs['logic']:
                for n in inputs['num']:
                    netdriver.graph_intersection(networks=networks,
                                                 weight=inputs['weight'], n=int(n))
            if 'difference' in inputs['logic']:
                netdriver.graph_difference(networks=networks,
                                           weight=inputs['weight'])
            checks += 'Logic operations completed. \n'
            if publish:
                pub.sendMessage('update', msg="Exporting network...")
                if inputs['networks'] is not None:
                    names = [x.split('.')[0] for x in inputs['networks']]
                    importdriver.export_network(path=inputs['fp'] + '/' +
                                                "_".join(names) + '.graphml')
                    logger.info("Exporting networks to: " + inputs['fp'] + '/' +
                                "_".join(names) + '.graphml')
                    checks += "Exporting networks to: " + inputs['fp'] + '/' +\
                              "_".join(names) + '.graphml' "\n"
                else:
                    importdriver.export_network(path=inputs['fp'] + '/' +
                                                      '_complete.graphml')
                    logger.info("Exporting networks to: " + inputs['fp'] + '/' +
                                '_complete.graphml')
                    checks += "Exporting networks to: " + inputs['fp'] + '/' +\
                              '_complete.graphml' "\n"
        else:
            logger.warning("No logic operation specified!")
        if publish:
            pub.sendMessage('update', msg="Completed database operations!")
        checks += 'Completed database operations! \n'
    except Exception:
        logger.warning("Failed to run database worker.  ", exc_info=True)
        checks += 'Failed to run database worker. \n'
    if publish:
        pub.sendMessage('database_log', msg=checks)
    importdriver.close()
    netdriver.close()
    logger.info('Completed netstats operations!  ')
    write_settings(inputs)
Example #8
0
def run_neo4j(inputs, publish=False):
    """
    Starts and carries out operations on the Neo4j database.

    The operation is selected through inputs['job']: 'start', 'quit',
    'clear', 'write', 'cyto', or any other value for file upload.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return: None
    """
    _create_logger(inputs['fp'])
    # overwritten settings should be retained
    old_inputs = read_settings(inputs['fp'] + '/settings.json')
    # check if password etc is already there
    # Bug fix: default to an empty mapping so the credential fallback
    # below cannot hit an unbound name when no credentials are stored.
    logins = dict()
    if 'username' in old_inputs:
        logins = dict((k, old_inputs[k]) for k in ('username', 'password', 'address', 'neo4j'))
    old_inputs.update(inputs)
    inputs = old_inputs
    if 'pid' in inputs:
        existing_pid = pid_exists(inputs['pid'])
    else:
        existing_pid = False
    # Fall back to stored credentials when none were supplied.
    if not inputs['neo4j']:
        inputs.update(logins)
    checks = str()
    if inputs['job'] == 'start':
        if not existing_pid:
            start_database(inputs, publish)
        else:
            logger.info("Database is already running.  ")
    elif inputs['job'] == 'quit':
        if not existing_pid:
            logger.info("No database open.  ")
        else:
            try:
                if publish:
                    pub.sendMessage('update', msg='Getting PID...')
                # there is a lingering Java process that places a lock on the database.
                # terminating the subprocess does NOT terminate the Java process,
                # so the store lock has to be deleted manually.
                # This is different for Linux & Windows machines and may not be trivial
                # however, PID solution may be platform-independent
                # CURRENT SOLUTION:
                # get parent PID of subprocess
                # use psutil to get child PIDs
                # kill child PIDs too
                parent_pid = inputs['pid']
                parent = Process(parent_pid)
                children = parent.children(recursive=True)
                for child in children:
                    child.kill()
                # apparently killing the children also kills the parent
            except Exception:
                logger.warning("Failed to close database.  ", exc_info=True)
    elif inputs['job'] == 'clear':
        if not existing_pid:
            start_database(inputs, publish)
        try:
            if publish:
                pub.sendMessage('update', msg='Clearing database...')
            importdriver = ImportDriver(user=inputs['username'],
                                        password=inputs['password'],
                                        uri=inputs['address'], filepath=inputs['fp'])
            importdriver.clear_database()
            importdriver.close()
        except Exception:
            logger.warning("Failed to clear database.  ", exc_info=True)
    elif inputs['job'] == 'write':
        if not existing_pid:
            start_database(inputs, publish)
        try:
            if publish:
                pub.sendMessage('update', msg='Accessing database...')
            importdriver = ImportDriver(user=inputs['username'],
                                        password=inputs['password'],
                                        uri=inputs['address'], filepath=inputs['fp'])
            importdriver.export_network(path=inputs['fp'])
            importdriver.close()
        except Exception:
            logger.warning("Failed to write database to graphml file.  ", exc_info=True)
    elif inputs['job'] == 'cyto':
        if not existing_pid:
            start_database(inputs, publish)
        try:
            if publish:
                pub.sendMessage('update', msg='Accessing database...')
            importdriver = ImportDriver(user=inputs['username'],
                                        password=inputs['password'],
                                        uri=inputs['address'], filepath=inputs['fp'])
            importdriver.export_cyto()
            importdriver.close()
        except Exception:
            logger.warning("Failed to export networks to Cytoscape.  ", exc_info=True)
    else:
        # Default job: upload BIOM files and networks to the database.
        if not existing_pid:
            start_database(inputs, publish)
        if publish:
            pub.sendMessage('update', msg='Uploading files to database...')
        filestore = None
        if inputs['procbioms']:
            filestore = read_bioms(inputs['procbioms'])
        # ask users for additional input
        bioms = Batch(filestore, inputs)
        bioms = Nets(bioms)
        for file in inputs['network']:
            network = _read_network(file)
            bioms.add_networks(network, file)
        # give the freshly started database time to accept connections
        sleep(12)
        importdriver = ImportDriver(user=inputs['username'],
                                    password=inputs['password'],
                                    uri=inputs['address'], filepath=inputs['fp'])
        # Bug fix: defined before the try so the network upload below cannot
        # hit an unbound name if the BIOM upload fails early.
        itemlist = list()
        try:
            logger.info("Uploading BIOM files...")
            for level in inputs['procbioms']:
                for item in inputs['procbioms'][level]:
                    name = inputs['procbioms'][level][item]
                    biomfile = load_table(name)
                    importdriver.convert_biom(biomfile=biomfile, exp_id=name)
                    itemlist.append(name)
            checks += 'Successfully uploaded the following items and networks to the database: \n'
            for item in itemlist:
                checks += (item + '\n')
            checks += '\n'
            logger.info(checks)
        except Exception:
            logger.warning("Failed to upload BIOM files to Neo4j database.  ", exc_info=True)
        try:
            logger.info('Uploading network files...  ')
            for item in bioms.networks:
                network = bioms.networks[item]
                # try to split filename to make a nicer network id
                subnames = item.split('/')
                if len(subnames) == 1:
                    subnames = item.split('\\')
                name = subnames[-1].split('.')[0]
                importdriver.convert_networkx(network=network, network_id=name, mode='weight')
                itemlist.append(item)
        except Exception:
            logger.warning('Unable to upload network files to Neo4j database. ', exc_info=True)
            checks += 'Unable to upload network files to Neo4j database.\n'
        if publish:
            pub.sendMessage('database_log', msg=checks)
        importdriver.close()
    logger.info('Completed database operations!  ')
    write_settings(inputs)