Example #1
def Main():
    filter_obj = {
        'dendrite_type': 'spiny',
        'structure_layer_name': '5',
        'structure_area_abbrev': 'VISp'
    }

    ctc = CellTypesCache()
    cells = ctc.get_cells(species=['Mus musculus'])

    cells_df = pd.DataFrame(cells)
    for filt_key, filt_val in filter_obj.items():
        cells_df = cells_df.loc[cells_df[filt_key] == filt_val, :]

    cell_ids = list(cells_df['id'].values)
    rc = Client(profile=os.getenv('IPYTHON_PROFILE'))
    logger.debug('Using ipyparallel with %d engines', len(rc))
    lview = rc.load_balanced_view()

    func = partial(get_fi_data, ctc)
    filter_fi_data = lview.map_sync(func, cell_ids)
    filter_fi_data = [data for data in filter_fi_data if data is not None]
    file_name = 'fi_data.pkl'
    with open(file_name, 'wb') as fh:
        pickle.dump(filter_fi_data, fh)
    plot_fi_data(filter_fi_data)

    rc.shutdown(hub=True)
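For reference, here is a minimal, self-contained sketch of the same connect / load-balanced map / shutdown pattern. It assumes an ipcluster is already running under the default profile; the mapped function is purely illustrative, a stand-in for a real per-item worker such as get_fi_data.

from functools import partial
from ipyparallel import Client

def scale(factor, x):
    # trivial stand-in for a real per-item worker
    return factor * x

rc = Client()                          # connect to the running cluster
lview = rc.load_balanced_view()        # greedy scheduling across engines
results = lview.map_sync(partial(scale, 10), range(100))
rc.shutdown(hub=True)                  # stop engines and the hub when done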
Example #2
def test_distributed_executor():
    from distributed import Client

    learner = Learner1D(linear, (-1, 1))
    client = Client(n_workers=1)
    BlockingRunner(learner, trivial_goal, executor=client)
    client.shutdown()
    assert learner.npoints > 0
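An analogous test against an ipyparallel backend could presumably be written the same way. The sketch below is untested and assumes an ipcluster is already running; it reuses the linear and trivial_goal helpers from above and relies on ipyparallel's Client.executor(), which returns a concurrent.futures-style executor.

def test_ipyparallel_executor():
    from ipyparallel import Client

    learner = Learner1D(linear, (-1, 1))
    client = Client()                                  # running ipcluster assumed
    BlockingRunner(learner, trivial_goal, executor=client.executor())
    client.shutdown(hub=True)
    assert learner.npoints > 0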
Example #3
def stop_server(is_slurm=False):
    '''
    programmatically stops the ipyparallel server
    '''
    sys.stdout.write("Stopping cluster...\n")
    sys.stdout.flush()

    
    if is_slurm:
        from ipyparallel import Client
        pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
        c = Client(ipython_dir=pdir, profile=profile)        
        ee = c[:]
        ne = len(ee)
        print('Shutting down %d engines.' % ne)
        c.shutdown(hub=True)
        shutil.rmtree('profile_' + str(profile))
        try: 
            shutil.rmtree('./log/')
        except:
            print('creating log folder')
            
        files = glob.glob('*.log')
        os.mkdir('./log')
        
        for fl in files:
            shutil.move(fl, './log/')
    
    else:
        
        proc = subprocess.Popen(["ipcluster stop"], shell=True, stderr=subprocess.PIPE)
        line_out = proc.stderr.readline()
        # stderr is a bytes stream, so compare against bytes literals
        if b'CRITICAL' in line_out:
            sys.stdout.write("No cluster to stop...")
            sys.stdout.flush()
        elif b'Stopping' in line_out:
            st = time.time()
            sys.stdout.write('Waiting for cluster to stop...')
            while (time.time() - st) < 4:
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(1)
        else:
            print('**** Unrecognized syntax in ipcluster output, waiting for server to stop anyway ****')
    
        

    sys.stdout.write(" done\n")
Example #4
def stop_server(is_slurm=False):
    '''
    programmatically stops the ipyparallel server
    '''
    sys.stdout.write("Stopping cluster...\n")
    sys.stdout.flush()

    if is_slurm:
        from ipyparallel import Client
        pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
        c = Client(ipython_dir=pdir, profile=profile)
        ee = c[:]
        ne = len(ee)
        print('Shutting down %d engines.' % ne)
        c.shutdown(hub=True)
        shutil.rmtree('profile_' + str(profile))
        try:
            shutil.rmtree('./log/')
        except:
            print('creating log folder')

        files = glob.glob('*.log')
        os.mkdir('./log')

        for fl in files:
            shutil.move(fl, './log/')

    else:

        proc = subprocess.Popen(["ipcluster stop"],
                                shell=True,
                                stderr=subprocess.PIPE)
        line_out = proc.stderr.readline()
        # stderr is a bytes stream, so compare against bytes literals
        if b'CRITICAL' in line_out:
            sys.stdout.write("No cluster to stop...")
            sys.stdout.flush()
        elif b'Stopping' in line_out:
            st = time.time()
            sys.stdout.write('Waiting for cluster to stop...')
            while (time.time() - st) < 4:
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(1)
        else:
            print('**** Unrecognized syntax in ipcluster output, waiting for server to stop anyway ****')

    sys.stdout.write(" done\n")
Example #5
        lb_view = None
        rc = None

    meshFile = 'mesh/striatum-mesh.obj'
    # meshFile = "mesh/cortex-mesh-200.obj"
    sm = RegionMesh(meshFile,
                    d_view=d_view,
                    lb_view=lb_view,
                    raytrace_borders=False)

    # import cProfile
    # cProfile.run("neuronPos = sm.placeNeurons(1000)")

    # sm.plotStruct()

    nNeurons = 1730000
    neuronPos = sm.place_neurons(nNeurons)
    # sm.verify_d_min()
    sm.plot_neurons(pdf_name="figures/striatum-fig-somas.png")

    sm.plot_struct(pdf_name="figures/striatum-fig-struct.png")

    # sm.testPlot()
    # sm.testPlotCached()

    # tp = (sm.minCoord + sm.maxCoord)/2
    # sm.rayCasting(np.array(tp))

    if d_view and rc:
        rc.shutdown(hub=True)
Example #6
def gibbs(table_fp, mapping_fp, output_dir, loo, jobs, alpha1, alpha2, beta,
          source_rarefaction_depth, sink_rarefaction_depth, restarts,
          draws_per_restart, burnin, delay, cluster_start_delay,
          source_sink_column, source_column_value, sink_column_value,
          source_category_column):
    '''Gibb's sampler for Bayesian estimation of microbial sample sources.

    For details, see the project README file.
    '''
    # Create results directory. Click has already checked if it exists, and
    # failed if so.
    os.mkdir(output_dir)

    # Load the mapping file and biom table and remove samples which are not
    # shared.
    o = open(mapping_fp, 'U')
    sample_metadata_lines = o.readlines()
    o.close()

    sample_metadata, biom_table = \
        _cli_sync_biom_and_sample_metadata(
            parse_mapping_file(sample_metadata_lines),
            load_table(table_fp))

    # If biom table has fractional counts, it can produce problems in indexing
    # later on.
    biom_table.transform(lambda data, id, metadata: np.ceil(data))

    # If biom table has sample metadata, there will be pickling errors when
    # submitting multiple jobs. We remove the metadata by making a copy of the
    # table without metadata.
    biom_table = Table(biom_table._data.toarray(),
                       biom_table.ids(axis='observation'),
                       biom_table.ids(axis='sample'))

    # Parse the mapping file and options to get the samples requested for
    # sources and sinks.
    source_samples, sink_samples = sinks_and_sources(
        sample_metadata,
        column_header=source_sink_column,
        source_value=source_column_value,
        sink_value=sink_column_value)

    # If we have no source samples, neither normal operation nor LOO will
    # work, and we will also likely get strange errors.
    if len(source_samples) == 0:
        raise ValueError('Mapping file or biom table passed contain no '
                         '`source` samples.')

    # Prepare the 'sources' matrix by collapsing the `source_samples` by their
    # metadata values.
    sources_envs, sources_data = collapse_sources(source_samples,
                                                  sample_metadata,
                                                  source_category_column,
                                                  biom_table,
                                                  sort=True)

    # Rarefy data if requested.
    sources_data, biom_table = \
        subsample_sources_sinks(sources_data, sink_samples, biom_table,
                                source_rarefaction_depth,
                                sink_rarefaction_depth)

    # Build a function that requires only a single parameter -- sample -- to
    # enable parallel processing if requested.
    if loo:
        f = partial(_cli_loo_runner,
                    source_category=source_category_column,
                    alpha1=alpha1,
                    alpha2=alpha2,
                    beta=beta,
                    restarts=restarts,
                    draws_per_restart=draws_per_restart,
                    burnin=burnin,
                    delay=delay,
                    sample_metadata=sample_metadata,
                    sources_data=sources_data,
                    sources_envs=sources_envs,
                    biom_table=biom_table,
                    output_dir=output_dir)
        sample_iter = source_samples
    else:
        f = partial(_cli_sink_source_prediction_runner,
                    alpha1=alpha1,
                    alpha2=alpha2,
                    beta=beta,
                    restarts=restarts,
                    draws_per_restart=draws_per_restart,
                    burnin=burnin,
                    delay=delay,
                    sources_data=sources_data,
                    biom_table=biom_table,
                    output_dir=output_dir)
        sample_iter = sink_samples

    if jobs > 1:
        # Launch the ipcluster and wait for it to come up.
        subprocess.Popen('ipcluster start -n %s --quiet' % jobs, shell=True)
        time.sleep(cluster_start_delay)
        c = Client()
        c[:].map(f, sample_iter, block=True)
        # Shut the cluster down. Answer taken from SO:
        # http://stackoverflow.com/questions/30930157/stopping-ipcluster-engines-ipython-parallel
        c.shutdown(hub=True)
    else:
        for sample in sample_iter:
            f(sample)

    # Format results for output.
    samples = []
    samples_data = []
    for sample_fp in glob.glob(os.path.join(output_dir, '*')):
        samples.append(sample_fp.strip().split('/')[-1].split('.txt')[0])
        samples_data.append(np.loadtxt(sample_fp, delimiter='\t'))
    mp, mps = _cli_collate_results(samples, samples_data, sources_envs)

    o = open(os.path.join(output_dir, 'mixing_proportions.txt'), 'w')
    o.writelines(mp)
    o.close()
    o = open(os.path.join(output_dir, 'mixing_proportions_stds.txt'), 'w')
    o.writelines(mps)
    o.close()
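The dispatch logic above (direct-view map when jobs > 1, a plain loop otherwise) recurs in several of these examples. Below is a generic sketch of that pattern with illustrative names; a try/finally is added so the cluster is shut down even if a task fails.

import subprocess
import time

from ipyparallel import Client

def map_maybe_parallel(f, items, jobs, cluster_start_delay=10):
    # apply f to every item, optionally across a freshly started ipcluster
    if jobs > 1:
        subprocess.Popen('ipcluster start -n %s --quiet' % jobs, shell=True)
        time.sleep(cluster_start_delay)
        c = Client()
        try:
            c[:].map(f, items, block=True)
        finally:
            c.shutdown(hub=True)
    else:
        for item in items:
            f(item)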
Example #7
def stop_server(ipcluster='ipcluster', pdir=None, profile=None, dview=None):
    """
    programmatically stops the ipyparallel server

    Parameters:
     ----------
     ipcluster : str
         ipcluster binary file name; requires 4 path separators on Windows
         Default: "ipcluster"

    """
    if 'multiprocessing' in str(type(dview)):
        dview.terminate()
    else:
        sys.stdout.write("Stopping cluster...\n")
        sys.stdout.flush()
        try:
            pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
            is_slurm = True
        except:
            print('NOT SLURM')
            is_slurm = False

        if is_slurm:
            if pdir is None and profile is None:
                pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
            c = Client(ipython_dir=pdir, profile=profile)
            ee = c[:]
            ne = len(ee)
            print(('Shutting down %d engines.' % (ne)))
            c.close()
            c.shutdown(hub=True)
            shutil.rmtree('profile_' + str(profile))
            try:
                shutil.rmtree('./log/')
            except:
                print('creating log folder')

            files = glob.glob('*.log')
            os.mkdir('./log')

            for fl in files:
                shutil.move(fl, './log/')

        else:
            if ipcluster == "ipcluster":
                proc = subprocess.Popen("ipcluster stop",
                                        shell=True,
                                        stderr=subprocess.PIPE,
                                        close_fds=(os.name != 'nt'))
            else:
                proc = subprocess.Popen(shlex.split(ipcluster + " stop"),
                                        shell=True,
                                        stderr=subprocess.PIPE,
                                        close_fds=(os.name != 'nt'))

            line_out = proc.stderr.readline()
            if b'CRITICAL' in line_out:
                sys.stdout.write("No cluster to stop...")
                sys.stdout.flush()
            elif b'Stopping' in line_out:
                st = time.time()
                sys.stdout.write('Waiting for cluster to stop...')
                while (time.time() - st) < 4:
                    sys.stdout.write('.')
                    sys.stdout.flush()
                    time.sleep(1)
            else:
                print(line_out)
                print(
                    '**** Unrecognized Syntax in ipcluster output, waiting for server to stop anyways ****'
                )

            proc.stderr.close()

    sys.stdout.write(" done\n")
Example #8
def slurm_map(fnc, iterables, resource_spec,
              env='root', job_name=None, output_path=None,
              n_retries=5, patience=30):
    """

    Args:
      fnc
      iterables
      resource_spec
      env: virtual env to launch engines in
      job_name: name of job to use. Derived from fnc name if not specified
      output_path: location to direct output to.
        If unspecified output is sent to a file (based on job name and timestamp) in ~/logs/slurm
      n_retries: number of times to retry connecting to client if less than the requested number
        of workers are available.
      patience: seconds to wait after failed attempt to connect to client

    """
    resource_spec = process_resource_spec(resource_spec)

    if not profile_installed(PROFILE_NAME):
        print("No profile found for {}, installing".format(PROFILE_NAME))
        install_profile(PROFILE_NAME)

    submission_time = time.strftime("%Y%m%d-%H%M%S")
    cluster_id = '{}_{}'.format(fnc.__name__, submission_time)
    print("Using cluster id: {}".format(cluster_id))

    # break down by line:
    # run in bash
    # activate the specified environment
    # launch controller with desired settings
    controller_cmd_template = ("exec bash -c '"
                               "source activate {env};"
                               " ipcontroller --profile={profile} --sqlitedb --location={hostname} --ip=\'*\' --cluster-id={cluster_id}'")
    controller_cmd = controller_cmd_template.format(
        env=env, profile=PROFILE_NAME, hostname=socket.gethostname(), cluster_id=cluster_id
    )

    print("Starting controller with: {} \n".format(controller_cmd))
    # runs in the background if executed this way
    subprocess.Popen(controller_cmd, shell=True)

    print("Sleeping for 10")
    time.sleep(10)

    engine_cmd_template_path = package_path() + '/templates/slurm_template.sh'
    with open(engine_cmd_template_path,  'r') as engine_cmd_template_file:
        engine_command_template = engine_cmd_template_file.read()


    # prepare engine commands
    if job_name is None:
        job_name = fnc.__name__ + '_slurm_map'
    else:
        assert isinstance(job_name, str)

    if output_path is None:
        output_dir = os.path.expanduser('~/logs/slurm')
        output_path = '{}/{}_{}'.format(output_dir, job_name, submission_time)

        if not os.path.exists(output_path):
            os.makedirs(output_path)
    else:
        assert isinstance(output_path, str)
        assert os.path.exists(output_path)

    # find path to engine based on specified environment
    if env == 'root':
        engine_path = 'bin/ipengine'
    else:
        engine_path = 'envs/{}/bin/ipengine'.format(env)

    full_engine_path = os.path.expanduser('~/anaconda3/{}'.format(engine_path))
    assert os.path.exists(full_engine_path)



    engine_command = engine_command_template.format(
        job_name=job_name,
        output_path=output_path,
        n_tasks=resource_spec['max_workers'],
        mem_mb=resource_spec['worker_mem_mb'],
        n_cpus=resource_spec['worker_n_cpus'],
        n_gpus=resource_spec['worker_n_gpus'],
        engine_path=engine_path,
        profile=PROFILE_NAME,
        controller_hostname=socket.gethostname(),
        cluster_id=cluster_id,
        comment=job_name
    )

    sbatch_file_path = '/tmp/slurm_map_sbatch_{}.sh'.format(cluster_id)
    with open(sbatch_file_path, 'w') as sbatch_file:
        sbatch_file.write(engine_command)

    # wrap command to execute in bash
    sbatch_command = "exec bash -c 'sbatch {}'".format(sbatch_file_path)

    print("Starting engines")
    # runs in the background if executed this way
    subprocess.Popen(sbatch_command, shell=True)
    print("Sleeping for {}".format(patience))
    time.sleep(patience)

    # TODO: shut down unused engines
    connected = False
    for attempt_idx in range(n_retries):
        print("Attempt {} to connect to cluster".format(attempt_idx))
        try:
            client = Client(profile=PROFILE_NAME, cluster_id=cluster_id)
            if resource_spec['min_workers'] <= len(client.ids) <= resource_spec['max_workers']:
                connected = True
                print('Successfully connected to cluster with {} engines out of {} requested'.format(
                    len(client.ids), resource_spec['max_workers']))

                if len(client.ids) < resource_spec['max_workers']:
                    warn("{} slurm jobs submitted but only {} are being used.".format(
                        resource_spec['max_workers'], len(client.ids)))

                break
            else:
                print("{} available engines less than minimum requested of {}".format(
                    len(client.ids), resource_spec['min_workers']))
                print("Retrying after {}".format(patience))
                client.close()
                time.sleep(patience)
        except OSError as os_err:
            print("Caught OSError while attempting to connect to {}: {}.".format(PROFILE_NAME, os_err))
            print("Retrying after {}".format(patience))
            time.sleep(patience)
        except TimeoutError as timeout_err:
            print("Caught TimeoutError while attempting to connect to {}: {}".format(PROFILE_NAME, timeout_err))
            print("Retrying after {}".format(patience))
            time.sleep(patience)

    if not connected:
        raise TimeoutError("Failed to connect to client after {} retries".format(n_retries))

    # run tasks
    print("Submitting tasks")
    start_time = time.time()
    client[:].use_cloudpickle()
    lb_view = client.load_balanced_view()
    result = lb_view.map(fnc, iterables, block=True)
    print("Tasks finished after {} seconds".format(time.time() - start_time))

    print("Shutting down cluster")
    client.shutdown(hub=True)
    print("Relinquishing slurm nodes")
    shutdown_cmd = 'scancel --name={job_name}'.format(job_name=job_name)
    shutdown_cmd = "exec bash -c '{}'".format(shutdown_cmd)
    # runs in the background if executed this way
    subprocess.Popen(shutdown_cmd, shell=True)

    print("Removing sbatch script")
    os.remove(sbatch_file_path)

    return result
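A hypothetical call to slurm_map: the resource_spec keys match those referenced in the body (min_workers, max_workers, worker_mem_mb, worker_n_cpus, worker_n_gpus), but the values and the mapped function are illustrative only.

def square(x):
    return x * x

resource_spec = {
    'min_workers': 4,      # fewest engines we are willing to run with
    'max_workers': 16,     # slurm tasks requested
    'worker_mem_mb': 4096,
    'worker_n_cpus': 2,
    'worker_n_gpus': 0,
}

results = slurm_map(square, range(100), resource_spec,
                    env='root', job_name='square_test')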
Example #9
def gibbs_cli(table_fp, mapping_fp, output_dir, loo, jobs, alpha1, alpha2,
              beta, source_rarefaction_depth, sink_rarefaction_depth, restarts,
              draws_per_restart, burnin, delay, cluster_start_delay,
              per_sink_feature_assignments, sample_with_replacement,
              source_sink_column, source_column_value,
              sink_column_value, source_category_column):
    '''Gibb's sampler for Bayesian estimation of microbial sample sources.

    For details, see the project README file.
    '''
    # Create results directory. Click has already checked if it exists, and
    # failed if so.
    os.mkdir(output_dir)

    # Load the metadata file and feature table.
    sample_metadata = parse_sample_metadata(open(mapping_fp, 'U'))
    feature_table = biom_to_df(load_table(table_fp))

    # Do high level check on feature data.
    feature_table = validate_gibbs_input(feature_table)

    # Remove samples not shared by both feature and metadata tables and order
    # rows equivalently.
    sample_metadata, feature_table = \
        intersect_and_sort_samples(sample_metadata, feature_table)

    # Identify source and sink samples.
    source_samples = get_samples(sample_metadata, source_sink_column,
                                 source_column_value)
    sink_samples = get_samples(sample_metadata, source_sink_column,
                               sink_column_value)

    # If we have no source samples, neither normal operation nor LOO will
    # work, and we will also likely get strange errors.
    if len(source_samples) == 0:
        raise ValueError(('You passed %s as the `source_sink_column` and %s '
                          'as the `source_column_value`. There are no samples '
                          'which are sources under these values. Please see '
                          'the help documentation and check your mapping '
                          'file.') % (source_sink_column, source_column_value))

    # Prepare the 'sources' matrix by collapsing the `source_samples` by their
    # metadata values.
    csources = collapse_source_data(sample_metadata, feature_table,
                                    source_samples, source_category_column,
                                    'mean')

    # Rarefy collapsed source data if requested.
    if source_rarefaction_depth > 0:
        d = (csources.sum(1) >= source_rarefaction_depth)
        if not d.all():
            count_too_shallow = (~d).sum()
            shallowest = csources.sum(1).min()
            raise ValueError(('You requested rarefaction of source samples at '
                              '%s, but there are %s collapsed source samples '
                              'that have less sequences than that. The '
                              'shallowest of these is %s sequences.') %
                             (source_rarefaction_depth, count_too_shallow,
                              shallowest))
        else:
            csources = subsample_dataframe(csources, source_rarefaction_depth,
                                           replace=sample_with_replacement)

    # Prepare to rarefy sink data if we are not doing LOO. If we are doing LOO,
    # we skip the rarefaction and set sinks to `None`.
    if not loo:
        sinks = feature_table.loc[sink_samples, :]
        if sink_rarefaction_depth > 0:
            d = (sinks.sum(1) >= sink_rarefaction_depth)
            if not d.all():
                count_too_shallow = (~d).sum()
                shallowest = sinks.sum(1).min()
                raise ValueError(('You requested rarefaction of sink samples '
                                  'at %s, but there are %s sink samples that '
                                  'have less sequences than that. The '
                                  'shallowest of these is %s sequences.') %
                                 (sink_rarefaction_depth, count_too_shallow,
                                  shallowest))
            else:
                sinks = subsample_dataframe(sinks, sink_rarefaction_depth,
                                            replace=sample_with_replacement)
    else:
        sinks = None

    # If we've been asked to do multiple jobs, we need to spin up a cluster.
    if jobs > 1:
        # Launch the ipcluster and wait for it to come up.
        subprocess.Popen('ipcluster start -n %s --quiet' % jobs, shell=True)
        time.sleep(cluster_start_delay)
        cluster = Client()
    else:
        cluster = None

    # Run the computations.
    mpm, mps, fas = gibbs(csources, sinks, alpha1, alpha2, beta, restarts,
                          draws_per_restart, burnin, delay, cluster=cluster,
                          create_feature_tables=per_sink_feature_assignments)

    # If we started a cluster, shut it down.
    if jobs > 1:
        cluster.shutdown(hub=True)

    # Write results.
    mpm.to_csv(os.path.join(output_dir, 'mixing_proportions.txt'), sep='\t')
    mps.to_csv(os.path.join(output_dir, 'mixing_proportions_stds.txt'),
               sep='\t')
    if per_sink_feature_assignments:
        for sink, fa in zip(mpm.index, fas):
            fa.to_csv(os.path.join(output_dir, sink + '.feature_table.txt'),
                      sep='\t')

    # Plot contributions.
    fig, ax = plot_heatmap(mpm)
    fig.savefig(os.path.join(output_dir, 'mixing_proportions.pdf'), dpi=300)
Example #10
def gibbs(table_fp, mapping_fp, output_dir, loo, jobs, alpha1, alpha2, beta,
          source_rarefaction_depth, sink_rarefaction_depth,
          restarts, draws_per_restart, burnin, delay, cluster_start_delay,
          source_sink_column, source_column_value, sink_column_value,
          source_category_column):
    '''Gibb's sampler for Bayesian estimation of microbial sample sources.

    For details, see the project README file.
    '''
    # Create results directory. Click has already checked if it exists, and
    # failed if so.
    os.mkdir(output_dir)

    # Load the mapping file and biom table and remove samples which are not
    # shared.
    o = open(mapping_fp, 'U')
    sample_metadata_lines = o.readlines()
    o.close()

    sample_metadata, biom_table = \
        _cli_sync_biom_and_sample_metadata(
            parse_mapping_file(sample_metadata_lines),
            load_table(table_fp))

    # If biom table has fractional counts, it can produce problems in indexing
    # later on.
    biom_table.transform(lambda data, id, metadata: np.ceil(data))

    # If biom table has sample metadata, there will be pickling errors when
    # submitting multiple jobs. We remove the metadata by making a copy of the
    # table without metadata.
    biom_table = Table(biom_table._data.toarray(),
                       biom_table.ids(axis='observation'),
                       biom_table.ids(axis='sample'))

    # Parse the mapping file and options to get the samples requested for
    # sources and sinks.
    source_samples, sink_samples = sinks_and_sources(
        sample_metadata, column_header=source_sink_column,
        source_value=source_column_value, sink_value=sink_column_value)

    # If we have no source samples, neither normal operation nor LOO will
    # work, and we will also likely get strange errors.
    if len(source_samples) == 0:
        raise ValueError('Mapping file or biom table passed contain no '
                         '`source` samples.')

    # Prepare the 'sources' matrix by collapsing the `source_samples` by their
    # metadata values.
    sources_envs, sources_data = collapse_sources(source_samples,
                                                  sample_metadata,
                                                  source_category_column,
                                                  biom_table, sort=True)

    # Rarefy data if requested.
    sources_data, biom_table = \
        subsample_sources_sinks(sources_data, sink_samples, biom_table,
                                source_rarefaction_depth,
                                sink_rarefaction_depth)

    # Build a function that requires only a single parameter -- sample -- to
    # enable parallel processing if requested.
    if loo:
        f = partial(_cli_loo_runner, source_category=source_category_column,
                    alpha1=alpha1, alpha2=alpha2, beta=beta,
                    restarts=restarts, draws_per_restart=draws_per_restart,
                    burnin=burnin, delay=delay,
                    sample_metadata=sample_metadata,
                    sources_data=sources_data, sources_envs=sources_envs,
                    biom_table=biom_table, output_dir=output_dir)
        sample_iter = source_samples
    else:
        f = partial(_cli_sink_source_prediction_runner, alpha1=alpha1,
                    alpha2=alpha2, beta=beta, restarts=restarts,
                    draws_per_restart=draws_per_restart, burnin=burnin,
                    delay=delay, sources_data=sources_data,
                    biom_table=biom_table, output_dir=output_dir)
        sample_iter = sink_samples

    if jobs > 1:
        # Launch the ipcluster and wait for it to come up.
        subprocess.Popen('ipcluster start -n %s --quiet' % jobs, shell=True)
        time.sleep(cluster_start_delay)
        c = Client()
        c[:].map(f, sample_iter, block=True)
        # Shut the cluster down. Answer taken from SO:
        # http://stackoverflow.com/questions/30930157/stopping-ipcluster-engines-ipython-parallel
        c.shutdown(hub=True)
    else:
        for sample in sample_iter:
            f(sample)

    # Format results for output.
    samples = []
    samples_data = []
    for sample_fp in glob.glob(os.path.join(output_dir, '*')):
        samples.append(sample_fp.strip().split('/')[-1].split('.txt')[0])
        samples_data.append(np.loadtxt(sample_fp, delimiter='\t'))
    mp, mps = _cli_collate_results(samples, samples_data, sources_envs)

    o = open(os.path.join(output_dir, 'mixing_proportions.txt'), 'w')
    o.writelines(mp)
    o.close()
    o = open(os.path.join(output_dir, 'mixing_proportions_stds.txt'), 'w')
    o.writelines(mps)
    o.close()
Example #11
def gibbs_cli(table_fp, mapping_fp, output_dir, loo, jobs, alpha1, alpha2,
              beta, source_rarefaction_depth, sink_rarefaction_depth, restarts,
              draws_per_restart, burnin, delay, cluster_start_delay,
              per_sink_feature_assignments, sample_with_replacement,
              source_sink_column, source_column_value, sink_column_value,
              source_category_column):
    '''Gibb's sampler for Bayesian estimation of microbial sample sources.

    For details, see the project README file.
    '''
    # Create results directory. Click has already checked if it exists, and
    # failed if so.
    os.mkdir(output_dir)

    # Load the metadata file and feature table.
    sample_metadata = parse_sample_metadata(open(mapping_fp, 'U'))
    feature_table = biom_to_df(load_table(table_fp))

    # Do high level check on feature data.
    feature_table = validate_gibbs_input(feature_table)

    # Remove samples not shared by both feature and metadata tables and order
    # rows equivalently.
    sample_metadata, feature_table = \
        intersect_and_sort_samples(sample_metadata, feature_table)

    # Identify source and sink samples.
    source_samples = get_samples(sample_metadata, source_sink_column,
                                 source_column_value)
    sink_samples = get_samples(sample_metadata, source_sink_column,
                               sink_column_value)

    # If we have no source samples, neither normal operation nor LOO will
    # work, and we will also likely get strange errors.
    if len(source_samples) == 0:
        raise ValueError(('You passed %s as the `source_sink_column` and %s '
                          'as the `source_column_value`. There are no samples '
                          'which are sources under these values. Please see '
                          'the help documentation and check your mapping '
                          'file.') % (source_sink_column, source_column_value))

    # Prepare the 'sources' matrix by collapsing the `source_samples` by their
    # metadata values.
    csources = collapse_source_data(sample_metadata, feature_table,
                                    source_samples, source_category_column,
                                    'mean')

    # Rarefy collapsed source data if requested.
    if source_rarefaction_depth > 0:
        d = (csources.sum(1) >= source_rarefaction_depth)
        if not d.all():
            count_too_shallow = (~d).sum()
            shallowest = csources.sum(1).min()
            raise ValueError(
                ('You requested rarefaction of source samples at '
                 '%s, but there are %s collapsed source samples '
                 'that have less sequences than that. The '
                 'shallowest of these is %s sequences.') %
                (source_rarefaction_depth, count_too_shallow, shallowest))
        else:
            csources = subsample_dataframe(csources,
                                           source_rarefaction_depth,
                                           replace=sample_with_replacement)

    # Prepare to rarefy sink data if we are not doing LOO. If we are doing LOO,
    # we skip the rarefaction and set sinks to `None`.
    if not loo:
        sinks = feature_table.loc[sink_samples, :]
        if sink_rarefaction_depth > 0:
            d = (sinks.sum(1) >= sink_rarefaction_depth)
            if not d.all():
                count_too_shallow = (~d).sum()
                shallowest = sinks.sum(1).min()
                raise ValueError(
                    ('You requested rarefaction of sink samples '
                     'at %s, but there are %s sink samples that '
                     'have less sequences than that. The '
                     'shallowest of these is %s sequences.') %
                    (sink_rarefaction_depth, count_too_shallow, shallowest))
            else:
                sinks = subsample_dataframe(sinks,
                                            sink_rarefaction_depth,
                                            replace=sample_with_replacement)
    else:
        sinks = None

    # If we've been asked to do multiple jobs, we need to spin up a cluster.
    if jobs > 1:
        # Launch the ipcluster and wait for it to come up.
        subprocess.Popen('ipcluster start -n %s --quiet' % jobs, shell=True)
        time.sleep(cluster_start_delay)
        cluster = Client()
    else:
        cluster = None

    # Run the computations.
    mpm, mps, fas = gibbs(csources,
                          sinks,
                          alpha1,
                          alpha2,
                          beta,
                          restarts,
                          draws_per_restart,
                          burnin,
                          delay,
                          cluster=cluster,
                          create_feature_tables=per_sink_feature_assignments)

    # If we started a cluster, shut it down.
    if jobs > 1:
        cluster.shutdown(hub=True)

    # Write results.
    mpm.to_csv(os.path.join(output_dir, 'mixing_proportions.txt'), sep='\t')
    mps.to_csv(os.path.join(output_dir, 'mixing_proportions_stds.txt'),
               sep='\t')
    if per_sink_feature_assignments:
        for sink, fa in zip(mpm.index, fas):
            fa.to_csv(os.path.join(output_dir, sink + '.feature_table.txt'),
                      sep='\t')

    # Plot contributions.
    fig, ax = plot_heatmap(mpm)
    fig.savefig(os.path.join(output_dir, 'mixing_proportions.pdf'), dpi=300)
Example #12
class Snudda(object):

    ############################################################################

    def __init__(self, networkPath):

        if (networkPath[-1] == "/"):
            self.networkPath = networkPath[:-1]
        else:
            self.networkPath = networkPath

        # Add current dir to python path
        sys.path.append(os.getcwd())

        self.start = timeit.default_timer()

    ############################################################################

    def helpInfo(self, args):
        from .snudda_help import snudda_help_text
        print(snudda_help_text())

    ############################################################################

    def initConfig(self, args):
        # self.networkPath = args.path
        print("Creating config file")
        print("Network path: " + str(self.networkPath))

        assert args.size is not None, \
          "You need to speicfy --size when initialising config for network2"

        from .init import SnuddaInit
        structDef = {
            "Striatum": args.size,
            "GPe": 0,
            "GPi": 0,
            "SNr": 0,
            "STN": 0,
            "Cortex": 0,
            "Thalamus": 0
        }
        # Cortex and thalamus axons disabled right now, set to 1 to include one

        if not args.overwrite:
            assert not os.path.exists(self.networkPath), \
              "Network path " + str(self.networkPath) + " already exists" \
              + " (aborting to prevent accidental overwriting)"

        self.makeDirIfNeeded(self.networkPath)

        nChannels = args.nchannels

        configFile = self.networkPath + "/network-config.json"
        SnuddaInit(structDef=structDef,
                   configName=configFile,
                   nChannels=nChannels)

        if (args.size > 1e5):
            print("Make sure there is enough disk space in " +
                  str(self.networkPath))
            print("Large networks take up ALOT of space")

    ############################################################################

    def placeNeurons(self, args):
        # self.networkPath = args.path
        print("Placing neurons")
        print("Network path: " + str(self.networkPath))

        configFile = self.networkPath + "/network-config.json"
        positionFile = self.networkPath + "/network-neuron-positions.hdf5"
        logFileName = self.networkPath + "/log/logFile-place-neurons.txt"

        self.setupLogFile(logFileName)  # sets self.logFile
        self.setupParallel()  # sets self.dView and self.lbView

        from .place import SnuddaPlace

        if (args.h5legacy):
            h5libver = "earliest"
        else:
            h5libver = "latest"  # default

        npn = SnuddaPlace(config_file=configFile,
                          logFile=self.logFile,
                          verbose=True,
                          dView=self.dView,
                          h5libver=h5libver)

        npn.writeDataHDF5(positionFile)

        self.stopParallel()
        self.closeLogFile()

    ############################################################################

    def touchDetection(self, args):
        # self.networkPath = args.path
        print("Touch detection")
        print("Network path: " + str(self.networkPath))

        if (args.hvsize is not None):
            hyperVoxelSize = int(args.hvsize)
        else:
            hyperVoxelSize = 100

        if (args.volumeID is not None):
            volumeID = args.volumeID
        else:
            volumeID = None

        logDir = self.networkPath + "/log"

        configFile = self.networkPath + "/network-config.json"
        positionFile = self.networkPath + "/network-neuron-positions.hdf5"
        logFileName = self.networkPath + "/log/logFile-touch-detection.txt"
        saveFile = self.networkPath + "/voxels/network-putative-synapses.hdf5"

        voxelDir = self.networkPath + "/voxels"
        self.makeDirIfNeeded(voxelDir)

        self.setupLogFile(logFileName)  # sets self.logFile
        self.setupParallel()  # sets self.dView and self.lbView

        if (args.h5legacy):
            h5libver = "earliest"
        else:
            h5libver = "latest"  # default

        from .detect import SnuddaDetect

        if (args.cont):
            # Continue previous run
            print("Continuing previous touch detection")

            ncv = SnuddaDetect(configFile=configFile,
                               positionFile=positionFile,
                               logFile=self.logFile,
                               saveFile=saveFile,
                               SlurmID=self.SlurmID,
                               volumeID=volumeID,
                               rc=self.rc,
                               hyperVoxelSize=hyperVoxelSize,
                               h5libver=h5libver,
                               restartDetectionFlag=False)

        else:
            ncv = SnuddaDetect(configFile=configFile,
                               positionFile=positionFile,
                               logFile=self.logFile,
                               saveFile=saveFile,
                               SlurmID=self.SlurmID,
                               volumeID=volumeID,
                               rc=self.rc,
                               h5libver=h5libver,
                               hyperVoxelSize=hyperVoxelSize)

        self.stopParallel()
        self.closeLogFile()

    ############################################################################

    def pruneSynapses(self, args):
        # self.networkPath = args.path
        print("Prune synapses")
        print("Network path: " + str(self.networkPath))

        from .prune import SnuddaPrune

        logFileName = self.networkPath + "/log/logFile-synapse-pruning.txt"

        workLog = self.networkPath + "/log/network-detect-worklog.hdf5"

        self.setupLogFile(logFileName)  # sets self.logFile
        self.setupParallel()  # sets self.dView and self.lbView

        # Optionally set this
        scratchPath = None

        if (args.mergeonly):
            preMergeOnly = True
        else:
            preMergeOnly = False

        print("preMergeOnly : " + str(preMergeOnly))

        if (args.h5legacy):
            h5libver = "earliest"
        else:
            h5libver = "latest"  # default

        ncvp = SnuddaPrune(workHistoryFile=workLog,
                           logFile=self.logFile,
                           logFileName=logFileName,
                           dView=self.dView,
                           lbView=self.lbView,
                           scratchPath=scratchPath,
                           h5libver=h5libver,
                           preMergeOnly=preMergeOnly)

        self.stopParallel()
        self.closeLogFile()

    ############################################################################

    def setupInput(self, args):

        from .input import SnuddaInput

        print("Setting up inputs, assuming input.json exists")
        logFileName = self.networkPath + "/log/logFile-setup-input.log"
        self.setupLogFile(logFileName)  # sets self.logFile
        self.setupParallel()  # sets self.dView and self.lbView

        if "input" in args:
            inputConfig = args.input
        else:
            inputConfig = self.networkPath + "/input.json"

        if (not os.path.isfile(inputConfig)):
            print("Missing input config file: " + str(inputConfig))
            return

        if (args.networkFile):
            networkFile = args.networkFile
        else:
            networkFile = self.networkPath \
              + "/network-pruned-synapses.hdf5"

        if (args.inputFile):
            spikeFile = args.inputFile
        else:
            spikeFile = self.networkPath + "/input-spikes.hdf5"

        if (args.time):
            inputTime = args.time

        print("Writing input spikes to " + spikeFile)

        ni = SnuddaInput(inputConfigFile=inputConfig,
                         HDF5networkFile=networkFile,
                         spikeDataFileName=spikeFile,
                         time=inputTime,
                         logFile=self.logFile)

        self.stopParallel()
        self.closeLogFile()

    ############################################################################

    def exportToSONATA(self, args):

        from ConvertNetwork import ConvertNetwork

        print("Exporting to SONATA format")
        print("Network path: " + str(self.networkPath))

        if (args.networkFile):
            networkFile = args.networkFile
        else:
            networkFile = self.networkPath \
              + "/network-pruned-synapses.hdf5"

        if (args.inputFile):
            inputFile = args.inputFile
        else:
            inputFile = self.networkPath + "/input-spikes.hdf5"

        outDir = self.networkPath + "/SONATA/"

        cn = ConvertNetwork(networkFile=networkFile,
                            inputFile=inputFile,
                            outDir=outDir)

    ############################################################################

    def simulate(self, args):

        start = timeit.default_timer()

        from .simulate import SnuddaSimulate

        if (args.networkFile):
            networkFile = args.networkFile
        else:
            networkFile = self.networkPath \
              + "/network-pruned-synapses.hdf5"

        if (args.inputFile):
            inputFile = args.inputFile
        else:
            inputFile = self.networkPath + "/input-spikes.hdf5"

        self.makeDirIfNeeded(self.networkPath + "/simulation")

        print("Using input file " + inputFile)

        #nWorkers = args.ncores
        #print("Using " + str(nWorkers) + " workers for neuron")

        # Problems with nested symbolic links when the second one is a relative
        # path going beyond the original base path
        if (args.mechDir is None):
            mechDir = os.path.dirname(networkFile) + "/mechanisms"

            # !!! problem with paths, testing to create mechanism dir in current dir
            mechDir = "mechanisms"

            if (not os.path.exists(mechDir)):
                mDir = os.path.dirname(__file__) + "/data/cellspecs/mechanisms"
                os.symlink(mDir, mechDir)
        else:
            mechDir = args.mechDir

        # !!! These are saved in current directory x86_64
        # --- problem since nrnivmodl seems to want a relative path...

        makeModsStr = "nrnivmodl " + mechDir
        if (not os.path.exists('x86_64')):
            print("Please first run: " + makeModsStr)
            exit(-1)
            # I was having problems when running nrnivmodl in the script, but
            # running it manually in bash works... WHY?!!

        # os.system(makeModsStr)

        saveDir = os.path.dirname(networkFile) + "/simulation/"

        if (not os.path.exists(saveDir)):
            print("Creating directory " + saveDir)
            os.makedirs(saveDir, exist_ok=True)

        # Get the SlurmID, used in default file names
        SlurmID = os.getenv('SLURM_JOBID')

        if (SlurmID is None):
            SlurmID = str(666)

        print("args: " + str(args))

        if (args.voltOut is not None):
            # Save neuron voltage
            if (args.voltOut == "default"):
                voltFile = saveDir + 'network-voltage-' + SlurmID + '.csv'
            else:
                voltFile = args.voltOut
        else:
            voltFile = None

        if (args.spikesOut is None or args.spikesOut == "default"):
            spikesFile = saveDir + 'network-output-spikes-' + SlurmID + '.txt'
        else:
            spikesFile = args.spikesOut

        disableGJ = args.disableGJ
        if (disableGJ):
            print("!!! WE HAVE DISABLED GAP JUNCTIONS !!!")

        logFile = os.path.dirname(networkFile) \
          + "/log/network-simulation-log.txt"

        logDir = os.path.dirname(networkFile) + "/log"
        if (not os.path.exists(logDir)):
            print("Creating directory " + logDir)
            os.makedirs(logDir, exist_ok=True)

        from mpi4py import MPI  # This must be imported before neuron, to run parallel
        from neuron import h  #, gui

        pc = h.ParallelContext()

        sim = SnuddaSimulate(networkFile=networkFile,
                             inputFile=inputFile,
                             disableGapJunctions=disableGJ,
                             logFile=logFile,
                             verbose=args.verbose)

        sim.addExternalInput()
        sim.checkMemoryStatus()

        if (voltFile is not None):
            sim.addRecording(
                sideLen=None)  # Side len let you record from a subset
            #sim.addRecordingOfType("dSPN",5) # Side len let you record from a subset

        tSim = args.time * 1000  # Convert from s to ms for Neuron simulator

        sim.checkMemoryStatus()
        print("Running simulation for " + str(tSim) + " ms.")
        sim.run(tSim)  # In milliseconds

        print("Simulation done, saving output")
        if (spikesFile is not None):
            sim.writeSpikes(spikesFile)

        if (voltFile is not None):
            sim.writeVoltage(voltFile)

        stop = timeit.default_timer()
        if (sim.pc.id() == 0):
            print("Program run time: " + str(stop - start))

        # sim.plot()
        exit(0)

        #cmdStr = "nrnivmodl " + mechDir + " && mpiexec -n " + str(nWorkers) + " -map-by socket:OVERSUBSCRIBE python3 " + os.path.dirname(__file__) + " simulate.py " + networkFile + " " + inputFile + " --time " + str(args.time)

        #if(args.voltOut is not None):
        #  cmdStr += " --voltOut " + args.voltOut

        #os.system(cmdStr)

    ############################################################################

    def analyse(self, args):

        print("Add analysis code here, see Network_analyse.py")

    ############################################################################

    def setupParallel(self):
        self.SlurmID = os.getenv('SLURM_JOBID')

        if (self.SlurmID is None):
            self.SlurmID = self.nextRunID()
        else:
            self.SlurmID = int(self.SlurmID)

        self.logFile.write("Using SlurmID: " + str(self.SlurmID))

        if (os.getenv('IPYTHON_PROFILE') is not None):

            self.logFile.write('Creating ipyparallel client\n')

            from ipyparallel import Client
            #self.rc = Client(profile=os.getenv('IPYTHON_PROFILE'),
            #            # sshserver='127.0.0.1',
            #            debug=False)

            ufile = os.getenv('IPYTHONDIR') + "/profile_" \
                    + os.getenv('IPYTHON_PROFILE') \
                    + "/security/ipcontroller-client.json"
            self.rc = Client(url_file=ufile, timeout=120, debug=False)

            self.logFile.write('Client IDs: ' + str(self.rc.ids))

            # http://davidmasad.com/blog/simulation-with-ipyparallel/
            # http://people.duke.edu/~ccc14/sta-663-2016/19C_IPyParallel.html
            self.dView = self.rc.direct_view(
                targets='all')  # rc[:] # Direct view into clients
            self.lbView = self.rc.load_balanced_view(targets='all')

            # Define nc globally
            # self.dView.execute("nc = None",block=True)
        else:
            self.logFile.write(
                "No IPYTHON_PROFILE enviroment variable set, running in serial"
            )
            self.dView = None
            self.lbView = None
            self.rc = None

    ############################################################################

    def stopParallel(self):

        # Disable this function, keep the pool running for now
        return

        if (self.rc is not None):
            print("Stopping ipyparallel")
            self.rc.shutdown(hub=True)

    ############################################################################

    def setupLogFile(self, logFileName):
        dataDir = os.path.dirname(logFileName)

        self.makeDirIfNeeded(dataDir)

        try:
            self.logFile = open(logFileName, 'w')
            self.logFile.write('Starting log file\n')
        except:
            print("Unable to set up log file " + str(logFileName))

    ############################################################################

    def closeLogFile(self):

        stop = timeit.default_timer()

        print("\nProgram run time: " + str(stop - self.start))

        self.logFile.write("Program run time: " + str(stop - self.start))
        self.logFile.write("End of log. Closing file.")
        self.logFile.close()

    ##############################################################################

    def nextRunID(self):

        import pickle

        runIDfile = ".runID.pickle"

        try:
            if (os.path.isfile(runIDfile)):

                with open(runIDfile, 'rb') as f:
                    runID = pickle.load(f)
                    nextID = int(np.ceil(np.max(runID)) + 1)

                runID.append(nextID)

                with open(runIDfile, 'wb') as f:
                    pickle.dump(runID, f, -1)

            else:

                with open(runIDfile, 'wb') as f:
                    nextID = 1
                    runID = [1]
                    pickle.dump(runID, f, -1)

        except Exception as e:
            import traceback
            tstr = traceback.format_exc()
            print(tstr)

            print("Problem reading .runID.pickle file, setting runID to 0")
            import pdb
            pdb.set_trace()
            return 0

        print("Using runID = " + str(nextID))

        return nextID

############################################################################

    def makeDirIfNeeded(self, dirPath):

        if (not os.path.exists(dirPath)):
            print("Creating missing directory " + dirPath)
            os.makedirs(dirPath)
Example #13
def stop_server(ipcluster='ipcluster', pdir=None, profile=None, dview=None):
    """
    programmatically stops the ipyparallel server

    Parameters:
     ----------
     ipcluster : str
         ipcluster binary file name; requires 4 path separators on Windows
         Default: "ipcluster"

    """
    if 'multiprocessing' in str(type(dview)):
        dview.terminate()
    else:
        logger.info("Stopping cluster...")
        try:
            pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
            is_slurm = True
        except:
            logger.debug('stop_server: not a slurm cluster')
            is_slurm = False

        if is_slurm:
            if pdir is None and profile is None:
                pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
            c = Client(ipython_dir=pdir, profile=profile)
            ee = c[:]
            ne = len(ee)
            print(('Shutting down %d engines.' % (ne)))
            c.close()
            c.shutdown(hub=True)
            shutil.rmtree('profile_' + str(profile))
            try:
                shutil.rmtree('./log/')
            except:
                print('creating log folder')

            files = glob.glob('*.log')
            os.mkdir('./log')

            for fl in files:
                shutil.move(fl, './log/')

        else:
            if ipcluster == "ipcluster":
                proc = subprocess.Popen(
                    "ipcluster stop", shell=True, stderr=subprocess.PIPE, close_fds=(os.name != 'nt'))
            else:
                proc = subprocess.Popen(shlex.split(ipcluster + " stop"),
                                        shell=True, stderr=subprocess.PIPE, close_fds=(os.name != 'nt'))

            line_out = proc.stderr.readline()
            if b'CRITICAL' in line_out:
                logger.info("No cluster to stop...")
            elif b'Stopping' in line_out:
                st = time.time()
                logger.debug('Waiting for cluster to stop...')
                while (time.time() - st) < 4:
                    sys.stdout.write('.')
                    sys.stdout.flush()
                    time.sleep(1)
            else:
                print(line_out)
                print(
                    '**** Unrecognized syntax in ipcluster output, waiting for server to stop anyways ****')

            proc.stderr.close()

    logger.info("stop_cluster(): done")