Code example #1
File: run.py Project: BIDS-Apps/C-PAC
def load_yaml_config(config_filename, aws_input_creds):

    if config_filename.lower().startswith("s3://"):
        # s3 paths begin with s3://bucket/
        bucket_name = config_filename.split('/')[2]
        s3_prefix = '/'.join(config_filename.split('/')[:3])
        prefix = config_filename.replace(s3_prefix, '').lstrip('/')

        if aws_input_creds:
            if not os.path.isfile(aws_input_creds):
                raise IOError("Could not find aws_input_creds (%s)" %
                              (aws_input_creds))

        from indi_aws import fetch_creds
        bucket = fetch_creds.return_bucket(aws_input_creds, bucket_name)

        bucket.download_file(prefix, '/scratch/'+os.path.basename(config_filename))

        config_filename = '/scratch/'+os.path.basename(config_filename)

    config_filename = os.path.realpath(config_filename)
    if os.path.isfile(config_filename):
        with open(config_filename,'r') as infd:
            config_data = yaml.load(infd)

    return(config_data)
def download_outputs(path_prefix, creds_path, bucket_name, qap_type, \
                          download_to):

    import pickle
    from indi_aws import fetch_creds
    from indi_aws.aws_utils import s3_download

    src_list = []

    bucket = fetch_creds.return_bucket(creds_path, bucket_name)

    if qap_type == "anat_spatial":
        search_for = "anatomical_spatial"
    elif qap_type == "func_spatial":
        search_for = "functional_spatial"
    elif qap_type == "func_temporal":
        search_for = "functional_temporal"

    for k in bucket.list(prefix=path_prefix):

        k_name = str(k.name)

        if (search_for in k_name) and (".csv" in k_name):

            src_list.append(k_name)

    s3_download(bucket, src_list, download_to)
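A minimal usage sketch for the two helpers above; the S3 path, bucket name, credentials CSV, and download directory are hypothetical, and load_yaml_config / download_outputs are assumed to be importable from this module.

if __name__ == "__main__":
    # hypothetical pipeline config stored on S3, plus a local AWS credentials CSV
    pipeline_cfg = load_yaml_config("s3://my-bucket/configs/pipeline_config.yml",
                                    "/home/user/aws_keys.csv")

    # pull every anatomical-spatial QAP CSV found under the given key prefix
    download_outputs(path_prefix="outputs/qap",
                     creds_path="/home/user/aws_keys.csv",
                     bucket_name="my-bucket",
                     qap_type="anat_spatial",
                     download_to="/tmp/qap_csvs")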
Code example #3
def load_yaml_config(config_filename, aws_input_creds):

    if config_filename.lower().startswith("s3://"):
        # s3 paths begin with s3://bucket/
        bucket_name = config_filename.split('/')[2]
        s3_prefix = '/'.join(config_filename.split('/')[:3])
        prefix = config_filename.replace(s3_prefix, '').lstrip('/')

        if aws_input_creds:
            if not os.path.isfile(aws_input_creds):
                raise IOError("Could not find aws_input_creds (%s)" %
                              (aws_input_creds))

        from indi_aws import fetch_creds
        bucket = fetch_creds.return_bucket(aws_input_creds, bucket_name)
        downloaded_config = '/tmp/' + os.path.basename(config_filename)
        bucket.download_file(prefix, downloaded_config)
        config_filename = downloaded_config

    config_filename = os.path.realpath(config_filename)

    try:
        with open(config_filename, 'r') as f:
            config_data = yaml.load(f)
            return config_data
    except IOError:
        print("Error! Could not find config file {0}".format(config_filename))
        raise
Code example #6
def pull_NIFTI_file_list_from_s3(s3_directory, s3_creds):

    import os
    try:
        from indi_aws import fetch_creds
    except:
        err = "\n\n[!] You need the INDI AWS package installed in order to " \
              "pull from an S3 bucket. Try 'pip install indi_aws'\n\n"
        raise Exception(err)

    s3_list = []

    s3_path = s3_directory.replace("s3://","")
    bucket_name = s3_path.split("/")[0]
    bucket_prefix = s3_path.split(bucket_name + "/")[1]

    bucket = fetch_creds.return_bucket(s3_creds, bucket_name)

    # Build S3-subjects to download
    # maintain the "s3://<bucket_name>" prefix!!
    print("Gathering file paths from {0}\n".format(s3_directory))
    for bk in bucket.objects.filter(Prefix=bucket_prefix):
        if ".nii" in str(bk.key):
            s3_list.append(os.path.join("s3://", bucket_name, str(bk.key)))

    if len(s3_list) == 0:
        err = "\n\n[!] No filepaths were found given the S3 path provided!" \
              "\n\n"
        raise Exception(err)

    return s3_list
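A short usage sketch; the S3 directory and credentials CSV below are placeholders, and pull_NIFTI_file_list_from_s3 is assumed to be in scope from the snippet above.

if __name__ == "__main__":
    # gather every .nii/.nii.gz key under a hypothetical S3 directory
    nifti_paths = pull_NIFTI_file_list_from_s3("s3://my-bucket/data/RawData",
                                               "/home/user/aws_keys.csv")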
Code example #7
def list_files(path, s3_creds_path=None):
    if path.startswith('s3://'):
        pieces = path[5:].split('/')
        bucket_name, path = pieces[0], '/'.join(pieces[1:])
        bucket = fetch_creds.return_bucket(s3_creds_path, bucket_name)
        return [
            's3://%s/%s' % (bucket_name, obj.key)
            for obj in bucket.objects.filter(Prefix=path)
        ]
    else:
        return list(glob.glob(path + '/*'))
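A usage sketch for list_files; both paths are placeholders, and glob plus indi_aws.fetch_creds are assumed to be imported at module level, as the function above expects.

if __name__ == "__main__":
    # a local directory listing needs no credentials
    local_files = list_files("/data/bids_dataset")

    # an S3 listing with an optional (hypothetical) credentials CSV
    s3_files = list_files("s3://my-bucket/data/RawData",
                          s3_creds_path="/home/user/aws_keys.csv")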
Code example #8
def gather_nifti_file_paths(dataset_folder, creds_path=None):

    import os

    s3_prefix = "s3://s3.amazonaws.com"

    file_path_list = []

    # paths that include s3:// are assumed to live in AWS Simple Storage Service
    if "s3://" in dataset_folder:
        try:
            from indi_aws import fetch_creds
        except Exception as e:
            print "Error ({0:s}): Could not import indi_aws package".format(
                e.message)
            raise (e)

        try:
            s3_path_vals = (dataset_folder.replace(s3_prefix, "")).split('/')
            bucket_name = s3_path_vals[1]
            data_path = "/".join(s3_path_vals[2:])
        except Exception as e:
            print "Error ({0:s}): There is a problem with s3 path {1:s}".format(
                e.message, dataset_folder)
            raise (e)

        print "Extracting NIfTI paths from s3 bucket {0:s}::{1:s})".format(
            bucket_name, data_path)

        bucket = fetch_creds.return_bucket(creds_path, bucket_name)

        # Build S3-subjects to download
        for bk in bucket.objects.filter(Prefix=data_path):
            if str(bk.key).endswith(".nii") or str(bk.key).endswith(".nii.gz"):
                file_path_list.append(
                    os.path.join(s3_prefix, bucket_name, str(bk.key)))

    else:

        print "Extracting NIfTI paths from local filesystem"
        for root, folders, files in os.walk(os.path.abspath(dataset_folder)):
            for filename in files:
                if filename.endswith('.nii') or filename.endswith('.nii.gz'):
                    file_path_list.append(os.path.join(root, filename))

    if not file_path_list:
        raise Exception("Did not find any nifti files in %s" %
                        (dataset_folder))

    return (file_path_list)
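A usage sketch under the assumption that gather_nifti_file_paths is importable; the dataset folder and credentials CSV are hypothetical. A plain local folder works the same way, with creds_path left as None.

if __name__ == "__main__":
    nifti_list = gather_nifti_file_paths(
        "s3://s3.amazonaws.com/my-bucket/data/RawData",  # hypothetical S3 folder
        creds_path="/home/user/aws_keys.csv")            # hypothetical credentials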
Code example #9
File: test_init.py Project: dingwencai6/C-PAC
def download_cpac_resources_from_s3(local_base):
    '''
    Function to download the CPAC testing resources directory from
    S3

    Parameters
    ----------
    local_base : string
        the local directory to save the 'cpac_resources' contents
    '''

    # Import packages
    import os

    from indi_aws import aws_utils, fetch_creds

    # Init variables
    bucket_name = default_bucket_name()
    resource_folder = 'cpac_resources'
    s3_prefix = os.path.join('data/test_resources', resource_folder)

    # Get bucket object
    bucket = fetch_creds.return_bucket(None, bucket_name)

    # Gather files from bucket
    for obj in bucket.objects.filter(Prefix=s3_prefix):
        bkey = obj.key
        # If the object is just a folder, move on to next object
        if bkey.endswith('/'):
            continue

        # Form local path from key
        local_path = os.path.join(local_base,
                                  bkey.split(resource_folder)[-1].lstrip('/'))

        # Make download directories
        local_dir = os.path.dirname(local_path)
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)

        # Download file if it doesn't exist
        if not os.path.exists(local_path):
            bucket.download_file(bkey,
                                 local_path,
                                 Callback=aws_utils.ProgressPercentage(obj))

    # Print done
    print 'CPAC resources folder in %s is complete!' % local_base
Code example #10
File: test_init.py Project: FCP-INDI/C-PAC
def download_cpac_resources_from_s3(local_base):
    '''
    Function to download the CPAC testing resources directory from
    S3

    Parameters
    ----------
    local_base : string
        the local directory to save the 'cpac_resources' contents
    '''

    # Import packages
    import os

    from indi_aws import aws_utils, fetch_creds

    # Init variables
    bucket_name = default_bucket_name()
    resource_folder = 'cpac_resources'
    s3_prefix = os.path.join('data/test_resources', resource_folder)

    # Get bucket object
    bucket = fetch_creds.return_bucket(None, bucket_name)

    # Gather files from bucket
    for obj in bucket.objects.filter(Prefix=s3_prefix):
        bkey = obj.key
        # If the object is just a folder, move on to next object
        if bkey.endswith('/'):
            continue

        # Form local path from key
        local_path = os.path.join(local_base,
                                  bkey.split(resource_folder)[-1].lstrip('/'))

        # Make download directories
        local_dir = os.path.dirname(local_path)
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)

        # Download file if it doesn't exist
        if not os.path.exists(local_path):
            bucket.download_file(bkey, local_path,
                                 Callback=aws_utils.ProgressPercentage(obj))

    # Print done
    print 'CPAC resources folder in %s is complete!' % local_base
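A brief usage sketch; the local directory is a placeholder, and default_bucket_name() plus the indi_aws package are assumed to be available exactly as in the test module above.

if __name__ == "__main__":
    # download (or resume downloading) the CPAC test resources into a scratch dir
    download_cpac_resources_from_s3("/tmp/cpac_resources")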
Code example #11
def download_single_s3_path(s3_path, cfg_dict):
    """Download a single file from an AWS s3 bucket.

    :type s3_path: str
    :param s3_path: An "s3://" pre-pended path to a file stored on an
                    Amazon AWS s3 bucket.
    :type cfg_dict: dictionary
    :param cfg_dict: A dictionary containing the pipeline setup
                     parameters.
    :rtype: str
    :return: The local filepath of the downloaded s3 file.
    """

    import os
    from indi_aws import fetch_creds, aws_utils
    from qap_utils import raise_smart_exception

    # Init variables
    working_dir = cfg_dict["working_directory"]
    try:
        creds_path = cfg_dict["creds_path"]
    except KeyError:
        creds_path = None

    if "s3://" in s3_path:
        s3_prefix = s3_path.replace("s3://", "")
    else:
        err = "[!] S3 filepaths must be pre-pended with the 's3://' prefix."
        raise_smart_exception(locals(), err)

    bucket_name = s3_prefix.split("/")[0]
    bucket = fetch_creds.return_bucket(creds_path, bucket_name)

    data_dir = s3_path.split(bucket_name + "/")[1]
    local_dl = os.path.join(working_dir, data_dir)

    if os.path.isfile(local_dl):
        print "\nS3 bucket file already downloaded! Skipping download."
        print "S3 file: %s" % s3_path
        print "Local file already exists: %s\n" % local_dl
    else:
        aws_utils.s3_download(bucket, ([data_dir], [local_dl]))

    return local_dl
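A usage sketch, assuming download_single_s3_path is importable and that the pipeline configuration dictionary provides at least "working_directory" (with "creds_path" optional); every value shown is hypothetical.

if __name__ == "__main__":
    cfg_dict = {
        "working_directory": "/tmp/qap_work",       # hypothetical scratch dir
        "creds_path": "/home/user/aws_keys.csv",    # hypothetical credentials
    }
    local_file = download_single_s3_path(
        "s3://my-bucket/data/sub-01/anat/sub-01_T1w.nii.gz", cfg_dict)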
Code example #13
File: run.py Project: FCP-INDI/C-PAC
def load_yaml_config(config_filename, aws_input_creds):

    if config_filename.lower().startswith('data:'):
        try:
            header, encoded = config_filename.split(",", 1)
            config_content = b64decode(encoded)
            config_data = yaml.load(config_content)
            return config_data
        except:
            print("Error! Could not find load config from data URI")
            raise

    if config_filename.lower().startswith("s3://"):
        # s3 paths begin with s3://bucket/
        bucket_name = config_filename.split('/')[2]
        s3_prefix = '/'.join(config_filename.split('/')[:3])
        prefix = config_filename.replace(s3_prefix, '').lstrip('/')

        if aws_input_creds:
            if not os.path.isfile(aws_input_creds):
                raise IOError("Could not find aws_input_creds (%s)" %
                              (aws_input_creds))

        from indi_aws import fetch_creds
        bucket = fetch_creds.return_bucket(aws_input_creds, bucket_name)
        downloaded_config = '/tmp/' + os.path.basename(config_filename)
        bucket.download_file(prefix, downloaded_config)
        config_filename = downloaded_config

    config_filename = os.path.realpath(config_filename)

    try:
        with open(config_filename, 'r') as f:
            config_data = yaml.load(f)
            return config_data
    except IOError:
        print("Error! Could not find config file {0}".format(config_filename))
        raise
Code example #14
def download_from_s3(s3_path, local_path, s3_creds):

    import os

    try:
        from indi_aws import fetch_creds, aws_utils
    except:
        err = "\n\n[!] You need the INDI AWS package installed in order to " \
              "pull from an S3 bucket. Try 'pip install indi_aws'\n\n"
        raise Exception(err)

    s3_path = s3_path.replace("s3://","")
    bucket_name = s3_path.split("/")[0]
    bucket_prefix = s3_path.split(bucket_name + "/")[1]

    filename = s3_path.split("/")[-1]
    local_file = os.path.join(local_path, filename)

    if not os.path.exists(local_file):
        bucket = fetch_creds.return_bucket(s3_creds, bucket_name)
        aws_utils.s3_download(bucket, ([bucket_prefix], [local_file]))

    return local_file
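A usage sketch with placeholder arguments, assuming download_from_s3 is in scope; local_path is a local directory the downloaded file is written into.

if __name__ == "__main__":
    local_copy = download_from_s3(
        "s3://my-bucket/data/sub-01/anat/sub-01_T1w.nii.gz",  # hypothetical key
        "/tmp/downloads",                                     # local directory
        "/home/user/aws_keys.csv")                            # hypothetical creds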
Code example #15
def pull_s3_sublist(data_folder, creds_path=None):
    """Create a list of filepaths stored on the Amazon S3 bucket.

    :type data_folder: str
    :param data_folder: The full S3 (s3://) path to the directory holding the
                        data.
    :type creds_path: str
    :param creds_path: The filepath to your Amazon AWS keys.
    :rtype: list
    :return: A list of Amazon S3 filepaths from the bucket and bucket
             directory you provided.
    """

    import os
    from indi_aws import fetch_creds

    if creds_path:
        creds_path = os.path.abspath(creds_path)

    s3_path = data_folder.split("s3://")[1]
    bucket_name = s3_path.split("/")[0]
    bucket_prefix = s3_path.split(bucket_name + "/")[1]

    s3_list = []
    bucket = fetch_creds.return_bucket(creds_path, bucket_name)

    # ensure slash at end of bucket_prefix, so that if the final
    # directory name is a substring in other directory names, these
    # other directories will not be pulled into the file list
    if "/" not in bucket_prefix[-1]:
        bucket_prefix += "/"

    # Build S3-subjects to download
    for bk in bucket.objects.filter(Prefix=bucket_prefix):
        s3_list.append(str(bk.key).replace(bucket_prefix, ""))

    return s3_list
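A usage sketch for pull_s3_sublist with placeholder inputs; the returned entries are object keys relative to the S3 directory that was passed in.

if __name__ == "__main__":
    relative_keys = pull_s3_sublist("s3://my-bucket/data/RawData",
                                    creds_path="/home/user/aws_keys.csv")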
Code example #16
def upl_qap_output(cfg_file):
    """Upload a pipeline output file to an AWS S3 bucket.

    :type cfg_file: dict
    :param cfg_file: Dictionary whose 'pipeline_config_yaml' entry is the
                     filepath to the pipeline configuration file containing
                     S3 bucket and AWS credentials information.
    """

    # Import packages
    from indi_aws import aws_utils, fetch_creds
    import os
    import yaml

    # Load config file
    with open(cfg_file["pipeline_config_yaml"], 'r') as f:
        cfg_dict = yaml.load(f)

    # Init variables
    bucket_name = cfg_dict["bucket_name"]
    bucket_out_prefix = cfg_dict["bucket_prefix"]
    creds_path = cfg_dict["creds_path"]

    bucket = fetch_creds.return_bucket(creds_path, bucket_name)

    output_dir = cfg_dict['output_directory']

    # And upload data
    upl_files = []
    for root, dirs, files in os.walk(output_dir):
        if files:
            upl_files.extend([os.path.join(root, fil) for fil in files])

    # Using INDI AWS utils
    s3_upl_files = [ufile.replace(output_dir, bucket_out_prefix) \
                   for ufile in upl_files]

    aws_utils.s3_upload(bucket, (upl_files, s3_upl_files))
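A usage sketch, assuming upl_qap_output is importable; note that cfg_file is accessed as a dictionary whose "pipeline_config_yaml" entry points to a pipeline config YAML defining bucket_name, bucket_prefix, creds_path, and output_directory (the path below is hypothetical).

if __name__ == "__main__":
    upl_qap_output({"pipeline_config_yaml": "/tmp/pipeline_config.yml"})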
Code example #19
File: run.py Project: BIDS-Apps/C-PAC
def write_yaml_config(config_filename, body, aws_output_creds):

    if config_filename.lower().startswith("s3://"):

        # s3 paths begin with s3://bucket/
        bucket_name = config_filename.split('/')[2]
        s3_prefix = '/'.join(config_filename.split('/')[:3])
        s3_key = config_filename.replace(s3_prefix, '').lstrip('/')

        if aws_output_creds:
            if not os.path.isfile(aws_output_creds):
                raise IOError("Could not find aws_output_creds (%s)" %
                              (aws_output_creds))

        from indi_aws import fetch_creds
        bucket = fetch_creds.return_bucket(aws_output_creds, bucket_name)

        bucket.put_object(Body=body, Key=s3_key)
        config_filename = '/scratch/'+os.path.basename(config_filename)

    with open(config_filename, 'w') as ofd:
        ofd.writelines(body)

    return(config_filename)
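A usage sketch for write_yaml_config with hypothetical names; for an s3:// target the body is uploaded to the bucket and a copy is also written under /scratch, so that directory is assumed to exist.

if __name__ == "__main__":
    body = "pipeline_name: example\n"
    # local write; no credentials needed
    write_yaml_config("/tmp/pipeline_config.yml", body, None)
    # S3 write with a hypothetical credentials CSV
    write_yaml_config("s3://my-bucket/configs/pipeline_config.yml", body,
                      "/home/user/aws_keys.csv")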
Code example #20
import os

if __name__ == "__main__":
    s3_bucket = "fcp-indi"
    s3_creds = "/Users/cameron.craddock/AWS/ccraddock-fcp-indi-keys2.csv"
    s3_prefix = "data/Projects/ADHD200/RawDataBIDS"
    s3_sitedirs = ["Brown","KKI","NeuroIMAGE","NYU","OHSU","Peking_1","Peking_2","Peking_3","Pittsburgh","WashU"]
    out_prefix = "data/ADHD200/RawDataBIDS"
    max_subjs = 4

    if s3_creds:
        if not os.path.isfile(s3_creds):
            raise IOError("Could not filed aws_input_creds (%s)" % (s3_creds))

    from indi_aws import fetch_creds
    bucket = fetch_creds.return_bucket(s3_creds,s3_bucket)

    for site in s3_sitedirs:
        subjects=[]

        prefix=os.path.join(s3_prefix,site)
        print "gathering files from S3 bucket (%s) for %s" % (bucket, prefix)

        for s3_obj in bucket.objects.filter(Prefix=prefix):
            if 'T1w' in str(s3_obj.key) or 'bold' in str(s3_obj.key):
                fname = os.path.basename(str(s3_obj.key))
                if "sub-" not in fname:
                    if not os.path.exists(os.path.dirname(s3_obj.key).replace(s3_prefix,out_prefix)):
                        print "making the directory"
                        os.makedirs(os.path.dirname(s3_obj.key).replace(s3_prefix,out_prefix))
                    print "downloading %s to %s"%(str(s3_obj.key),str(s3_obj.key).replace(s3_prefix,out_prefix))
Code example #21
def main():
    '''
    This function runs the main routine
    '''
    # Import packages
    from indi_aws import fetch_creds
    import os
    import yaml

    # Init variables
    creds_path = '/home/ubuntu/secure-creds/aws-keys/fcp-indi-keys2.csv'
    # return_bucket expects (creds_path, bucket_name), as in the other examples
    bucket = fetch_creds.return_bucket(creds_path, 'fcp-indi')
    bucket_prefix = 'data/Projects/ABIDE_Initiative/Outputs/cpac/raw_outputs_rerun'
    sub_fp = '/home/ubuntu/abide/preprocessing/yamls/subs_list.yml'
    sub_list = yaml.load(open(sub_fp, 'r'))
    example_subid = '0050002_session_1'

    # Populate list of files to link to
    #src_list = []
    #src_list = gather_files_tosort(src_list, bucket, bucket_prefix)

    # Derivatives dictionary {name: (no_files_per_strategy, filt_str)}
    strat_dict = {
        'nofilt_noglobal': ['pipeline_abide_rerun', 'global0'],
        'nofilt_global': ['pipeline_abide_rerun', 'global1'],
        'filt_noglobal': ['pipeline_abide_rerun__freq-filter', 'global0'],
        'filt_global': ['pipeline_abide_rerun__freq-filter', 'global1']
    }

    derivs_dict = {
        'alff': (1, 'alff_to_standard_smooth', 'nii.gz'),
        'degree_binarize':
        (1, 'centrality_outputs_smoothed', 'degree_centrality_binarize'),
        'degree_weighted':
        (1, 'centrality_outputs_smoothed', 'degree_centrality_weighted'),
        'dual_regression':
        (1, 'dr_tempreg_maps_zstat_stack_to_standard_smooth', 'nii.gz'),
        'eigenvector_binarize':
        (1, 'centrality_outputs_smoothed', 'eigenvector_centrality_binarize'),
        'eigenvector_weighted': (1, 'centrality_outputs_smoothed',
                                 'eigenvector_centrality_weighted'),
        'falff': (1, 'falff_to_standard_smooth', 'nii.gz'),
        'func_mask': (1, 'functional_brain_mask_to_standard', 'nii.gz'),
        'func_mean': (1, 'mean_functional_in_mni', 'nii.gz'),
        'func_preproc': (1, 'functional_mni', '.nii.gz'),
        'lfcd': (1, 'centrality_outputs_smoothed', 'lfcd_binarize'),
        'reho': (1, 'reho_to_standard_smooth', 'nii.gz'),
        'rois_aal': (4, 'roi_timeseries', 'aal'),
        'rois_cc200': (4, 'roi_timeseries', 'CC200'),
        'rois_cc400': (4, 'roi_timeseries', 'CC400'),
        'rois_dosenbach160': (4, 'roi_timeseries', 'rois_3mm'),
        'rois_ez': (4, 'roi_timeseries', 'ez'),
        'rois_ho': (4, 'roi_timeseries', 'ho_'),
        'rois_tt': (4, 'roi_timeseries', 'tt'),
        'vmhc': (1, 'vmhc_fisher_zstd_zstat_map', 'nii.gz')
    }

    # Create error and output dictionaries
    out_dict = {
        k: {kk: []
            for kk in derivs_dict.keys()}
        for k in strat_dict.keys()
    }
    err_dict = {
        k: {kk: []
            for kk in derivs_dict.keys()}
        for k in strat_dict.keys()
    }

    # Iterate through strategies
    for strat, filts in strat_dict.items():
        print('building %s...' % strat)
        filt = filts[0]
        g_sig = filts[1]
        strat_prefix = os.path.join(bucket_prefix, filt, example_subid)
        # Iterate through derivatives
        for deriv, v in derivs_dict.items():
            num_files = v[0]
            deriv_folder = v[1]
            name_filter = v[2]
            deriv_prefix = os.path.join(strat_prefix, deriv_folder)
            keys_list = []
            for key in bucket.list(prefix=deriv_prefix):
                k_name = str(key.name)
                # If global signal regression was used or didn't need to be
                if (g_sig in k_name or 'global' not in k_name) and \
                        name_filter in k_name:
                    keys_list.append(k_name)
            # Grab only wanted results from keys
            if len(keys_list) == num_files:
                out_dict[strat][deriv] = [
                    k for k in keys_list if '.nii.gz' in k or '.1D' in k
                ][0]
            else:
                err_dict[strat][deriv] = keys_list
                print('error in number of files!')

    # Go through dictionary and build paths
    mapping_dict = {}
    s = 1
    # For each subject
    for sub in sub_list:
        subid = sub.split('_')[-1] + '_session_1'
        print('populating %s...%d' % (subid, s))
        # For each strategy
        for strat, deriv_dict in out_dict.items():
            strat_prefix = os.path.join(bucket_prefix, strat)
            # For each derivative, generate src and dst filepaths
            d = 0
            for deriv, filepath in deriv_dict.items():
                deriv_prefix = os.path.join(strat_prefix, deriv,
                                            sub + '_' + deriv)
                # Check extensions
                if filepath.endswith('.nii.gz'):
                    dst_path = deriv_prefix + '.nii.gz'
                elif filepath.endswith('.1D'):
                    dst_path = deriv_prefix + '.1D'
                else:
                    raise Exception('Bad extension type')
                # Get sub id from filepath
                src_path = filepath.replace(example_subid, subid)
                mapping_dict[src_path] = dst_path
                d += 1
            if d != 20:
                print(d)
                raw_input('not enough derivs')
        s += 1

    # Return
    return out_dict, err_dict, mapping_dict
Code example #22
        # for l in infd.readlines():
        # file_paths.append(l.rstrip())

        bucket_name = args.bids_dir.split('/')[2]
        s3_prefix = '/'.join(args.bids_dir.split('/')[:3])
        prefix = args.bids_dir.replace(s3_prefix, '').lstrip('/')

        creds_path = ""
        if args.aws_input_creds:
            if not os.path.isfile(args.aws_input_creds):
                raise IOError("Could not filed aws_input_creds (%s)" % (args.aws_input_creds))
            creds_path = args.aws_input_creds

        from indi_aws import fetch_creds

        bucket = fetch_creds.return_bucket(creds_path, bucket_name)

        print "Gathering data from S3 bucket, this may take a while"

        obj_count = 0
        if args.participant_label:
            for pt in args.participant_label:
                pt = pt.lstrip("sub-")
                t_prefix = "%/sub-%s" % (prefix, pt)

                for s3_obj in bucket.objects.filter(Prefix=t_prefix):
                    obj_count+=1
                    if obj_count % 1000 == 0:
                        print "%dk"%(obj_count//1000)
                    file_paths.append(os.path.join(s3_prefix, str(s3_obj.key)))
        else:
Code example #23
File: datasource.py Project: rsingh5/C-PAC
def check_for_s3(file_path, creds_path, dl_dir=None, img_type='anat'):

    # Import packages
    import os
    import nibabel as nib
    import botocore.exceptions

    from indi_aws import fetch_creds

    # Init variables
    s3_str = 's3://'
    if dl_dir is None:
        dl_dir = os.getcwd()

    # Explicitly lower-case the "s3"
    if file_path.lower().startswith(s3_str):
        file_path_sp = file_path.split('/')
        file_path_sp[0] = file_path_sp[0].lower()
        file_path = '/'.join(file_path_sp)

    # Check for s3 string in filepaths
    if file_path.startswith(s3_str):
        # Get bucket name and bucket object
        bucket_name = file_path.replace(s3_str, '').split('/')[0]
        bucket = fetch_creds.return_bucket(creds_path, bucket_name)

        # Extract relative key path from bucket and local path
        s3_prefix = os.path.join(s3_str, bucket_name)
        s3_key = file_path.replace(s3_prefix, '').lstrip('/')
        local_path = os.path.join(dl_dir, os.path.basename(s3_key))

        # Get local directory and create folders if they don't exist
        local_dir = os.path.dirname(local_path)
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)

        # Download file
        try:
            bucket.download_file(Key=s3_key, Filename=local_path)
        except botocore.exceptions.ClientError as exc:
            error_code = int(exc.response['Error']['Code'])
            if error_code == 403:
                err_msg = 'Access to bucket: "%s" is denied; using credentials '\
                          'in subject list: "%s"; cannot access the file "%s"'\
                          % (bucket_name, creds_path, file_path)
                raise Exception(err_msg)
            elif error_code == 404:
                err_msg = 'Bucket: "%s" does not exist; check spelling and try '\
                          'again' % bucket_name
                raise Exception(err_msg)
            else:
                err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s'\
                          % (bucket_name, exc)
        except Exception as exc:
            err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s'\
                      % (bucket_name, exc)
            raise Exception(err_msg)

    # Otherwise just return what was passed in
    else:
        local_path = file_path

    # Check image dimensionality
    if '.nii' in local_path:
        try:
            img_nii = nib.load(local_path)
        except Exception as e:
            # TODO: come up with a better option for handling rogue S3 files
            # TODO: that Nibabel chokes on
            print(str(e))
            return local_path

        if img_type == 'anat':
            if len(img_nii.shape) != 3:
                raise IOError('File: %s must be an anatomical image with 3 '\
                              'dimensions but %d dimensions found!'
                              % (local_path, len(img_nii.shape)))
        elif img_type == 'func':
            if len(img_nii.shape) != 4:
                raise IOError('File: %s must be a functional image with 4 '\
                              'dimensions but %d dimensions found!'
                              % (local_path, len(img_nii.shape)))

    # Return the local path
    return local_path
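A usage sketch for check_for_s3, assuming the function is importable; the S3 file, credentials CSV, and download directory are placeholders.

if __name__ == "__main__":
    local_t1 = check_for_s3(
        "s3://my-bucket/data/sub-01/anat/sub-01_T1w.nii.gz",  # hypothetical file
        creds_path="/home/user/aws_keys.csv",
        dl_dir="/tmp/inputs",
        img_type="anat")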
Code example #24
File: build_sublist.py Project: spisakt/C-PAC
def return_s3_filepaths(base_dir, creds_path=None):
    '''
    Function to return the filepaths from an S3 bucket given a file
    pattern template and, optionally, credentials

    Parameters
    ----------
    base_dir : string
        the S3 base directory (e.g. s3://bucket/path) holding the data; may
        contain '{site}' or '{participant}' placeholders
    creds_path : string (optional); default=None
        filepath to a credentials file containing the AWS credentials
        to access the S3 bucket objects

    Returns
    -------
    matched_s3_paths : list
        a list of strings of the filepaths from the S3 bucket
    '''

    # Import packages
    import logging
    import os

    from indi_aws import fetch_creds

    # # Check for errors
    # if not bids_base_dir:
    #     if not ('{site}' in path_template and '{participant}' in path_template):
    #     err_msg = 'Please provide \'{site}\' and \'{participant}\' in '\
    #                   'filepath template where site and participant-level '\
    #                   'directories are present'
    #         raise Exception(err_msg)

    # if running this with "Custom" (non-BIDS) file templates
    if '{site}' in base_dir:
        base_dir = base_dir.split('{site}')[0]
    elif '{participant}' in base_dir:
        base_dir = base_dir.split('{participant}')[0]

    # Init variables
    bucket_name = base_dir.split('/')[2]
    s3_prefix = '/'.join(base_dir.split('/')[:3])

    # Get logger
    logger = logging.getLogger('sublist_builder')

    # Extract base prefix to search through in S3
    prefix = base_dir.replace(s3_prefix, '').lstrip('/')

    # Attempt to get bucket
    try:
        bucket = fetch_creds.return_bucket(creds_path, bucket_name)
    except Exception as exc:
        err_msg = 'There was an error in retrieving S3 bucket: %s.\n' \
                  'Error: %s' % (bucket_name, exc)
        logger.error(err_msg)
        raise Exception(err_msg)

    # Get filepaths from S3 with prefix
    logger.info('Gathering files from S3 to parse...')
    s3_filepaths = []
    for s3_obj in bucket.objects.filter(Prefix=prefix):
        s3_filepaths.append(str(s3_obj.key))

    # Prepend 's3://bucket_name/' on found paths
    s3_filepaths = [os.path.join(s3_prefix, s3_fp) for s3_fp in s3_filepaths]

    return s3_filepaths
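A usage sketch for return_s3_filepaths; the template is hypothetical, and everything before the '{site}' placeholder is used as the S3 prefix to list.

if __name__ == "__main__":
    s3_paths = return_s3_filepaths(
        "s3://my-bucket/data/RawData/{site}/{participant}/anat/*.nii.gz",
        creds_path="/home/user/aws_keys.csv")  # hypothetical credentials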
Code example #25
File: datasource.py Project: FCP-INDI/C-PAC
def check_for_s3(file_path, creds_path, dl_dir=None, img_type="anat"):
    """
    """

    # Import packages
    import os
    import nibabel as nib
    import botocore.exceptions

    from indi_aws import fetch_creds

    # Init variables
    s3_str = "s3://"
    if dl_dir is None:
        dl_dir = os.getcwd()

    # Explicitly lower-case the "s3"
    if file_path.lower().startswith(s3_str):
        file_path_sp = file_path.split("/")
        file_path_sp[0] = file_path_sp[0].lower()
        file_path = "/".join(file_path_sp)

    # Check for s3 string in filepaths
    if file_path.startswith(s3_str):
        # Get bucket name and bucket object
        bucket_name = file_path.replace(s3_str, "").split("/")[0]
        bucket = fetch_creds.return_bucket(creds_path, bucket_name)

        # Extract relative key path from bucket and local path
        s3_prefix = os.path.join(s3_str, bucket_name)
        s3_key = file_path.replace(s3_prefix, "").lstrip("/")
        local_path = os.path.join(dl_dir, os.path.basename(s3_key))

        # Get local directory and create folders if they don't exist
        local_dir = os.path.dirname(local_path)
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)

        # Download file
        try:
            bucket.download_file(Key=s3_key, Filename=local_path)
        except botocore.exceptions.ClientError as exc:
            error_code = int(exc.response["Error"]["Code"])
            if error_code == 403:
                err_msg = (
                    'Access to bucket: "%s" is denied; using credentials '
                    'in subject list: "%s"; cannot access the file "%s"' % (bucket_name, creds_path, file_path)
                )
                raise Exception(err_msg)
            elif error_code == 404:
                err_msg = 'Bucket: "%s" does not exist; check spelling and try ' "again" % bucket_name
                raise Exception(err_msg)
            else:
                err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s' % (bucket_name, exc)
        except Exception as exc:
            err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s' % (bucket_name, exc)
            raise Exception(err_msg)

    # Otherwise just return what was passed in
    else:
        local_path = file_path

    # Check image dimensionality
    img_nii = nib.load(local_path)
    if img_type == "anat":
        if len(img_nii.shape) != 3:
            raise IOError(
                "File: %s must be an anatomical image with 3 "
                "dimensions but %d dimensions found!" % (local_path, len(img_nii.shape))
            )
    elif img_type == "func":
        if len(img_nii.shape) != 4:
            raise IOError(
                "File: %s must be a functional image with 4 "
                "dimensions but %d dimensions found!" % (local_path, len(img_nii.shape))
            )

    # Return the local path
    return local_path
Code example #26
File: bids_utils.py Project: gkiar/C-PAC
def collect_bids_files_configs(bids_dir, aws_input_creds=''):
    """
    :param bids_dir:
    :param aws_input_creds:
    :return:
    """

    file_paths = []
    config_dict = {}

    suffixes = ['T1w', 'bold', '_epi', 'phasediff', 'magnitude',
                'magnitude1', 'magnitude2']

    if bids_dir.lower().startswith("s3://"):
        # s3 paths begin with s3://bucket/
        bucket_name = bids_dir.split('/')[2]
        s3_prefix = '/'.join(bids_dir.split('/')[:3])
        prefix = bids_dir.replace(s3_prefix, '').lstrip('/')

        if aws_input_creds:
            if not os.path.isfile(aws_input_creds):
                raise IOError("Could not find aws_input_creds (%s)" %
                              (aws_input_creds))

        from indi_aws import fetch_creds
        bucket = fetch_creds.return_bucket(aws_input_creds, bucket_name)

        print(f"gathering files from S3 bucket ({bucket}) for {prefix}")

        for s3_obj in bucket.objects.filter(Prefix=prefix):
            for suf in suffixes:
                if suf in str(s3_obj.key):
                    if str(s3_obj.key).endswith("json"):
                        try:
                            config_dict[s3_obj.key.replace(prefix, "").lstrip('/')] \
                                = json.loads(s3_obj.get()["Body"].read())
                        except Exception as e:
                            print("Error retrieving %s (%s)" %
                                  (s3_obj.key.replace(prefix, ""),
                                  e.message))
                            raise
                    elif 'nii' in str(s3_obj.key):
                        file_paths.append(str(s3_obj.key)
                                          .replace(prefix,'').lstrip('/'))

    else:
        for root, dirs, files in os.walk(bids_dir, topdown=False):
            if files:
                for f in files:
                    for suf in suffixes:
                        if 'nii' in f and suf in f:
                            file_paths += [os.path.join(root, f).replace(bids_dir,'')
                                   .lstrip('/')]
                        if f.endswith('json') and suf in f:
                            try:
                                config_dict.update(
                                    {os.path.join(root.replace(bids_dir, '').lstrip('/'), f):
                                         json.load(open(os.path.join(root, f), 'r'))})
                            except UnicodeDecodeError:
                                raise Exception("Could not decode {0}".format(os.path.join(root, f)))

    if not file_paths and not config_dict:
        raise IOError("Didn't find any files in {0}. Please verify that the "
                      "path is typed correctly, that you have read access to "
                      "the directory, and that it is not "
                      "empty.".format(bids_dir))

    return file_paths, config_dict
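A usage sketch for collect_bids_files_configs; the BIDS directory and credentials CSV are hypothetical, and os/json are assumed to be imported at module level as bids_utils.py expects.

if __name__ == "__main__":
    file_paths, config_dict = collect_bids_files_configs(
        "s3://my-bucket/data/RawDataBIDS",           # hypothetical BIDS dataset
        aws_input_creds="/home/user/aws_keys.csv")   # hypothetical credentials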
Code example #27
def check_for_s3(file_path, creds_path=None, dl_dir=None, img_type='other'):

    # Import packages
    import os
    import nibabel as nib
    import botocore.exceptions

    from indi_aws import fetch_creds

    # Init variables
    s3_str = 's3://'
    if creds_path:
        if "None" in creds_path or "none" in creds_path or \
                "null" in creds_path:
            creds_path = None
    if dl_dir is None:
        dl_dir = os.getcwd()

    if file_path is None:
        # in case it's something like scan parameters or field map files, but
        # we don't have any
        local_path = file_path
        return local_path

    # TODO: remove this once scan parameter input as dictionary is phased out
    if isinstance(file_path, dict):
        # if this is a dictionary, just skip altogether
        local_path = file_path
        return local_path

    # Explicitly lower-case the "s3"
    if file_path.lower().startswith(s3_str):

        file_path = s3_str + file_path[len(s3_str):]

        # Get bucket name and bucket object
        bucket_name = file_path[len(s3_str):].split('/')[0]
        bucket = fetch_creds.return_bucket(creds_path, bucket_name)

        # Extract relative key path from bucket and local path
        s3_prefix = s3_str + bucket_name
        s3_key = file_path[len(s3_prefix) + 1:]
        local_path = os.path.join(dl_dir, bucket_name, s3_key)

        # Get local directory and create folders if they don't exist
        local_dir = os.path.dirname(local_path)
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)

        # Download file
        try:
            print("Attempting to download from AWS S3: {0}".format(file_path))
            bucket.download_file(Key=s3_key, Filename=local_path)
        except botocore.exceptions.ClientError as exc:
            error_code = int(exc.response['Error']['Code'])
            if error_code == 403:
                err_msg = 'Access to bucket: "%s" is denied; using credentials '\
                          'in subject list: "%s"; cannot access the file "%s"'\
                          % (bucket_name, creds_path, file_path)
                raise Exception(err_msg)
            elif error_code == 404:
                err_msg = 'File: {0} does not exist; check spelling and try '\
                          'again'.format(os.path.join(bucket_name, s3_key))
                raise Exception(err_msg)
            else:
                err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s'\
                          % (bucket_name, exc)
        except Exception as exc:
            err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s'\
                      % (bucket_name, exc)
            raise Exception(err_msg)

    # Otherwise just return what was passed in
    else:
        local_path = file_path

    # Check image dimensionality
    if local_path.endswith('.nii') or local_path.endswith('.nii.gz'):
        try:
            img_nii = nib.load(local_path)
        except Exception as e:
            # TODO: come up with a better option for handling rogue S3 files
            # TODO: that Nibabel chokes on
            print(str(e))
            return local_path

        if img_type == 'anat':
            if len(img_nii.shape) != 3:
                raise IOError('File: %s must be an anatomical image with 3 '\
                              'dimensions but %d dimensions found!'
                              % (local_path, len(img_nii.shape)))
        elif img_type == 'func':
            if len(img_nii.shape) != 4:
                raise IOError('File: %s must be a functional image with 4 '\
                              'dimensions but %d dimensions found!'
                              % (local_path, len(img_nii.shape)))

    # Return the local path
    return local_path
Code example #28
File: config_window.py Project: ccraddock/C-PAC
    def testConfig(self, event):
        '''
        This function runs when the user clicks the "Test Configuration"
        button in the pipeline configuration window.
        
        It prompts the user for a sample subject list (i.e. one that they will
        be using with the config they are building). Then it builds the
        pipeline but does not run it. It then reports whether or not the
        config will run, depending on whether the pipeline builds
        successfully.
        '''

        # Import packages
        import os
        import yaml
        from CPAC.utils import Configuration

        from CPAC.pipeline.cpac_pipeline import prep_workflow
        from CPAC.pipeline.cpac_runner import build_strategies

        def display(win, msg, changeBg=True):
            wx.MessageBox(msg, "Error")
            if changeBg:
                win.SetBackgroundColour("pink")
            win.SetFocus()
            win.Refresh()

        # Collect a sample subject list and parse it in
        testDlg0 = wx.MessageDialog(
            self, 'This tool will run a quick check on the current pipeline '
                  'configuration. Click OK to provide a subject list you '
                  'will be using with this setup.',
            'Subject List',
            wx.OK | wx.ICON_INFORMATION)
        testDlg0.ShowModal()
        testDlg0.Destroy()
        
        dlg = wx.FileDialog(
            self, message="Choose the CPAC Subject list file",
            defaultDir=os.getcwd(), 
            defaultFile="CPAC_subject_list.yml",
            wildcard="YAML files(*.yaml, *.yml)|*.yaml;*.yml",
            style=wx.OPEN | wx.CHANGE_DIR)
        
        if dlg.ShowModal() == wx.ID_OK:
            subListPath = dlg.GetPath()
        
        # Load and test the subject list
        print 'Checking subject list: %s...' % subListPath
        sublist = yaml.load(open(os.path.realpath(subListPath), 'r'))
        sub_flg = self.test_sublist(sublist)
        if not sub_flg:
            raise Exception
        print 'Subject list looks good!'
        # Following code reads in the parameters and selections from the
        # pipeline configuration window and populate the config_list

        config_list = []
        wf_counter = []

        for page in self.nb.get_page_list():

            switch = page.page.get_switch()

            ctrl_list = page.page.get_ctrl_list()
            validate = False

            if switch:
                switch_val = str(switch.get_selection()).lower()

                if switch_val == 'on' or switch_val == 'true' or \
                    switch_val == '1':

                    validate = True
                    wf_counter.append(page.get_counter())

            for ctrl in ctrl_list:

                # option_name will be the selection name as it is written
                # as the dictionary key of the config.yml dictionary
                option_name = ctrl.get_name()

                #validating
                if (switch == None or validate) and ctrl.get_validation() \
                    and (option_name != 'derivativeList') and \
                        (option_name != 'modelConfigs'):

                    win = ctrl.get_ctrl()
                    
                    if isinstance(ctrl.get_selection(), list):
                        value = ctrl.get_selection()
                        if not value:
                            display(
                                win, "%s field is empty or the items are " \
                                     "not checked!" % ctrl.get_name(), False)
                            return

                    elif (option_name == "tsa_roi_paths") or \
                             (option_name == "sca_roi_paths"):

                        # fires if the control is the checkbox grid for
                        # multiple paths assigned to multiple options
                        # (i.e. timeseries analysis)

                        config_list.append(ctrl)
                        continue

                    else:
                        value = str(ctrl.get_selection())

                    if len(value) == 0:
                        display(win, "%s field is empty!" % ctrl.get_name())
                        return
                        
                    if '/' in value and '$' not in value and not \
                        isinstance(value, list):

                        if not os.path.exists(ctrl.get_selection()) and \
                                        value != 'On/Off':
                            display(
                                win, "%s field contains incorrect path. " \
                                "Please update the path!" % ctrl.get_name())
                            return
                    
                config_list.append(ctrl)

        # Write out a pipeline_config file, read it in and then delete it
        # (Will revise the data structure of the config files later so this
        # can just pass the data structure instead of doing it this way)
        try:
            test_cfg_yml = '/tmp/test_config.yml'
            self.write(test_cfg_yml, config_list)
            c = Configuration(yaml.load(open(os.path.realpath(test_cfg_yml), 'r')))
            os.remove(test_cfg_yml)
        except:
            errDlg2 = wx.MessageDialog(
                self, 'A problem occurred with preparing the pipeline test run. \n\n' \
                      'Please ensure you have rights access to the directories you' \
                      ' have chosen for the CPAC working, crash, and output folders.',
                'Test Configuration Error',
                wx.OK | wx.ICON_ERROR)
            errDlg2.ShowModal()
            errDlg2.Destroy()

        if (1 in c.runNuisance) or (c.Regressors != None):
            strategies = sorted(build_strategies(c))
        else:
            strategies = None

        # Run the actual pipeline building prep and see if it works or not
        testDlg1 = wx.MessageDialog(
            self, 'Click OK to run the test. This should take only a few seconds.',
            'Running Test',
            wx.OK | wx.ICON_INFORMATION)
        testDlg1.ShowModal()

        # Check file paths first
        
        # Just getting proper names of config file parameters
        try:
            params_file = open(p.resource_filename('CPAC', 'GUI/resources/config_parameters.txt'), "r")
        except:
            print "Error: Could not open configuration parameter file.", "\n"
            raise Exception            

        paramInfo = params_file.read().split('\n')
        paramList = []

        for param in paramInfo:
            if param != '':
                paramList.append(param.split(','))

        # function for file path checking
        def testFile(filepath, paramName, switch):
            try:
                if (1 in switch) and (filepath != None):
                    fileTest = open(filepath)
                    fileTest.close()
            except:
                testDlg1.Destroy()
                
                for param in paramList:
                    if param[0] == paramName:
                        paramTitle = param[1]
                        paramGroup = param[2]
                        break
                    
                errDlgFileTest = wx.MessageDialog(
                    self, 'Error reading file - either it does not exist or '\
                          'you do not have read access. \n\n' \
                          'Parameter: %s \n' \
                          'In tab: %s \n\n' \
                          'Path: %s' % (paramTitle, paramGroup, filepath),
                    'Pipeline Not Ready',
                    wx.OK | wx.ICON_ERROR)
                errDlgFileTest.ShowModal()
                errDlgFileTest.Destroy()

        # Check S3 output bucket access if writing to S3
        output_dir = c.outputDirectory
        s3_str = 's3://'
        if output_dir.lower().startswith(s3_str):
            output_dir_sp = output_dir.split('/')
            output_dir_sp[0] = output_dir_sp[0].lower()
            output_dir = '/'.join(output_dir_sp)

        if type(output_dir) is str and output_dir.lower().startswith(s3_str):
            from indi_aws import fetch_creds
            creds_path = c.awsOutputBucketCredentials
            bucket_name = output_dir.split(s3_str)[1].split('/')[0]
            try:
                bucket = fetch_creds.return_bucket(creds_path, bucket_name)
                print 'Connection with output bucket "%s" successful!' % bucket_name
            except Exception as exc:
                err_msg = 'Unable to access output S3 bucket: "%s" with '\
                          'credentials in: "%s". Check bucket name '\
                          'and credentials file and try again'\
                          % (bucket_name, creds_path)
                testDlg1.Destroy()

                errDlg1 = wx.MessageDialog(self, err_msg, 'Pipeline Not Ready',
                                           wx.OK | wx.ICON_ERROR)
                errDlg1.ShowModal()
                errDlg1.Destroy()
                return

        testFile(c.template_brain_only_for_anat, \
                     'template_brain_only_for_anat',[1])
        testFile(c.template_skull_for_anat,'template_skull_for_anat',[1])
        testFile(c.PRIORS_WHITE,'PRIORS_WHITE',c.runSegmentationPreprocessing)
        testFile(c.PRIORS_GRAY,'PRIORS_GRAY',c.runSegmentationPreprocessing)
        testFile(c.PRIORS_CSF,'PRIORS_CSF',c.runSegmentationPreprocessing)
        testFile(c.template_brain_only_for_func, \
                     'template_brain_only_for_func',c.runRegisterFuncToMNI)
        testFile(c.template_skull_for_func,'template_skull_for_func', \
                     c.runRegisterFuncToMNI)
        testFile(c.identityMatrix,'identityMatrix',c.runRegisterFuncToMNI)
        testFile(c.boundaryBasedRegistrationSchedule, \
                     'boundaryBasedRegistrationSchedule', \
                     c.runRegisterFuncToAnat)
        testFile(c.lateral_ventricles_mask,'lateral_ventricles_mask', \
                     c.runNuisance)
        testFile(c.template_symmetric_brain_only, \
                     'template_symmetric_brain_only',c.runVMHC)
        testFile(c.template_symmetric_skull,'template_symmetric_skull', \
                     c.runVMHC)
        testFile(c.dilated_symmetric_brain_mask, \
                     'dilated_symmetric_brain_mask',c.runVMHC)
        testFile(c.configFileTwomm,'configFileTwomm',c.runVMHC)
        testFile(c.templateSpecificationFile,'templateSpecificationFile', \
                     c.runNetworkCentrality)

        if c.tsa_roi_paths and type(c.tsa_roi_paths[0]) == dict:
            for roi_path in c.tsa_roi_paths[0].keys():
                testFile(roi_path, "tsa_roi_paths", c.runROITimeseries)
        if c.sca_roi_paths and type(c.sca_roi_paths[0]) == dict:
            for roi_path in c.sca_roi_paths[0].keys():
                testFile(roi_path, "sca_roi_paths", c.runSCA)
        try:
            # Run the pipeline building
            prep_workflow(sublist[0], c, strategies, 0)

        except Exception as xxx:
            print xxx
            print "an exception occurred"
            
            testDlg1.Destroy()
            
            errDlg1 = wx.MessageDialog(
                self, 'There are issues with the current configuration ' \
                      'which need to be resolved - please check to make ' \
                      'sure the options you are running have the proper ' \
                      'pre-requisites selected.\n\nIssue Info:\n%s' \
                      % str(xxx),
                'Pipeline Not Ready',
                wx.OK | wx.ICON_ERROR)
            errDlg1.ShowModal()
            errDlg1.Destroy()
            
        else:
            testDlg1.Destroy()
            
            okDlg1 = wx.MessageDialog(
                self, 'The current configuration will run successfully. You '\
                      'can safely save and run this setup!',
                'Pipeline Ready',
                wx.OK | wx.ICON_INFORMATION)
            okDlg1.ShowModal()
            okDlg1.Destroy()
Code example #29
File: build_sublist.py Project: swatirane/C-PAC
def return_bids_template(base_dir, scan_type, creds_path=None):
    '''
    Function that returns the path template of the desired scan type
    from a BIDS dataset

    Parameters
    ----------
    base_dir : string
        base directory of the BIDS dataset
    scan_type : string
        type of scan; e.g. 'anat', 'func', etc.
    creds_path : string (optional); default=None
        filepath to a set of AWS credentials to access a BIDS dataset
        stored on S3 that isn't public

    Returns
    -------
    file_template : string
        regular expression-compatible file template indicating data
        path organization
    '''

    # Import packages
    import os
    from indi_aws import fetch_creds

    # Init variables
    s3_str = 's3://'
    file_path = None

    # If base directory is in S3
    if base_dir.startswith(s3_str):
        bucket_name = base_dir.split('/')[2]
        s3_prefix = '/'.join(base_dir.split('/')[:3])

        # Extract base prefix to search through in S3
        prefix = base_dir.split('*')[0].replace(s3_prefix, '').lstrip('/')

        # Attempt to get bucket
        try:
            bucket = fetch_creds.return_bucket(creds_path, bucket_name)
        except Exception as exc:
            err_msg = 'There was an error in retrieving S3 bucket: %s.\nError: %s'\
                      %(bucket_name, exc)
            raise Exception(err_msg)

        # Get filepaths from S3 with prefix
        print('Gathering files from S3 to parse...')
        for s3_obj in bucket.objects.filter(Prefix=prefix):
            file_path = s3_obj.key
            scan_dir = file_path.split('/')[-2]
            if scan_dir == scan_type:
                break
    # Else, the base directory is locally stored
    else:
        for root, dirs, files in os.walk(base_dir):
            if file_path:
                break
            for fil in files:
                file_path = os.path.join(root, fil)
                scan_dir = file_path.split('/')[-2]
                if fil.endswith('.nii.gz') and scan_dir == scan_type:
                    break
                else:
                    file_path = None

    # Now replace file_path intermediate dirs with *
    if file_path:
        rel_path = file_path.replace(base_dir, '').lstrip('/')
        interm_dirs = rel_path.split('/')[:-2]
        for imd in interm_dirs:
            file_path = file_path.replace(imd, '*')
    else:
        err_msg = 'Could not find any files in directory, check files!'
        raise Exception(err_msg)

    # Set template as any file *
    file_template = os.path.join(os.path.dirname(file_path), '*.nii.gz')

    # Return file pattern template
    return file_template
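
A minimal usage sketch for return_bids_template, assuming the function is importable from the sublist-building module named in the file header; the dataset paths, bucket name, and credentials file below are placeholders, not values from the original snippet.

# Hypothetical usage of return_bids_template; module and data paths are
# placeholders -- adjust them to your checkout and dataset.
from build_sublist import return_bids_template  # import path assumed

# Local BIDS dataset: no AWS credentials needed.
anat_template = return_bids_template('/data/bids_dataset', 'anat')

# Non-public BIDS dataset on S3: pass a credentials CSV.
func_template = return_bids_template('s3://my-bucket/bids_dataset', 'func',
                                     creds_path='/home/user/aws_creds.csv')

# Each template is a wildcarded path such as '/data/bids_dataset/*/*/anat/*.nii.gz'.
print(anat_template)
print(func_template)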
Code example #30
File: datasource.py Project: ccraddock/C-PAC
def check_for_s3(file_path, creds_path, dl_dir=None, img_type='anat'):

    # Import packages
    import os
    import nibabel as nib
    import botocore.exceptions

    from indi_aws import fetch_creds

    # Init variables
    s3_str = 's3://'
    if dl_dir is None:
        dl_dir = os.getcwd()

    if file_path is None:
        # in case it's something like scan parameters or field map files, but
        # we don't have any
        local_path = file_path
        return local_path

    # TODO: remove this once scan parameter input as dictionary is phased out
    if isinstance(file_path, dict):
        # if this is a dictionary, just skip altogether
        local_path = file_path
        return local_path

    # Explicitly lower-case the "s3"
    if file_path.lower().startswith(s3_str):
        file_path_sp = file_path.split('/')
        file_path_sp[0] = file_path_sp[0].lower()
        file_path = '/'.join(file_path_sp)

    # Check for s3 string in filepaths
    if file_path.startswith(s3_str):
        # Get bucket name and bucket object
        bucket_name = file_path.replace(s3_str, '').split('/')[0]
        bucket = fetch_creds.return_bucket(creds_path, bucket_name)

        # Extract relative key path from bucket and local path
        s3_prefix = os.path.join(s3_str, bucket_name)
        s3_key = file_path.replace(s3_prefix, '').lstrip('/')
        local_path = os.path.join(dl_dir, s3_key)

        # Get local directory and create folders if they dont exist
        local_dir = os.path.dirname(local_path)
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)

        # Download file
        try:
            print("Attempting to download from AWS S3: {0}".format(file_path))
            bucket.download_file(Key=s3_key, Filename=local_path)
        except botocore.exceptions.ClientError as exc:
            error_code = int(exc.response['Error']['Code'])
            if error_code == 403:
                err_msg = 'Access to bucket: "%s" is denied; using credentials '\
                          'in subject list: "%s"; cannot access the file "%s"'\
                          % (bucket_name, creds_path, file_path)
                raise Exception(err_msg)
            elif error_code == 404:
                err_msg = 'Bucket: "%s" does not exist; check spelling and try '\
                          'again' % bucket_name
                raise Exception(err_msg)
            else:
                err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s'\
                          % (bucket_name, exc)
                raise Exception(err_msg)
        except Exception as exc:
            err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s'\
                      % (bucket_name, exc)
            raise Exception(err_msg)

    # Otherwise just return what was passed in
    else:
        local_path = file_path

    # Check image dimensionality
    if '.nii' in local_path:
        try:
            img_nii = nib.load(local_path)
        except Exception as e:
            # TODO: come up with a better option for handling rogue S3 files
            # TODO: that Nibabel chokes on
            print(str(e))
            return local_path

        if img_type == 'anat':
            if len(img_nii.shape) != 3:
                raise IOError('File: %s must be an anatomical image with 3 '\
                              'dimensions but %d dimensions found!'
                              % (local_path, len(img_nii.shape)))
        elif img_type == 'func':
            if len(img_nii.shape) != 4:
                raise IOError('File: %s must be a functional image with 4 '\
                              'dimensions but %d dimensions found!'
                              % (local_path, len(img_nii.shape)))
        elif img_type == "other":
            pass

    # Return the local path
    return local_path
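
A hedged usage sketch for this version of check_for_s3: the import path follows the datasource.py file header, while the S3 key, credentials file, and download directory are placeholders rather than values from the original code.

# Hypothetical calls to check_for_s3; replace the paths with real ones.
from CPAC.utils.datasource import check_for_s3  # import path assumed

# S3 path: the file is downloaded under dl_dir and the local path returned;
# img_type='anat' additionally enforces a 3-D image.
local_anat = check_for_s3('s3://my-bucket/sub-01/anat/sub-01_T1w.nii.gz',
                          creds_path='/home/user/aws_creds.csv',
                          dl_dir='/tmp/cpac_inputs',
                          img_type='anat')

# Local path: returned unchanged (no download is attempted).
local_func = check_for_s3('/data/sub-01/func/sub-01_bold.nii.gz',
                          creds_path=None, img_type='func')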
Code example #31
File: build_sublist.py Project: swatirane/C-PAC
def return_s3_filepaths(path_template, creds_path=None, bids_flag=False):
    '''
    Function to return the filepaths from an S3 bucket given a file
    pattern template and, optionally, credentials

    Parameters
    ----------
    path_template : string
        filepath template in the form of:
        's3://bucket_name/base_dir/{site}/{participant}/{session}/..
        ../file.nii.gz'; if bids_flag is set, path_template is just the
        base directory of the BIDS data set
    creds_path : string (optional); default=None
        filepath to a credentials file containing the AWS credentials
        to access the S3 bucket objects
    bids_flag : boolean (optional); default=False
        flag to indicate if the dataset to gather is organized to the
        BIDS standard

    Returns
    -------
    matched_s3_paths : list
        a list of strings of the filepaths from the S3 bucket
    '''

    # Import packages
    import fnmatch
    import logging
    import os
    import re

    from indi_aws import fetch_creds

    # Check for errors
    if not bids_flag:
        if not ('{site}' in path_template
                and '{participant}' in path_template):
            err_msg = 'Please provide \'{site}\' and \'{participant}\' in '\
                      'filepath template where site and participant-level '\
                      'directories are present'
            raise Exception(err_msg)

    # Init variables
    bucket_name = path_template.split('/')[2]
    s3_prefix = '/'.join(path_template.split('/')[:3])

    # Get logger
    logger = logging.getLogger('sublist_builder')

    # Extract base prefix to search through in S3
    if bids_flag:
        prefix = path_template.split('*')[0].replace(s3_prefix, '').lstrip('/')
    else:
        prefix = path_template.split('{site}')[0].replace(s3_prefix,
                                                          '').lstrip('/')

    # Attempt to get bucket
    try:
        bucket = fetch_creds.return_bucket(creds_path, bucket_name)
    except Exception as exc:
        err_msg = 'There was an error in retrieving S3 bucket: %s.\nError: %s'\
                  %(bucket_name, exc)
        logger.error(err_msg)
        raise Exception(err_msg)

    # Get filepaths from S3 with prefix
    logger.info('Gathering files from S3 to parse...')
    s3_filepaths = []
    for s3_obj in bucket.objects.filter(Prefix=prefix):
        s3_filepaths.append(str(s3_obj.key))

    # Prepend 's3://bucket_name/' on found paths
    s3_filepaths = [os.path.join(s3_prefix, s3_fp) for s3_fp in s3_filepaths]

    # File pattern filter
    if bids_flag:
        file_pattern = path_template
    else:
        file_pattern = path_template.replace('{site}', '*').\
                       replace('{participant}', '*').replace('{session}', '*')

    # Get only matching s3 paths
    s3_filepaths = fnmatch.filter(s3_filepaths, file_pattern)

    # Restrict filepaths and pattern to be of same directory depth
    # as fnmatch will expand /*/ recursively to .../*/*/...
    matched_s3_paths = []
    for s3fp in s3_filepaths:
        s3_split = s3fp.split('/')
        fp_split = file_pattern.split('/')
        if len(s3_split) == len(fp_split):
            matched_s3_paths.append(s3fp)

    # Print how many found
    num_s3_files = len(matched_s3_paths)
    logger.info('Found %d files!' % num_s3_files)

    # Return the filepaths as a list
    return matched_s3_paths
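
A short, hedged example of calling return_s3_filepaths; the bucket, path template, and credentials file are placeholders, and the import path is assumed from the file header above.

# Hypothetical invocation of return_s3_filepaths (placeholder bucket/paths).
from build_sublist import return_s3_filepaths  # import path assumed

# Site/participant-organized data: the template must contain {site} and
# {participant} (and optionally {session}) placeholders.
template = ('s3://my-bucket/raw_data/{site}/{participant}/{session}/'
            'anat_1/mprage.nii.gz')
anat_paths = return_s3_filepaths(template,
                                 creds_path='/home/user/aws_creds.csv')

# BIDS-organized data: pass a wildcarded template (e.g. from
# return_bids_template) and set bids_flag=True to skip the {site} check.
bids_paths = return_s3_filepaths('s3://my-bucket/bids/*/*/anat/*.nii.gz',
                                 bids_flag=True)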
Code example #32
def check_for_s3(file_path,
                 creds_path=None,
                 dl_dir=None,
                 img_type='other',
                 verbose=False):
    # Import packages
    import csv  # used below for reading the Neuroparc atlas lookup table
    import os
    import nibabel as nib
    import botocore.exceptions
    from indi_aws import fetch_creds

    # Init variables
    s3_str = 's3://'
    if creds_path:
        if "None" in creds_path or "none" in creds_path or \
                        "null" in creds_path:
            creds_path = None

    if dl_dir is None:
        dl_dir = os.getcwd()

    if file_path is None:
        # in case it's something like scan parameters or field map files, but
        # we don't have any
        return None

    # TODO: remove this once scan parameter input as dictionary is phased out
    if isinstance(file_path, dict):
        # if this is a dictionary, just skip altogether
        local_path = file_path
        return local_path

    if file_path.lower().startswith(s3_str):

        file_path = s3_str + file_path[len(s3_str):]

        # Get bucket name and bucket object
        bucket_name = file_path[len(s3_str):].split('/')[0]
        # Extract relative key path from bucket and local path
        s3_prefix = s3_str + bucket_name
        s3_key = file_path[len(s3_prefix) + 1:]
        local_path = os.path.join(dl_dir, bucket_name, s3_key)

        # Get local directory and create folders if they dont exist
        local_dir = os.path.dirname(local_path)
        if not os.path.exists(local_dir):
            os.makedirs(local_dir, exist_ok=True)

        if os.path.exists(local_path):
            print("{0} already exists- skipping download.".format(local_path))
        else:
            # Download file
            try:
                bucket = fetch_creds.return_bucket(creds_path, bucket_name)
                print("Attempting to download from AWS S3: {0}".format(
                    file_path))
                bucket.download_file(Key=s3_key, Filename=local_path)
            except botocore.exceptions.ClientError as exc:
                error_code = int(exc.response['Error']['Code'])

                err_msg = str(exc)
                if error_code == 403:
                    err_msg = 'Access to bucket: "%s" is denied; using credentials ' \
                              'in subject list: "%s"; cannot access the file "%s"' \
                              % (bucket_name, creds_path, file_path)
                elif error_code == 404:
                    err_msg = 'File: {0} does not exist; check spelling and try ' \
                              'again'.format(
                        os.path.join(bucket_name, s3_key))
                else:
                    err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s' \
                              % (bucket_name, exc)

                raise Exception(err_msg)

            except Exception as exc:
                err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s' \
                          % (bucket_name, exc)
                raise Exception(err_msg)

    # Otherwise just return what was passed in
    else:
        local_path = file_path

    # Check if it exists or it is successfully downloaded
    if not os.path.exists(local_path):
        # alert users to 2020-07-20 Neuroparc atlas update (v0 to v1)
        ndmg_atlases = {}
        with open(
                os.path.join(os.path.dirname(os.path.dirname(__file__)),
                             'resources/templates/ndmg_atlases.csv')
        ) as ndmg_atlases_file:
            ndmg_atlases['v0'], ndmg_atlases['v1'] = zip(
                *[(f'/ndmg_atlases/label/Human/{atlas[0]}',
                   f'/ndmg_atlases/label/Human/{atlas[1]}')
                  for atlas in csv.reader(ndmg_atlases_file)])
        if local_path in ndmg_atlases['v0']:
            raise FileNotFoundError(''.join([
                'Neuroparc atlas paths were updated on July 20, 2020. '
                'C-PAC configuration files using Neuroparc v0 atlas paths '
                '(including C-PAC default and preconfigured pipeline '
                'configurations from v1.6.2a and earlier) need to be '
                'updated to use Neuroparc atlases. Your current '
                'configuration includes the Neuroparc v0 path '
                f'{local_path} which needs to be updated to ',
                ndmg_atlases['v1'][ndmg_atlases['v0'].index(local_path)],
                '. For a full list of such paths, see https://fcp-indi.'
                'github.io/docs/nightly/user/ndmg_atlases'
            ]))
        else:
            raise FileNotFoundError(f'File {local_path} does not exist!')

    if verbose:
        print("Downloaded file:\n{0}\n".format(local_path))

    # Check image dimensionality
    if local_path.endswith('.nii') or local_path.endswith('.nii.gz'):
        img_nii = nib.load(local_path)

        if img_type == 'anat':
            if len(img_nii.shape) != 3:
                raise IOError('File: %s must be an anatomical image with 3 ' \
                              'dimensions but %d dimensions found!'
                              % (local_path, len(img_nii.shape)))
        elif img_type == 'func':
            if len(img_nii.shape) != 4:
                raise IOError('File: %s must be a functional image with 4 ' \
                              'dimensions but %d dimensions found!'
                              % (local_path, len(img_nii.shape)))

    return local_path
Code example #33
def run_workflow(sub_dict, c, run, pipeline_timing_info=None, p_name=None,
                 plugin='MultiProc', plugin_args=None, test_config=False):
    '''
    Function to prepare and, optionally, run the C-PAC workflow

    Parameters
    ----------
    sub_dict : dictionary
        subject dictionary with anatomical and functional image paths
    c : Configuration object
        CPAC pipeline configuration dictionary object
    run : boolean
        flag to indicate whether to run the prepared workflow
    pipeline_timing_info : list (optional); default=None
        list of pipeline info for reporting timing information
    p_name : string (optional); default=None
        name of pipeline
    plugin : string (optional); default='MultiProc'
        nipype plugin to utilize when the workflow is run
    plugin_args : dictionary (optional); default=None
        plugin-specific arguments for the workflow plugin
    test_config : boolean (optional); default=False
        if True, only build the workflow to validate the configuration and
        do not run it

    Returns
    -------
    workflow : nipype workflow
        the prepared nipype workflow object containing the parameters
        specified in the config
    '''

    # Assure that changes on config will not affect other parts
    c = copy.copy(c)

    subject_id = sub_dict['subject_id']
    if sub_dict['unique_id']:
        subject_id += "_" + sub_dict['unique_id']

    log_dir = os.path.join(c.pipeline_setup['log_directory']['path'],
                           f'pipeline_{c.pipeline_setup["pipeline_name"]}',
                           subject_id)
    if not os.path.exists(log_dir):
        os.makedirs(os.path.join(log_dir))

    # TODO ASH Enforce c.run_logging to be boolean
    # TODO ASH Schema validation
    config.update_config({
        'logging': {
            'log_directory': log_dir,
            'log_to_file': bool(c.pipeline_setup['log_directory'].get(
                'run_logging', True))
        },
        'execution': {
            'crashfile_format': 'txt'
        }
    })

    config.enable_resource_monitor()
    logging.update_logging(config)

    # Start timing here
    pipeline_start_time = time.time()
    # at end of workflow, take timestamp again, take time elapsed and check
    # tempfile add time to time data structure inside tempfile, and increment
    # number of subjects

    # Check pipeline config resources
    sub_mem_gb, num_cores_per_sub, num_ants_cores, num_omp_cores = check_config_resources(
        c)

    if not plugin:
        plugin = 'MultiProc'

    if plugin_args:
        plugin_args['memory_gb'] = sub_mem_gb
        plugin_args['n_procs'] = num_cores_per_sub
    else:
        plugin_args = {'memory_gb': sub_mem_gb, 'n_procs': num_cores_per_sub}

    # perhaps in future allow user to set threads maximum
    # this is for centrality mostly
    # import mkl
    os.environ['OMP_NUM_THREADS'] = str(num_omp_cores)
    os.environ['MKL_NUM_THREADS'] = '1'  # str(num_cores_per_sub)
    os.environ['ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS'] = str(num_ants_cores)

    # TODO: TEMPORARY
    # TODO: solve the UNet model hanging issue during MultiProc
    if "UNet" in c.anatomical_preproc['brain_extraction']['using']:
        c.pipeline_setup['system_config']['max_cores_per_participant'] = 1
        logger.info("\n\n[!] LOCKING CPUs PER PARTICIPANT TO 1 FOR U-NET "
                    "MODEL.\n\nThis is a temporary measure due to a known "
                    "issue preventing Nipype's parallelization from running "
                    "U-Net properly.\n\n")

    # calculate maximum potential use of cores according to current pipeline
    # configuration
    max_core_usage = int(
        c.pipeline_setup['system_config']['max_cores_per_participant']) * \
                     int(c.pipeline_setup['system_config'][
                             'num_participants_at_once'])

    try:
        creds_path = sub_dict['creds_path']
        if creds_path and 'none' not in creds_path.lower():
            if os.path.exists(creds_path):
                input_creds_path = os.path.abspath(creds_path)
            else:
                err_msg = 'Credentials path: "%s" for subject "%s" was not ' \
                          'found. Check this path and try again.' % (
                              creds_path, subject_id)
                raise Exception(err_msg)
        else:
            input_creds_path = None
    except KeyError:
        input_creds_path = None

    # TODO enforce value with schema validation
    try:
        # this setting lives in the pipeline Configuration (c), not the
        # global nipype config object
        encrypt_data = bool(
            c.pipeline_setup['Amazon-AWS']['s3_encryption'])
    except (KeyError, TypeError, AttributeError):
        encrypt_data = False

    information = """

    C-PAC version: {cpac_version}

    Setting maximum number of cores per participant to {cores}
    Setting number of participants at once to {participants}
    Setting OMP_NUM_THREADS to {omp_threads}
    Setting MKL_NUM_THREADS to 1
    Setting ANTS/ITK thread usage to {ants_threads}
    Maximum potential number of cores that might be used during this run: {max_cores}

"""

    execution_info = """

    End of subject workflow {workflow}

    CPAC run complete:

        Pipeline configuration: {pipeline}
        Subject workflow: {workflow}
        Elapsed run time (minutes): {elapsed}
        Timing information saved in {log_dir}/cpac_individual_timing_{pipeline}.csv
        System time of start:      {run_start}
        System time of completion: {run_finish}

"""

    logger.info(information.format(
        cpac_version=CPAC.__version__,
        cores=c.pipeline_setup['system_config']['max_cores_per_participant'],
        participants=c.pipeline_setup['system_config'][
            'num_participants_at_once'],
        omp_threads=c.pipeline_setup['system_config']['num_OMP_threads'],
        ants_threads=c.pipeline_setup['system_config']['num_ants_threads'],
        max_cores=max_core_usage
    ))

    subject_info = {}
    subject_info['subject_id'] = subject_id
    subject_info['start_time'] = pipeline_start_time

    check_centrality_degree = c.network_centrality['run'] and \
                              (len(c.network_centrality['degree_centrality'][
                                       'weight_options']) != 0 or \
                               len(c.network_centrality[
                                       'eigenvector_centrality'][
                                       'weight_options']) != 0)

    check_centrality_lfcd = c.network_centrality['run'] and \
                            len(c.network_centrality[
                                    'local_functional_connectivity_density'][
                                    'weight_options']) != 0

    # Check system dependencies
    check_ica_aroma = c.nuisance_corrections['1-ICA-AROMA']['run']
    if isinstance(check_ica_aroma, list):
        check_ica_aroma = True in check_ica_aroma
    check_system_deps(check_ants='ANTS' in c.registration_workflows[
        'anatomical_registration']['registration']['using'],
                      check_ica_aroma=check_ica_aroma,
                      check_centrality_degree=check_centrality_degree,
                      check_centrality_lfcd=check_centrality_lfcd)

    # absolute paths of the dirs
    c.pipeline_setup['working_directory']['path'] = os.path.abspath(
        c.pipeline_setup['working_directory']['path'])
    if 's3://' not in c.pipeline_setup['output_directory']['path']:
        c.pipeline_setup['output_directory']['path'] = os.path.abspath(
            c.pipeline_setup['output_directory']['path'])

    workflow = build_workflow(
        subject_id, sub_dict, c, p_name, num_ants_cores
    )

    if test_config:
        logger.info('This has been a test of the pipeline configuration '
                    'file, the pipeline was built successfully, but was '
                    'not run')
    else:
        working_dir = os.path.join(
            c.pipeline_setup['working_directory']['path'], workflow.name)

        # if c.write_debugging_outputs:
        #    with open(os.path.join(working_dir, 'resource_pool.pkl'), 'wb') as f:
        #        pickle.dump(strat_list, f)

        # if c.pipeline_setup['working_directory']['regenerate_outputs'] is True:

        #     erasable = list(find_files(working_dir, '*sink*')) + \
        #         list(find_files(working_dir, '*link*')) + \
        #         list(find_files(working_dir, '*log*'))

        #     for f in erasable:
        #         if os.path.isfile(f):
        #             os.remove(f)
        #         else:
        #             shutil.rmtree(f)

        if hasattr(c, 'trim') and c.trim:
            logger.warn("""
Trimming is an experimental feature, and if used wrongly, it can lead to unreproducible results.
It is useful for performance optimization, but only if used correctly.
Please, make yourself aware of how it works and its assumptions:
    - The pipeline configuration has not changed;
    - The data configuration / BIDS directory has not changed;
    - The files from the output directory has not changed;
    - Your softwares versions has not changed;
    - Your C-PAC version has not changed;
    - You do not have access to the working directory.
""")

            workflow, _ = the_trimmer(
                workflow,
                output_dir=c.pipeline_setup['output_directory']['path'],
                s3_creds_path=input_creds_path,
            )

        pipeline_start_datetime = strftime("%Y-%m-%d %H:%M:%S")

        try:
            subject_info['resource_pool'] = []

            # for strat_no, strat in enumerate(strat_list):
            #    strat_label = 'strat_%d' % strat_no
            #    subject_info[strat_label] = strat.get_name()
            #    subject_info['resource_pool'].append(strat.get_resource_pool())

            subject_info['status'] = 'Running'

            # Create callback logger
            cb_log_filename = os.path.join(log_dir,
                                           'callback.log')

            try:
                if not os.path.exists(os.path.dirname(cb_log_filename)):
                    os.makedirs(os.path.dirname(cb_log_filename))
            except IOError:
                pass

            # Add handler to callback log file
            cb_logger = cb_logging.getLogger('callback')
            cb_logger.setLevel(cb_logging.DEBUG)
            handler = cb_logging.FileHandler(cb_log_filename)
            cb_logger.addHandler(handler)

            # Log initial information from all the nodes
            log_nodes_initial(workflow)

            # Add status callback function that writes in callback log
            if nipype.__version__ not in ('1.5.1',):
                err_msg = "This version of Nipype may not be compatible with " \
                          "CPAC v%s, please install Nipype version 1.5.1\n" \
                          % (CPAC.__version__)
                logger.error(err_msg)
            else:
                plugin_args['status_callback'] = log_nodes_cb

            if plugin_args['n_procs'] == 1:
                plugin = 'Linear'

            try:
                # Actually run the pipeline now, for the current subject
                workflow.run(plugin=plugin, plugin_args=plugin_args)
            except UnicodeDecodeError:
                raise EnvironmentError(
                    "C-PAC migrated from Python 2 to Python 3 in v1.6.2 (see "
                    "release notes). Your working directory contains Python 2 "
                    "pickles, probably from an older version of C-PAC. If you "
                    "want to continue to use this working directory, run\n\n"
                    "docker run -i --rm --user $(id -u):$(id -g) "
                    "-v /path/to/working_dir:/working "
                    "fcpindi/c-pac:latest /bids_dir /outputs cli -- "
                    "utils repickle /working\n"
                    "\nor\n\n"
                    "singularity run "
                    "C-PAC_latest.sif /bids_dir /outputs cli -- "
                    "utils repickle /path/to/working_dir\n\n"
                    "before running C-PAC >=v1.6.2"
                )

            # PyPEER kick-off
            # if c.PyPEER['run']:
            #    from CPAC.pypeer.peer import prep_for_pypeer
            #    prep_for_pypeer(c.PyPEER['eye_scan_names'], c.PyPEER['data_scan_names'],
            #                    c.PyPEER['eye_mask_path'], c.pipeline_setup['output_directory']['path'], subject_id,
            #                    pipeline_ids, c.PyPEER['stimulus_path'], c.PyPEER['minimal_nuisance_correction']['peer_gsr'],
            #                    c.PyPEER['minimal_nuisance_correction']['peer_scrub'], c.PyPEER['minimal_nuisance_correction']['scrub_thresh'])

            # Dump subject info pickle file to subject log dir
            subject_info['status'] = 'Completed'

            subject_info_file = os.path.join(
                log_dir, 'subject_info_%s.pkl' % subject_id
            )
            with open(subject_info_file, 'wb') as info:
                pickle.dump(list(subject_info), info)

            # have this check in case the user runs cpac_runner from terminal and
            # the timing parameter list is not supplied as usual by the GUI
            if pipeline_timing_info != None:

                # pipeline_timing_info list:
                #  [0] - unique pipeline ID
                #  [1] - pipeline start time stamp (first click of 'run' from GUI)
                #  [2] - number of subjects in subject list
                unique_pipeline_id = pipeline_timing_info[0]
                pipeline_start_stamp = pipeline_timing_info[1]
                num_subjects = pipeline_timing_info[2]

                # elapsed time data list:
                #  [0] - elapsed time in minutes
                elapsed_time_data = []

                elapsed_time_data.append(
                    int(((time.time() - pipeline_start_time) / 60)))

                # elapsedTimeBin list:
                #  [0] - cumulative elapsed time (minutes) across all subjects
                #  [1] - number of times the elapsed time has been appended
                #        (effectively a measure of how many subjects have run)

                # TODO
                # write more doc for all this
                # warning in .csv that some runs may be partial
                # code to delete .tmp file

                timing_temp_file_path = os.path.join(
                    c.pipeline_setup['log_directory']['path'],
                    '%s_pipeline_timing.tmp' % unique_pipeline_id)

                if not os.path.isfile(timing_temp_file_path):
                    elapsedTimeBin = []
                    elapsedTimeBin.append(0)
                    elapsedTimeBin.append(0)

                    with open(timing_temp_file_path, 'wb') as handle:
                        pickle.dump(elapsedTimeBin, handle)

                with open(timing_temp_file_path, 'rb') as handle:
                    elapsedTimeBin = pickle.loads(handle.read())

                elapsedTimeBin[0] = elapsedTimeBin[0] + elapsed_time_data[0]
                elapsedTimeBin[1] = elapsedTimeBin[1] + 1

                with open(timing_temp_file_path, 'wb') as handle:
                    pickle.dump(elapsedTimeBin, handle)

                # this happens once the last subject has finished running!
                if elapsedTimeBin[1] == num_subjects:

                    pipelineTimeDict = {}
                    pipelineTimeDict['Pipeline'] = c.pipeline_setup[
                        'pipeline_name']
                    pipelineTimeDict['Cores_Per_Subject'] = \
                    c.pipeline_setup['system_config'][
                        'max_cores_per_participant']
                    pipelineTimeDict['Simultaneous_Subjects'] = \
                    c.pipeline_setup['system_config'][
                        'num_participants_at_once']
                    pipelineTimeDict['Number_of_Subjects'] = num_subjects
                    pipelineTimeDict['Start_Time'] = pipeline_start_stamp
                    pipelineTimeDict['End_Time'] = strftime(
                        "%Y-%m-%d_%H:%M:%S")
                    pipelineTimeDict['Elapsed_Time_(minutes)'] = \
                    elapsedTimeBin[0]
                    pipelineTimeDict['Status'] = 'Complete'

                    gpaTimeFields = [
                        'Pipeline', 'Cores_Per_Subject',
                        'Simultaneous_Subjects',
                        'Number_of_Subjects', 'Start_Time',
                        'End_Time', 'Elapsed_Time_(minutes)',
                        'Status'
                    ]
                    timeHeader = dict(zip(gpaTimeFields, gpaTimeFields))

                    with open(os.path.join(
                            c.pipeline_setup['log_directory']['path'],
                                    'cpac_individual_timing_%s.csv' %
                                    c.pipeline_setup['pipeline_name']
                    ), 'a') as timeCSV, open(os.path.join(
                        c.pipeline_setup['log_directory']['path'],
                                'cpac_individual_timing_%s.csv' %
                                c.pipeline_setup['pipeline_name']
                    ), 'r') as readTimeCSV:

                        timeWriter = csv.DictWriter(timeCSV,
                                                    fieldnames=gpaTimeFields)
                        timeReader = csv.DictReader(readTimeCSV)

                        headerExists = False
                        for line in timeReader:
                            if 'Start_Time' in line:
                                headerExists = True

                        if headerExists == False:
                            timeWriter.writerow(timeHeader)

                        timeWriter.writerow(pipelineTimeDict)

                    # remove the temp timing file now that it is no longer needed
                    os.remove(timing_temp_file_path)

            # Upload logs to s3 if s3_str in output directory
            if c.pipeline_setup['output_directory'][
                'path'].lower().startswith('s3://'):

                try:
                    # Store logs in s3 output director/logs/...
                    s3_log_dir = os.path.join(
                        c.pipeline_setup['output_directory']['path'],
                        'logs',
                        os.path.basename(log_dir)
                    )
                    bucket_name = \
                    c.pipeline_setup['output_directory']['path'].split('/')[2]
                    bucket = fetch_creds.return_bucket(creds_path,
                                                       bucket_name)

                    # Collect local log files
                    local_log_files = []
                    for root, _, files in os.walk(log_dir):
                        local_log_files.extend([os.path.join(root, fil)
                                                for fil in files])
                    # Form destination keys
                    s3_log_files = [loc.replace(log_dir, s3_log_dir)
                                    for loc in local_log_files]
                    # Upload logs
                    aws_utils.s3_upload(bucket,
                                        (local_log_files, s3_log_files),
                                        encrypt=encrypt_data)
                    # Delete local log files
                    for log_f in local_log_files:
                        os.remove(log_f)

                except Exception as exc:
                    err_msg = 'Unable to upload CPAC log files in: %s.\nError: %s'
                    logger.error(err_msg, log_dir, exc)

        except Exception as e:
            import traceback
            traceback.print_exc()
            execution_info = """

Error of subject workflow {workflow}

CPAC run error:

    Pipeline configuration: {pipeline}
    Subject workflow: {workflow}
    Elapsed run time (minutes): {elapsed}
    Timing information saved in {log_dir}/cpac_individual_timing_{pipeline}.csv
    System time of start:      {run_start}

"""

        finally:

            if workflow:

                resource_report(cb_log_filename,
                                num_cores_per_sub, logger)

                logger.info(execution_info.format(
                    workflow=workflow.name,
                    pipeline=c.pipeline_setup['pipeline_name'],
                    log_dir=c.pipeline_setup['log_directory']['path'],
                    elapsed=(time.time() - pipeline_start_time) / 60,
                    run_start=pipeline_start_datetime,
                    run_finish=strftime("%Y-%m-%d %H:%M:%S")
                ))

                # Remove working directory when done
                if c.pipeline_setup['working_directory'][
                    'remove_working_dir']:
                    try:
                        if os.path.exists(working_dir):
                            logger.info("Removing working dir: %s",
                                        working_dir)
                            shutil.rmtree(working_dir)
                    except (FileNotFoundError, PermissionError):
                        logger.warning('Could not remove working directory %s',
                                       working_dir)
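
A hedged sketch of how run_workflow might be invoked for a single participant. The subject-dictionary keys shown (subject_id, unique_id, creds_path) are the ones this function reads directly; the configuration class import, YAML path, and image path are placeholders, and the layout of the data entries is only illustrative.

# Hypothetical driver for run_workflow; all paths below are placeholders.
import yaml
from CPAC.utils.configuration import Configuration  # import path assumed

with open('/configs/pipeline_config.yml', 'r') as f:
    c = Configuration(yaml.safe_load(f))

sub_dict = {
    'subject_id': 'sub-01',
    'unique_id': 'ses-1',
    'creds_path': None,   # or a path to an AWS credentials CSV for S3 inputs
    # data entries ('anat', 'func', ...) would follow the C-PAC data
    # configuration layout; a single placeholder is shown here
    'anat': '/data/sub-01/ses-1/anat/sub-01_T1w.nii.gz',
}

# Dry run: build the workflow to validate the configuration without running it.
run_workflow(sub_dict, c, run=True, p_name='my_pipeline', test_config=True)

# Full run: build and execute the workflow with the MultiProc plugin.
run_workflow(sub_dict, c, run=True, p_name='my_pipeline', plugin='MultiProc')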
Code example #34
def test_bucket_access(creds_path, output_directory):
    """
    Function to test write-access to an S3 bucket.

    Parameters
    ----------
    creds_path : string
        path to the csv file downloaded from AWS; can either be root
        or user credentials
    output_directory : string
        path to the directory on S3 where write-access should be tested;
        e.g. 's3://bucket_name/path/to/outputdir'

    Returns
    -------
    s3_write_access : boolean
        flag indicating whether user credentials grant write-access to
        specified output directory in S3 bucket
    """

    # Import packages
    import os
    import tempfile

    import botocore.exceptions as bexc
    from indi_aws import fetch_creds

    # Init variables
    s3_str = 's3://'
    test_file = tempfile.mktemp()

    # Explicitly lower-case the "s3"
    if output_directory.lower().startswith(s3_str):
        out_dir_sp = output_directory.split('/')
        out_dir_sp[0] = out_dir_sp[0].lower()
        output_directory = '/'.join(out_dir_sp)

    # Get bucket name
    bucket_name = output_directory.replace(s3_str, '').split('/')[0]

    # Get bucket
    bucket = fetch_creds.return_bucket(creds_path, bucket_name)

    # Create local file
    with open(test_file, 'w') as f:
        f.write('test123')

    # Formulate test output key in bucket path output directory
    rel_key_path = output_directory.replace(os.path.join(s3_str, bucket_name),
                                            '').lstrip('/')
    write_test_key = os.path.join(rel_key_path, os.path.basename(test_file))

    # Attempt a write to bucket
    try:
        bucket.upload_file(test_file, write_test_key)
        print('S3 write access confirmed!')
        test_key = bucket.Object(key=write_test_key)
        test_key.delete()
        s3_write_access = True
    # Otherwise we set the access flag to false
    except bexc.ClientError:
        print('S3 write access is not available!')
        s3_write_access = False

    # Return the access flag
    return s3_write_access
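
A brief usage sketch for test_bucket_access, assuming the function above is in scope (or importable); the credentials CSV and S3 output path are placeholders.

# Hypothetical pre-flight check of S3 write access before launching a run;
# the credentials file and output directory are placeholders.
have_access = test_bucket_access('/home/user/aws_creds.csv',
                                 's3://my-bucket/cpac_outputs')
if not have_access:
    raise SystemExit('No write access to the S3 output directory; check the '
                     'bucket name and credentials before running C-PAC.')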
Code example #35
    def testConfig(self, event):
        '''
        This function runs when the user clicks the "Test Configuration"
        button in the pipeline configuration window.
        
        It prompts the user for a sample subject list (i.e. one that they will
        be using with the config they are building). Then it builds the
        pipeline but does not run it. It then reports whether the
        configuration will run, based on whether the pipeline builds
        successfully.
        '''

        # Import packages
        import os
        import yaml
        from CPAC.utils import Configuration

        from CPAC.pipeline.cpac_pipeline import prep_workflow
        from CPAC.pipeline.cpac_runner import build_strategies

        def display(win, msg, changeBg=True):
            wx.MessageBox(msg, "Error")
            if changeBg:
                win.SetBackgroundColour("pink")
            win.SetFocus()
            win.Refresh()

        # Collect a sample subject list and parse it in
        testDlg0 = wx.MessageDialog(
            self, 'This tool will run a quick check on the current pipeline '\
                  'configuration. Click OK to provide a subject list you ' \
                  'will be using with this setup.',
            'Subject List',
            wx.OK | wx.ICON_INFORMATION)
        testDlg0.ShowModal()
        testDlg0.Destroy()

        dlg = wx.FileDialog(self,
                            message="Choose the CPAC Subject list file",
                            defaultDir=os.getcwd(),
                            defaultFile="CPAC_subject_list.yml",
                            wildcard="YAML files(*.yaml, *.yml)|*.yaml;*.yml",
                            style=wx.OPEN | wx.CHANGE_DIR)

        if dlg.ShowModal() == wx.ID_OK:
            subListPath = dlg.GetPath()

        # Load and test the subject list
        print('Checking subject list: %s...' % subListPath)
        sublist = yaml.load(open(os.path.realpath(subListPath), 'r'))
        sub_flg = self.test_sublist(sublist)
        if not sub_flg:
            raise Exception
        print('Subject list looks good!')
        # The following code reads in the parameters and selections from the
        # pipeline configuration window and populates the config_list

        config_list = []
        wf_counter = []

        for page in self.nb.get_page_list():

            switch = page.page.get_switch()

            ctrl_list = page.page.get_ctrl_list()
            validate = False

            if switch:
                switch_val = str(switch.get_selection()).lower()

                if switch_val == 'on' or switch_val == 'true' or \
                    switch_val == '1':

                    validate = True
                    wf_counter.append(page.get_counter())

            for ctrl in ctrl_list:

                # option_name will be the selection name as it is written
                # as the dictionary key of the config.yml dictionary
                option_name = ctrl.get_name()

                #validating
                if (switch == None or validate) and ctrl.get_validation() \
                    and (option_name != 'derivativeList') and \
                        (option_name != 'modelConfigs'):

                    win = ctrl.get_ctrl()

                    if isinstance(ctrl.get_selection(), list):
                        value = ctrl.get_selection()
                        if not value:
                            display(
                                win, "%s field is empty or the items are " \
                                     "not checked!" % ctrl.get_name(), False)
                            return

                    elif (option_name == "tsa_roi_paths") or \
                             (option_name == "sca_roi_paths"):

                        # fires if the control is the checkbox grid for
                        # multiple paths assigned to multiple options
                        # (i.e. timeseries analysis)

                        config_list.append(ctrl)
                        continue

                    else:
                        value = str(ctrl.get_selection())

                    if len(value) == 0:
                        display(win, "%s field is empty!" % ctrl.get_name())
                        return

                    if '/' in value and '$' not in value and not \
                        isinstance(value, list):

                        if not os.path.exists(ctrl.get_selection()) and \
                            value != 'On/Off':

                            display(
                                win, "%s field contains incorrect path. " \
                                "Please update the path!" % ctrl.get_name())
                            return

                config_list.append(ctrl)

        # Write out a pipeline_config file, read it in and then delete it
        # (Will revise the data structure of the config files later so this
        # can just pass the data structure instead of doing it this way)
        try:
            test_cfg_yml = '/tmp/test_config.yml'
            self.write(test_cfg_yml, config_list)
            c = Configuration(
                yaml.load(open(os.path.realpath(test_cfg_yml), 'r')))
            os.remove(test_cfg_yml)
        except:
            errDlg2 = wx.MessageDialog(
                self, 'A problem occurred with preparing the pipeline test run. \n\n' \
                      'Please ensure you have rights access to the directories you' \
                      ' have chosen for the CPAC working, crash, and output folders.',
                'Test Configuration Error',
                wx.OK | wx.ICON_ERROR)
            errDlg2.ShowModal()
            errDlg2.Destroy()
            return

        if (1 in c.runNuisance) or (c.Regressors != None):
            strategies = sorted(build_strategies(c))
        else:
            strategies = None

        # Run the actual pipeline building prep and see if it works or not
        testDlg1 = wx.MessageDialog(
            self,
            'Click OK to run the test. This should take only a few seconds.',
            'Running Test', wx.OK | wx.ICON_INFORMATION)
        testDlg1.ShowModal()

        # Check file paths first

        # Just getting proper names of config file parameters
        try:
            params_file = open(
                p.resource_filename('CPAC',
                                    'GUI/resources/config_parameters.txt'),
                "r")
        except:
            print "Error: Could not open configuration parameter file.", "\n"
            raise Exception

        paramInfo = params_file.read().split('\n')

        paramList = []

        for param in paramInfo:

            if param != '':
                paramList.append(param.split(','))

        # function for file path checking
        def testFile(filepath, paramName, switch):
            try:
                if (1 in switch) and (filepath != None):
                    fileTest = open(filepath)
                    fileTest.close()
            except:

                testDlg1.Destroy()

                for param in paramList:
                    if param[0] == paramName:
                        paramTitle = param[1]
                        paramGroup = param[2]
                        break

                errDlgFileTest = wx.MessageDialog(
                    self, 'Error reading file - either it does not exist or '\
                          'you do not have read access. \n\n' \
                          'Parameter: %s \n' \
                          'In tab: %s \n\n' \
                          'Path: %s' % (paramTitle, paramGroup, filepath),
                    'Pipeline Not Ready',
                    wx.OK | wx.ICON_ERROR)
                errDlgFileTest.ShowModal()
                errDlgFileTest.Destroy()

        # Check S3 output bucket access if writing to S3
        output_dir = c.outputDirectory
        s3_str = 's3://'
        if output_dir.lower().startswith(s3_str):
            output_dir_sp = output_dir.split('/')
            output_dir_sp[0] = output_dir_sp[0].lower()
            output_dir = '/'.join(output_dir_sp)

        if type(output_dir) is str and output_dir.lower().startswith(s3_str):
            from indi_aws import fetch_creds
            creds_path = c.awsOutputBucketCredentials
            bucket_name = output_dir.split(s3_str)[1].split('/')[0]
            try:
                bucket = fetch_creds.return_bucket(creds_path, bucket_name)
                print('Connection with output bucket "%s" successful!' % bucket_name)
            except Exception as exc:
                err_msg = 'Unable to access output S3 bucket: "%s" with '\
                          'credentials in: "%s". Check bucket name '\
                          'and credentials file and try again'\
                          % (bucket_name, creds_path)
                testDlg1.Destroy()

                errDlg1 = wx.MessageDialog(self, err_msg, 'Pipeline Not Ready',
                                           wx.OK | wx.ICON_ERROR)
                errDlg1.ShowModal()
                errDlg1.Destroy()
                return

        testFile(c.template_brain_only_for_anat, \
                     'template_brain_only_for_anat',[1])
        testFile(c.template_skull_for_anat, 'template_skull_for_anat', [1])
        testFile(c.PRIORS_WHITE, 'PRIORS_WHITE',
                 c.runSegmentationPreprocessing)
        testFile(c.PRIORS_GRAY, 'PRIORS_GRAY', c.runSegmentationPreprocessing)
        testFile(c.PRIORS_CSF, 'PRIORS_CSF', c.runSegmentationPreprocessing)
        testFile(c.template_brain_only_for_func, \
                     'template_brain_only_for_func',c.runRegisterFuncToMNI)
        testFile(c.template_skull_for_func,'template_skull_for_func', \
                     c.runRegisterFuncToMNI)
        testFile(c.identityMatrix, 'identityMatrix', c.runRegisterFuncToMNI)
        testFile(c.boundaryBasedRegistrationSchedule, \
                     'boundaryBasedRegistrationSchedule', \
                     c.runRegisterFuncToAnat)
        testFile(c.lateral_ventricles_mask,'lateral_ventricles_mask', \
                     c.runNuisance)
        testFile(c.template_symmetric_brain_only, \
                     'template_symmetric_brain_only',c.runVMHC)
        testFile(c.template_symmetric_skull,'template_symmetric_skull', \
                     c.runVMHC)
        testFile(c.dilated_symmetric_brain_mask, \
                     'dilated_symmetric_brain_mask',c.runVMHC)
        testFile(c.configFileTwomm, 'configFileTwomm', c.runVMHC)
        testFile(c.templateSpecificationFile,'templateSpecificationFile', \
                     c.runNetworkCentrality)

        if c.tsa_roi_paths and type(c.tsa_roi_paths[0]) == dict:
            for roi_path in c.tsa_roi_paths[0].keys():
                testFile(roi_path, "tsa_roi_paths", c.runROITimeseries)
        if c.sca_roi_paths and type(c.sca_roi_paths[0]) == dict:
            for roi_path in c.sca_roi_paths[0].keys():
                testFile(roi_path, "sca_roi_paths", c.runSCA)
        try:
            # Run the pipeline building
            prep_workflow(sublist[0], c, strategies, 0)

        except Exception as xxx:

            print(xxx)
            print("an exception occurred")

            testDlg1.Destroy()

            errDlg1 = wx.MessageDialog(
                self, 'There are issues with the current configuration ' \
                      'which need to be resolved - please check to make ' \
                      'sure the options you are running have the proper ' \
                      'pre-requisites selected.\n\nIssue Info:\n%s' \
                      % str(xxx),
                'Pipeline Not Ready',
                wx.OK | wx.ICON_ERROR)
            errDlg1.ShowModal()
            errDlg1.Destroy()

        else:

            testDlg1.Destroy()

            okDlg1 = wx.MessageDialog(
                self, 'The current configuration will run successfully. You '\
                      'can safely save and run this setup!',
                'Pipeline Ready',
                wx.OK | wx.ICON_INFORMATION)
            okDlg1.ShowModal()
            okDlg1.Destroy()
Code example #36
def return_bids_template(base_dir, scan_type, creds_path=None):
    '''
    Function that returns the path template of the desired scan type
    from a BIDS dataset

    Parameters
    ----------
    base_dir : string
        base directory of the BIDS dataset
    scan_type : string
        type of scan; e.g. 'anat', 'func', etc.
    creds_path : string (optional); default=None
        filepath to a set of AWS credentials to access a BIDS dataset
        stored on S3 that isn't public

    Returns
    -------
    file_template : string
        regular expression-compatible file template indicating data
        path organization
    '''

    # Import packages
    import os
    from indi_aws import fetch_creds

    # Init variables
    s3_str = 's3://'
    file_path = None

    # If base directory is in S3
    if base_dir.startswith(s3_str):
        bucket_name = base_dir.split('/')[2]
        s3_prefix = '/'.join(base_dir.split('/')[:3])

        # Extract base prefix to search through in S3
        prefix = base_dir.split('*')[0].replace(s3_prefix, '').lstrip('/')

        # Attempt to get bucket
        try:
            bucket = fetch_creds.return_bucket(creds_path, bucket_name)
        except Exception as exc:
            err_msg = 'There was an error in retrieving S3 bucket: %s.\nError: %s'\
                      %(bucket_name, exc)
            raise Exception(err_msg)

        # Get filepaths from S3 with prefix
        print('Gathering files from S3 to parse...')
        for s3_obj in bucket.objects.filter(Prefix=prefix):
            file_path = s3_obj.key
            scan_dir = file_path.split('/')[-2]
            if scan_dir == scan_type:
                break
    # Else, the base directory is locally stored
    else:
        for root, dirs, files in os.walk(base_dir):
            if file_path:
                break
            for fil in files:
                file_path = os.path.join(root, fil)
                scan_dir = file_path.split('/')[-2]
                if fil.endswith('.nii.gz') and scan_dir == scan_type:
                    break
                else:
                    file_path = None

    # Now replace file_path intermediate dirs with *
    if file_path:
        rel_path = file_path.replace(base_dir, '').lstrip('/')
        interm_dirs = rel_path.split('/')[:-2]
        for imd in interm_dirs:
            file_path = file_path.replace(imd, '*')
    else:
        err_msg = 'Could not find any files in directory, check files!'
        raise Exception(err_msg)

    # Set template as any file *
    file_template = os.path.join(os.path.dirname(file_path), '*.nii.gz')

    # Return file pattern template
    return file_template
Code example #37
def return_s3_filepaths(path_template, creds_path=None, bids_flag=False):
    '''
    Function to return the filepaths from an S3 bucket given a file
    pattern template and, optionally, credentials

    Parameters
    ----------
    path_template : string
        filepath template in the form of:
        's3://bucket_name/base_dir/{site}/{participant}/{session}/..
        ../file.nii.gz'; if bids_flag is set, path_template is just the
        base directory of the BIDS data set
    creds_path : string (optional); default=None
        filepath to a credentials file containing the AWS credentials
        to access the S3 bucket objects
    bids_flag : boolean (optional); default=False
        flag to indicate if the dataset to gather is organized to the
        BIDS standard

    Returns
    -------
    matched_s3_paths : list
        a list of strings of the filepaths from the S3 bucket
    '''

    # Import packages
    import fnmatch
    import logging
    import os
    import re

    from indi_aws import fetch_creds

    # Check for errors
    if not bids_flag:
        if not ('{site}' in path_template and '{participant}' in path_template):
            err_msg = 'Please provide \'{site}\' and \'{participant}\' in '\
                      'filepath template where site and participant-level '\
                      'directories are present'
            raise Exception(err_msg)

    # Init variables
    bucket_name = path_template.split('/')[2]
    s3_prefix = '/'.join(path_template.split('/')[:3])

    # Get logger
    logger = logging.getLogger('sublist_builder')

    # Extract base prefix to search through in S3
    if bids_flag:
        prefix = path_template.split('*')[0].replace(s3_prefix, '').lstrip('/')
    else:
        prefix = path_template.split('{site}')[0].replace(s3_prefix, '').lstrip('/')

    # Attempt to get bucket
    try:
        bucket = fetch_creds.return_bucket(creds_path, bucket_name)
    except Exception as exc:
        err_msg = 'There was an error in retrieving S3 bucket: %s.\nError: %s'\
                  %(bucket_name, exc)
        logger.error(err_msg)
        raise Exception(err_msg)

    # Get filepaths from S3 with prefix
    logger.info('Gathering files from S3 to parse...')
    s3_filepaths = []
    for s3_obj in bucket.objects.filter(Prefix=prefix):
        s3_filepaths.append(str(s3_obj.key))

    # Prepend 's3://bucket_name/' on found paths
    s3_filepaths = [os.path.join(s3_prefix, s3_fp) for s3_fp in s3_filepaths]

    # File pattern filter
    if bids_flag:
        file_pattern = path_template
    else:
        file_pattern = path_template.replace('{site}', '*').\
                       replace('{participant}', '*').replace('{session}', '*')

    # Get only matching s3 paths
    s3_filepaths = fnmatch.filter(s3_filepaths, file_pattern)

    # Restrict filepaths and pattern to be of same directory depth
    # as fnmatch will expand /*/ recursively to .../*/*/...
    matched_s3_paths = []
    for s3fp in s3_filepaths:
        s3_split = s3fp.split('/')
        fp_split = file_pattern.split('/')
        if len(s3_split) == len(fp_split):
            matched_s3_paths.append(s3fp)

    # Print how many found
    num_s3_files = len(matched_s3_paths)
    logger.info('Found %d files!' % num_s3_files)

    # Return the filepaths as a list
    return matched_s3_paths
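
A possible way to call return_s3_filepaths is sketched below; the bucket layout, study name, and credentials path are assumptions for illustration only, not part of the original example.

# Non-BIDS layout: {site}/{participant}/{session} placeholders in the template
anat_template = ('s3://fcp-indi/data/Projects/ExampleStudy/RawData/'
                 '{site}/{participant}/{session}/anat_1/mprage.nii.gz')
anat_paths = return_s3_filepaths(anat_template,
                                 creds_path='/path/to/aws_creds.csv')

# BIDS layout: pass an already-wildcarded template and set bids_flag
bids_template = ('s3://fcp-indi/data/Projects/ExampleStudy/RawDataBIDS/'
                 '*/*/anat/*_T1w.nii.gz')
bids_paths = return_s3_filepaths(bids_template, bids_flag=True)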
Code example #38
File: datasource.py Project: FCP-INDI/C-PAC
def check_for_s3(file_path, creds_path=None, dl_dir=None, img_type='other'):

    # Import packages
    import os
    import nibabel as nib
    import botocore.exceptions

    from indi_aws import fetch_creds

    # Init variables
    s3_str = 's3://'
    if creds_path:
        if "None" in creds_path or "none" in creds_path or \
                "null" in creds_path:
            creds_path = None

    if dl_dir is None:
        dl_dir = os.getcwd()

    if file_path is None:
        # in case it's something like scan parameters or field map files, but
        # we don't have any
        return None

    # TODO: remove this once scan parameter input as dictionary is phased out
    if isinstance(file_path, dict):
        # if this is a dictionary, just skip altogether
        local_path = file_path
        return local_path

    # Normalize the "s3://" prefix to lower-case
    if file_path.lower().startswith(s3_str):
        file_path = s3_str + file_path[len(s3_str):]

        # Get bucket name and bucket object
        bucket_name = file_path[len(s3_str):].split('/')[0]
        bucket = fetch_creds.return_bucket(creds_path, bucket_name)

        # Extract relative key path from bucket and local path
        s3_prefix = s3_str + bucket_name
        s3_key = file_path[len(s3_prefix) + 1:]
        local_path = os.path.join(dl_dir, bucket_name, s3_key)

        # Get local directory and create folders if they don't exist
        local_dir = os.path.dirname(local_path)
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)

        # Download file
        try:
            print("Attempting to download from AWS S3: {0}".format(file_path))
            bucket.download_file(Key=s3_key, Filename=local_path)
        except botocore.exceptions.ClientError as exc:
            error_code = int(exc.response['Error']['Code'])

            err_msg = str(exc)
            if error_code == 403:
                err_msg = 'Access to bucket: "%s" is denied; using credentials '\
                          'in subject list: "%s"; cannot access the file "%s"'\
                          % (bucket_name, creds_path, file_path)
            elif error_code == 404:
                err_msg = 'File: {0} does not exist; check spelling and try '\
                          'again'.format(os.path.join(bucket_name, s3_key))
            else:
                err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s'\
                          % (bucket_name, exc)
            
            raise Exception(err_msg)

        except Exception as exc:
            err_msg = 'Unable to connect to bucket: "%s". Error message:\n%s'\
                      % (bucket_name, exc)
            raise Exception(err_msg)

    # Otherwise just return what was passed in
    else:
        local_path = file_path

    # Check that the file exists or was successfully downloaded
    if not os.path.exists(local_path):
        raise IOError('File %s does not exist!' % (local_path))

    # Check image dimensionality
    if local_path.endswith('.nii') or local_path.endswith('.nii.gz'):
        img_nii = nib.load(local_path)

        if img_type == 'anat':
            if len(img_nii.shape) != 3:
                raise IOError('File: %s must be an anatomical image with 3 '\
                              'dimensions but %d dimensions found!'
                              % (local_path, len(img_nii.shape)))
        elif img_type == 'func':
            if len(img_nii.shape) != 4:
                raise IOError('File: %s must be a functional image with 4 '\
                              'dimensions but %d dimensions found!'
                              % (local_path, len(img_nii.shape)))

    return local_path
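
A short, hypothetical usage of check_for_s3 follows; the bucket key and download directory are placeholders, and anonymous access to a public bucket is assumed so creds_path can stay None.

# Download an anatomical image from S3 (or pass a local path straight
# through) and let the function verify that it has 3 dimensions.
anat_local = check_for_s3('s3://fcp-indi/data/sub-01/anat/sub-01_T1w.nii.gz',
                          creds_path=None,
                          dl_dir='/tmp',
                          img_type='anat')
print(anat_local)  # -> /tmp/fcp-indi/data/sub-01/anat/sub-01_T1w.nii.gz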
Code example #39
    import os

    s3_bucket = "fcp-indi"
    s3_creds = "/Users/cameron.craddock/AWS/ccraddock-fcp-indi-keys2.csv"
    s3_prefix = "data/Projects/ADHD200/RawDataBIDS"
    s3_sitedirs = [
        "Brown", "KKI", "NeuroIMAGE", "NYU", "OHSU", "Peking_1", "Peking_2",
        "Peking_3", "Pittsburgh", "WashU"
    ]
    out_prefix = "data/ADHD200/RawDataBIDS"
    max_subjs = 4

    if s3_creds:
        if not os.path.isfile(s3_creds):
            raise IOError("Could not filed aws_input_creds (%s)" % (s3_creds))

    from indi_aws import fetch_creds
    bucket = fetch_creds.return_bucket(s3_creds, s3_bucket)

    for site in s3_sitedirs:
        subjects = []

        prefix = os.path.join(s3_prefix, site)
        print "gathering files from S3 bucket (%s) for %s" % (bucket, prefix)

        for s3_obj in bucket.objects.filter(Prefix=prefix):
            if 'T1w' in str(s3_obj.key) or 'bold' in str(s3_obj.key):
                fname = os.path.basename(str(s3_obj.key))
                if "sub-" not in fname:
                    if not os.path.exists(
                            os.path.dirname(s3_obj.key).replace(
                                s3_prefix, out_prefix)):
                        print "making the directory"
Code example #40
def collect_bids_files_configs(bids_dir, aws_input_creds=''):
    """

    :param bids_dir:
    :param aws_input_creds:
    :return:
    """

    file_paths = []
    config_dict = {}

    if bids_dir.lower().startswith("s3://"):
        # s3 paths begin with s3://bucket/
        bucket_name = bids_dir.split('/')[2]
        s3_prefix = '/'.join(bids_dir.split('/')[:3])
        prefix = bids_dir.replace(s3_prefix, '').lstrip('/')

        if aws_input_creds:
            if not os.path.isfile(aws_input_creds):
                raise IOError("Could not find aws_input_creds (%s)" %
                              (aws_input_creds))

        from indi_aws import fetch_creds
        bucket = fetch_creds.return_bucket(aws_input_creds, bucket_name)

        print "gathering files from S3 bucket (%s) for %s" % (bucket, prefix)

        for s3_obj in bucket.objects.filter(Prefix=prefix):
            # we only know how to handle T1w and BOLD files, for now
            if 'T1w' in str(s3_obj.key) or 'bold' in str(s3_obj.key):
                if str(s3_obj.key).endswith("json"):
                    try:
                        config_dict[s3_obj.key.replace(prefix, "").lstrip('/')] \
                            = json.loads(s3_obj.get()["Body"].read())
                    except Exception as e:
                        print ("Error retrieving %s (%s)" %
                               (s3_obj.key.replace(prefix, ""),
                                e.message))
                        raise
                elif 'nii' in str(s3_obj.key):
                    file_paths.append(str(s3_obj.key)
                                      .replace(prefix,'').lstrip('/'))

    else:
        for root, dirs, files in os.walk(bids_dir, topdown=False):
            if files:
                file_paths += [os.path.join(root, f).replace(bids_dir,'')
                                   .lstrip('/')
                               for f in files
                               if 'nii' in f and ('T1w' in f or 'bold' in f)]
                config_dict.update(
                    {os.path.join(root.replace(bids_dir, '').lstrip('/'), f):
                         json.load(open(os.path.join(root, f), 'r'))
                     for f in files
                     if f.endswith('json') and ('T1w' in f or 'bold' in f)})

    if not file_paths and not config_dict:
        raise IOError("Didn't find any files in %s. Please verify that the"
            " path is typed correctly, that you have read access to the"
            " directory, and that it is not empty.".format(bids_dir))

    return file_paths, config_dict
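
A hypothetical call to collect_bids_files_configs is shown below; the dataset path is a placeholder.

# Gather relative NIfTI paths and sidecar JSON configs from a local BIDS tree
file_paths, config_dict = collect_bids_files_configs('/data/bids_dataset')
print(len(file_paths), 'imaging files,', len(config_dict), 'JSON sidecars')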
Code example #41
import boto3

from indi_aws import fetch_creds

dryrun = True

# Prefixes for reference files to copy from.
peerone = 'data/Projects/RocklandSample/RawDataBIDS/sub-A00064081/ses-NFB3/func/sub-A00064081_ses-NFB3_task-PEER1_events.tsv'
peertwo = 'data/Projects/RocklandSample/RawDataBIDS/sub-A00064081/ses-NFB3/func/sub-A00064081_ses-NFB3_task-PEER2_events.tsv'
checkerboardone = 'data/Projects/RocklandSample/RawDataBIDS/sub-A00064416/ses-DSA/func/sub-A00064416_ses-DSA_task-CHECKERBOARD_acq-1400_events.tsv'
checkerboardtwo = 'data/Projects/RocklandSample/RawDataBIDS/sub-A00064416/ses-DSA/func/sub-A00064416_ses-DSA_task-CHECKERBOARD_acq-645_events.tsv'
breathhold = 'data/Projects/RocklandSample/RawDataBIDS/sub-A00064416/ses-DSA/func/sub-A00064416_ses-DSA_task-BREATHHOLD_acq-1400_events.tsv'

# Create bucket object
s3_bucket_name = 'fcp-indi'
s3_prefix = 'data/Projects/RocklandSample/RawDataBIDS'
s3 = boto3.resource('s3')
s3_creds_path = '/path/to/jpellman-fcp-indi-keys.csv'
bucket = fetch_creds.return_bucket(s3_creds_path, s3_bucket_name)
s3_keys = bucket.objects.filter(Prefix=s3_prefix)

# Get the keys for NifTIs without events TSVs.
keylist = [key.key for key in s3_keys]
peerone_keylist = [
    key for key in keylist if 'PEER1' in key and '.nii.gz' in key
    and key.replace('_bold.nii.gz', '_events.tsv') not in keylist
]
peertwo_keylist = [
    key for key in keylist if 'PEER2' in key and '.nii.gz' in key
    and key.replace('_bold.nii.gz', '_events.tsv') not in keylist
]
checkerboardone_keylist = [
    key for key in keylist
    if 'CHECKERBOARD_acq-1400' in key and '.nii.gz' in key