Example #1
def sync(Lochness, subject, dry):
    # NOTE: logger, keyring, cryptease, tree, CONFIG, Module and Basename
    # are imports and module-level constants defined elsewhere in the
    # source module.
    delete = lochness.dropbox.delete_on_success(Lochness, Basename)
    logger.debug('delete_on_success for {0} is {1}'.format(Basename, delete))
    for dbx_sid in subject.dropbox[Module]:
        logger.debug('exploring {0}/{1}'.format(subject.study, subject.id))
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = cryptease.kdf(_passphrase)
        api_token = keyring.dropbox_api_token(Lochness, Module)
        client = dropbox.Dropbox(api_token)
        for datatype, products in CONFIG.items():
            dbx_head = os.path.join(os.sep, datatype, subject.study, dbx_sid)
            dbx_head_len = len(dbx_head)
            logger.debug('walking %s', dbx_head)
            for root, dirs, files in lochness.dropbox.walk(client, dbx_head):
                for f in files:
                    dbx_tail = os.path.join(root,
                                            f)[dbx_head_len:].lstrip(os.sep)
                    dbx_file = (dbx_head, dbx_tail)
                    product = _find_product(dbx_tail,
                                            products,
                                            subject=dbx_sid)
                    if not product:
                        continue
                    protect = product.get('protect', False)
                    compress = product.get('compress', False)
                    key = enc_key if protect else None
                    output_base = subject.protected_folder if protect else subject.general_folder
                    output_base = tree.get(datatype, output_base)
                    lochness.dropbox.save(client,
                                          dbx_file,
                                          output_base,
                                          key=key,
                                          compress=compress,
                                          delete=delete,
                                          dry=dry)
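
For orientation: the loop above expects CONFIG to map each datatype to a list of product dictionaries carrying at least the optional protect and compress flags it reads. The shape below is purely illustrative; the real keys and patterns live in the project's configuration.

# Illustrative only: one plausible CONFIG shape consistent with how the
# loop above consumes it ('pattern' is an assumed key used by the
# unshown _find_product helper; datatypes and globs are hypothetical).
CONFIG = {
    'actigraphy': [
        {'pattern': 'accel/*.csv', 'protect': True, 'compress': False},
    ],
    'surveys': [
        {'pattern': '*.csv', 'protect': False, 'compress': True},
    ],
}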
Example #2
def sync(Lochness, subject, dry):
    delete = lochness.dropbox.delete_on_success(Lochness, Basename)
    logger.debug('delete_on_success for {0} is {1}'.format(Basename, delete))
    for dbx_sid in subject.dropbox[Module]:
        logger.debug('exploring {0}/{1}'.format(subject.study, subject.id))
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = cryptease.kdf(_passphrase)
        api_token = keyring.dropbox_api_token(Lochness, Module)
        client = dropbox.Dropbox(api_token)
        patterns = _batch_compile(CONFIG, dbx_sid)
        for category, datatype in _iterate(CONFIG):
            output_base = subject.protected_folder if category == 'PROTECTED' else subject.general_folder
            output_base = tree.get(datatype, output_base)
            dbx_head = os.path.join(os.sep, datatype, subject.study)
            # shim the dropbox head for certain data types
            if datatype == 'onsite_interview':
                dbx_head = os.path.join(dbx_head, 'output')
            elif datatype == 'behav_qc':
                dbx_head = os.path.join(dbx_head, dbx_sid)
            dbx_head_len = len(dbx_head)
            for root, dirs, files in lochness.dropbox.walk(client, dbx_head):
                for f in files:
                    dbx_tail = os.path.join(root, f)[dbx_head_len:].lstrip(os.sep)
                    dbx_file = (dbx_head, dbx_tail)
                    if patterns[datatype].match(dbx_tail):
                        key = enc_key if category == 'PROTECTED' else None
                        lochness.dropbox.save(client, dbx_file, output_base,
                                              key=key, delete=delete, dry=dry)
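
Example #2 trades the per-file _find_product lookup of Example #1 for regexes precompiled once per datatype. The helper's body is not shown here; the sketch below is one plausible implementation, assuming CONFIG maps each category (e.g. 'PROTECTED') to a datatype-to-pattern mapping with an optional {subject} placeholder.

import re

def _batch_compile(config, subject_id):
    # Hypothetical sketch of _batch_compile; the real helper is defined
    # elsewhere in the module. Compiles one regex per datatype,
    # substituting the subject id wherever a '{subject}' placeholder
    # appears (the config layout is an assumption).
    patterns = {}
    for category, datatypes in config.items():
        for datatype, raw in datatypes.items():
            patterns[datatype] = re.compile(raw.format(subject=subject_id))
    return patterns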
Example #3
def write_keyring_and_encrypt(self):
    with open(self.keyring_loc, 'w') as f:
        json.dump(self.keyring, f)

    # encrypt the keyring with an empty passphrase
    key = crypt.kdf('')
    with open(self.keyring_loc, 'rb') as keyring_content:
        crypt.encrypt(
                keyring_content, key,
                filename=self.tmp_lochness_dir / '.lochness.enc')
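
To verify the result, the encrypted keyring can be read back with the same crypt API used in the later examples (crypt.key_from_file and crypt.decrypt); a minimal round-trip sketch, assuming the empty passphrase used above and a tmp_lochness_dir Path in scope:

# Readback sketch using only calls that appear in these examples;
# tmp_lochness_dir stands in for self.tmp_lochness_dir above.
with open(tmp_lochness_dir / '.lochness.enc', 'rb') as fo:
    key = crypt.key_from_file(fo, '')           # empty passphrase, as above
    content = b''.join(crypt.decrypt(fo, key))
keyring = json.loads(content)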
Example #4
def main():
    parser = ap.ArgumentParser('File encryption/decryption utility')
    group1 = parser.add_mutually_exclusive_group(required=True)
    group1.add_argument('--decrypt', action='store_true', help='Decrypt file')
    group1.add_argument('--encrypt', action='store_true', help='Encrypt file')
    parser.add_argument('-o', '--output-file', help='Output file')
    parser.add_argument('--debug',
                        action='store_true',
                        help='Enable debug messages')
    parser.add_argument('file', help='File to encrypt or decrypt')
    args = parser.parse_args()

    # read passphrase (ask twice for --encrypt)
    if 'ENCRYPT_PASS' in os.environ:
        passphrase = os.environ['ENCRYPT_PASS']
    else:
        passphrase = gp.getpass('enter passphrase: ')
        if args.encrypt:
            reentered = gp.getpass('re-enter passphrase: ')
            if passphrase != reentered:
                logger.critical('passphrases do not match')
                sys.exit(1)

    # get file handle ('get' and 'overwrite' are helpers defined elsewhere)
    raw = get(args.file)

    # get key using file header, or build a new one
    if args.decrypt:
        key = crypt.key_from_file(raw, passphrase)
    else:
        key = crypt.kdf(passphrase)

    # lock or unlock the file
    if args.decrypt:
        if not args.output_file:
            stdout = os.fdopen(sys.stdout.fileno(), 'wb')
            for chunk in crypt.decrypt(raw, key):
                stdout.write(chunk)
        else:
            if overwrite(args.output_file):
                logger.info('saving {}'.format(args.output_file))
                crypt.decrypt(raw, key, filename=args.output_file)
    elif args.encrypt:
        if not args.output_file:
            stdout = os.fdopen(sys.stdout.fileno(), 'wb')
            for chunk in crypt.encrypt(raw, key):
                stdout.write(chunk)
        else:
            if overwrite(args.output_file):
                logger.info('saving {}'.format(args.output_file))
                crypt.encrypt(raw, key, filename=args.output_file)
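
Because the passphrase can come from ENCRYPT_PASS, the tool is scriptable without a TTY. A hypothetical invocation from Python follows; the crypt.py script name and the file paths are assumptions, while the flags and the environment variable come from the code above.

import os
import subprocess

# Hypothetical driver for the CLI above.
env = dict(os.environ, ENCRYPT_PASS='correct horse battery staple')
subprocess.run(['python', 'crypt.py', '--encrypt', 'notes.txt',
                '-o', 'notes.txt.enc'], env=env, check=True)
subprocess.run(['python', 'crypt.py', '--decrypt', 'notes.txt.enc',
                '-o', 'notes.txt.out'], env=env, check=True)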
Example #5
def update_keyring_and_encrypt_DPACC(tmp_lochness_dir: str):
    keyring_loc = Path(tmp_lochness_dir) / 'lochness.json'
    with open(keyring_loc, 'r') as f:
        keyring = json.load(f)

    keyring['lochness_sync']['PATH_IN_HOST'] = '.'

    with open(keyring_loc, 'w') as f:
        json.dump(keyring, f)

    # encrypt the updated keyring with an empty passphrase
    key = crypt.kdf('')
    with open(keyring_loc, 'rb') as keyring_content:
        crypt.encrypt(keyring_content,
                      key,
                      filename=Path(tmp_lochness_dir) / '.lochness.enc')
Example #6
def main():
    parser = ap.ArgumentParser('File encryption/decryption utility')
    parser.add_argument('--keyring',
                        default='~/.nrg-keyring.enc',
                        type=os.path.expanduser,
                        help='Keyring file')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--phoenix-study',
                       help='Return passphrase for PHOENIX study')
    group.add_argument('--beiwe-study',
                       help='Return passphrase for Beiwe study')
    parser.add_argument('--debug',
                        action='store_true',
                        help='Enable debug messages')
    args = parser.parse_args()

    # read encrypted keyring file in binary mode (the content is ciphertext)
    raw = open(args.keyring, 'rb')

    # get salt from the encrypted file header
    header, _ = crypt.read_header(raw)
    salt = header['kdf'].salt

    # read passphrase from an env or command line
    if 'NRG_KEYRING_PASS' in os.environ:
        passphrase = os.environ['NRG_KEYRING_PASS']
    else:
        passphrase = gp.getpass('enter passphrase: ')

    # construct decryption key
    key = crypt.kdf(passphrase, salt=salt)

    # decrypt the keyring content (bytes) and parse it as JSON
    content = b''
    for chunk in crypt.decrypt(raw, key):
        content += chunk
    js = json.loads(content.decode('utf-8'))

    # return what the user requested
    try:
        if args.phoenix_study:
            sys.stdout.write(js['kitchen']['SECRETS'][args.phoenix_study])
        elif args.beiwe_study:
            sys.stdout.write(js['beiwe']['SECRETS'][args.beiwe_study])
    except KeyError as e:
        logger.critical('key not found {0}'.format(e))
        sys.exit(1)
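
The final lookups imply a keyring document with per-service SECRETS sections inside the decrypted JSON; an illustrative shape, with hypothetical study names and passphrases:

# Illustrative keyring layout implied by the lookups above; every name
# and value here is hypothetical.
keyring = {
    'kitchen': {'SECRETS': {'StudyA': 'passphrase-for-StudyA'}},
    'beiwe': {'SECRETS': {'StudyB': 'passphrase-for-StudyB'}},
}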
Example #7
def test_write_file():
    original = b'''Lorem ipsum dolor sit amet, consectetur adipiscing elit,
                sed do eiusmod tempor incididunt ut labore et dolore 
                magna aliqua. Ut enim ad minim veniam, quis nostrud 
                exercitation ullamco laboris nisi ut aliquip ex ea commodo 
                consequat.'''
    passphrase = 'foo bar biz bat'
    key = crypt.kdf(passphrase)
    with tf.NamedTemporaryFile(dir=DIR, prefix='enc', delete=False) as enc_tmp:
        crypt.encrypt_to_file(enc_tmp.name, io.BytesIO(original), key)
    # discard the key to prove it can be re-derived from the encrypted file
    key = None
    with tf.NamedTemporaryFile(dir=DIR, prefix='dec', delete=False) as dec_tmp:
        with open(enc_tmp.name, 'rb') as fo:
            key = crypt.key_from_file(fo, passphrase)
            crypt.decrypt_to_file(dec_tmp.name, fo, key)
    os.remove(enc_tmp.name)
    with open(dec_tmp.name, 'rb') as fo:
        decrypted = fo.read()
    os.remove(dec_tmp.name)
    assert decrypted == original
Example #8
def save(Keyring, archive, user_id, output_dir, lock=None, passphrase=None):
    '''
    The order of operations here is important to ensure the ability
    to reach a state of consistency:

        1. Save the file
        2. Update the local registry
    '''
    num_saved = 0
    if not archive:
        return num_saved
    encoding = locale.getpreferredencoding()
    if not lock:
        lock = list()
    else:
        if not passphrase:
            raise SaveError(
                'if you wish to lock a data type, you need a passphrase')
    lock_ext = LOCK_EXT.lstrip('.')
    # open registry file in downloaded archive
    logger.debug('reading registry file from beiwe archive')
    with archive.open('registry', 'r') as fo:
        registry = json.loads(fo.read().decode('utf-8'))
    # if archive registry contains any entries, process them
    if registry:
        # iterate over archive members
        for member in archive.namelist():
            # skip over the registry file
            if member == 'registry':
                continue
            # get information about the current archive member
            info = archive.getinfo(member)
            # parse the data type name from the current archive member
            member_datatype = _parse_datatype(member, user_id)
            # check if data type should be encrypted
            encrypt = member_datatype in lock
            logger.debug('processing archive member: %s (lock=%s)', member,
                         encrypt)
            # create target name
            target = member
            # add lock extension to target name if necessary
            if encrypt:
                target = '{0}.{1}'.format(target, lock_ext)
            target_abs = os.path.join(output_dir, target)
            target_dir = os.path.dirname(target_abs)
            # detect if target exists
            if os.path.exists(target_abs):
                os.remove(target_abs)
            # create target directory
            if not os.path.exists(target_dir):
                _makedirs(target_dir, umask=0o5022)
            # read archive member content and encrypt it if necessary
            with archive.open(member) as content:
                if encrypt:
                    key = crypt.kdf(passphrase)
                    crypt.encrypt(content,
                                  key,
                                  filename=target_abs,
                                  permissions=0o0644)
                else:
                    # write content to persistent storage
                    _atomic_write(target_abs, content.read())
            num_saved += 1
        # update local registry file to avoid re-downloading these files
        local_registry_file = os.path.join(output_dir, user_id, '.registry')
        local_registry = dict()
        if os.path.exists(local_registry_file):
            with open(local_registry_file, 'r') as fo:
                local_registry = json.load(fo)
        local_registry.update(registry)
        local_registry_str = json.dumps(local_registry, indent=2)
        _atomic_write(local_registry_file, local_registry_str.encode(encoding))
    # return the number of saved files
    return num_saved
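
_atomic_write is not shown in this example; the conventional pattern the name suggests is writing to a temporary file in the target directory and renaming it into place, so readers never observe a partial file. A minimal sketch under that assumption:

import os
import tempfile

def _atomic_write(path, content):
    # Hypothetical sketch of _atomic_write (the real helper lives
    # elsewhere in the module): write bytes to a temp file in the target
    # directory, then atomically rename it into place.
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(path))
    try:
        with os.fdopen(fd, 'wb') as fo:
            fo.write(content)
        os.replace(tmp, path)
    except Exception:
        os.remove(tmp)
        raise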
Example #9
def sync_module(Lochness: 'lochness.config', subject: 'subject.metadata',
                study_name: 'mediaflux.study_name', dry: bool):
    '''sync mediaflux data for the subject'''

    if dry:
        raise NotImplementedError('--dry option is not implemented')

    study_basename = study_name.split('.')[1]

    for mf_subid in subject.mediaflux[study_name]:
        logger.debug(f'exploring {subject.study}/{subject.id}')
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = enc.kdf(_passphrase)

        mflux_cfg = keyring.mediaflux_api_token(Lochness, study_name)

        mf_base = base(Lochness, study_basename)

        logger.debug(mf_base)

        for datatype, products in \
                Lochness['mediaflux'][study_basename]['file_patterns'].items():

            logger.debug('%s %s', datatype, products)
            '''
            file_patterns:
                actigraphy:
                    - vendor: Philips
                      product: Actiwatch 2
                      data_dir: all_BWH_actigraphy
                      pattern: 'accel/*csv'
                      protect: True
                    - vendor: Activinsights
                      product: GENEActiv
                      data_dir: all_BWH_actigraphy
                      pattern: 'GENEActiv/*bin,GENEActiv/*csv'
            
            '''

            for prod in products:
                for patt in prod['pattern'].split(','):

                    # consider the case with space
                    # pattern: 'GENEActiv/*bin, GENEActiv/*csv'
                    patt = patt.strip()

                    if '*' not in patt:
                        raise PatternError(
                            'Mediaflux pattern must include an asterisk e.g. *csv or GENEActiv/*csv'
                        )

                    # construct mediaflux remote dir
                    mf_remote_pattern = pjoin(mf_base, prod['data_dir'],
                                              mf_subid, patt)
                    mf_remote_dir = dirname(mf_remote_pattern)

                    # obtain mediaflux remote paths
                    with tempfile.TemporaryDirectory() as tmpdir:
                        diff_path = pjoin(tmpdir, 'diff.csv')
                        cmd = ' '.join([
                            'unimelb-mf-check', '--mf.config', mflux_cfg,
                            '--nb-retries 5', '--direction down', tmpdir,
                            mf_remote_dir, '-o', diff_path
                        ])

                        p = Popen(cmd, shell=True)
                        p.wait()

                        # ENH
                        # if dry: exit()

                        if not isfile(diff_path):
                            continue

                        df = pd.read_csv(diff_path)
                        for remote in df['SRC_PATH'].values:

                            # skip empty rows in the diff report
                            if pd.isna(remote):
                                continue

                            if not re.search(patt.replace('*', '(.+?)'), remote):
                                continue
                            remote = remote.split(':')[1]

                            # construct local path
                            protect = prod.get('protect', True)
                            processed = prod.get('processed', False)
                            key = enc_key if protect else None
                            subj_dir = subject.protected_folder \
                                if protect else subject.general_folder

                            # mf_local= pjoin(subj_dir, datatype, dirname(patt), basename(remote))
                            mf_local = str(
                                tree.get(datatype,
                                         subj_dir,
                                         processed=processed,
                                         BIDS=Lochness['BIDS']))

                            # ENH set different permissions
                            # GENERAL: 0o755, PROTECTED: 0700
                            os.makedirs(mf_local, exist_ok=True)

                            # subprocess call unimelb-mf-download
                            cmd = ' '.join([
                                'unimelb-mf-download', '--mf.config',
                                mflux_cfg, '-o', mf_local, '--nb-retries 5',
                                f'"{remote}"'
                            ])

                            p = Popen(cmd, shell=True)
                            p.wait()

                            # verify checksum after download completes
                            # if checksum does not match, data will be downloaded again
                            # ENH should we verify checksum 5 times?
                            cmd += ' --csum-check'
                            p = Popen(cmd, shell=True)
                            p.wait()
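
The filter above turns each config glob into a regex by replacing * with (.+?); a quick worked example of what that accepts (sample paths hypothetical):

import re

# Worked example of the glob-to-regex translation used above; the
# sample remote paths are hypothetical.
patt = 'GENEActiv/*csv'
rx = patt.replace('*', '(.+?)')   # -> 'GENEActiv/(.+?)csv'
assert re.search(rx, '/projects/sub01/GENEActiv/sub01_left_wrist.csv')
assert not re.search(rx, '/projects/sub01/accel/sub01.csv')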
Example #10
def sync_module(Lochness: 'lochness.config', subject: 'subject.metadata',
                module_name: 'box.module_name', dry: bool):
    '''Sync box data for the subject'''

    # only the module_name string without 'box.'
    module_basename = module_name.split('.')[1]

    # delete on success
    delete = delete_on_success(Lochness, module_basename)
    logger.debug(f'delete_on_success for {module_basename} is {delete}')

    for bx_sid in subject.box[module_name]:
        logger.debug(f'exploring {subject.study}/{subject.id}')
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = enc.kdf(_passphrase)

        client_id, client_secret, api_token = keyring.box_api_token(
            Lochness, module_name)

        # box authentication
        auth = OAuth2(
            client_id=client_id,
            client_secret=client_secret,
            access_token=api_token,
        )
        client = Client(auth)

        bx_base = base(Lochness, module_basename)

        # get the id of the bx_base path in box
        bx_base_obj = get_box_object_based_on_name(client, bx_base, '0')

        if bx_base_obj is None:
            logger.debug('Root of the box is not found')
            continue

        # loop through the items defined for the BOX data
        for datatype, products in iter(
                Lochness['box'][module_basename]['file_patterns'].items()):
            subject_obj = get_box_object_based_on_name(client, bx_sid,
                                                       bx_base_obj.id)

            if subject_obj is None:
                logger.debug(f'{bx_sid} is not found under {bx_base_obj}')
                continue

            datatype_obj = get_box_object_based_on_name(
                client, datatype, subject_obj.id)

            # full path
            bx_head = join(bx_base, datatype, bx_sid)

            logger.debug('walking %s', bx_head)

            # if the directory is empty
            if datatype_obj is None:
                continue

            # walk through the root directory
            for root, dirs, files in walk_from_folder_object(
                    bx_head, datatype_obj):

                for box_file_object in files:
                    bx_tail = join(basename(root), box_file_object.name)
                    product = _find_product(bx_tail, products, subject=bx_sid)
                    if not product:
                        continue

                    protect = product.get('protect', False)
                    output_base = subject.protected_folder \
                                  if protect else subject.general_folder

                    encrypt = product.get('encrypt', False)
                    key = enc_key if encrypt else None

                    processed = product.get('processed', False)

                    # For DPACC, get processed from the config.yml
                    output_base = tree.get(datatype,
                                           output_base,
                                           processed=processed,
                                           BIDS=Lochness['BIDS'])

                    compress = product.get('compress', False)

                    # propagate the computed delete flag and the dry argument
                    save(box_file_object, (root, box_file_object.name),
                         output_base,
                         key=key,
                         compress=compress,
                         delete=delete,
                         dry=dry)