Code example #1
File: sync.py Project: gmega/gdsyncpy
def resume_sync(service, args):
    if args.clear:
        _clear_sync_states(service)
    else:
        state = _latest_sync_state(service)
        eprint('Resuming sync from state %s' % state.full_path)
        _sync(service, state)
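The eprint helper these examples revolve around is not shown on this page. A minimal sketch of what it presumably looks like in gdsyncpy, assuming it is a thin print wrapper that writes to stderr so progress messages stay out of stdout; the definition below is an assumption, not the project's actual code:

import sys

def eprint(*args, **kwargs):
    # Hypothetical helper: behaves like print(), but writes to stderr so
    # machine-readable output (e.g. the JSON emitted by dedup_list) stays
    # clean on stdout.
    print(*args, file=sys.stderr, **kwargs)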
Code example #2
File: dedup.py Project: gmega/gdsyncpy
def dedup_apply(service, args):
    eprint('Now computing and removing duplicates. Prefix order is %s.' % args.prefixes)
    duplicates, paths = compute_duplicates(
        service, get_snapshot(service, args)
    )

    # Strips whitespace around each prefix.
    prefixes = [prefix.strip() for prefix in args.prefixes.split(',')]
    # Adds a trailing slash to make prefixes unambiguous.
    prefixes = [prefix + ('/' if not prefix.endswith('/') else '') for prefix in prefixes]

    runner = ErrorHandlingRunner(service, delegate=GAPIBatchRunner)
    for md5, entries in duplicates.items():
        preferences = list(zip(entries, list(rank(prefixes, [paths[entry] for entry in entries]))))
        for duplicate, _ in sorted(preferences, key=lambda x: x[1])[1:]:
            rid = '%s (%s)' % (paths[duplicate], duplicate.id)
            eprint('Queue request for deleting duplicate %s' % rid)
            runner.add(request_id=rid, request=duplicate.delete())

    if not args.dry_run:
        eprint('\n --- Now running %d deletion requests in batch.' % len(runner.requests))
        for rid, result, _ in runner.execute():
            eprint('Successfully deleted %s' % rid)
    else:
        eprint('Dry run. No changes applied.')
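The rank helper used above is not shown either. A hypothetical sketch of it, assuming it yields, for each path, the index of the first prefix the path starts with (lower score means the entry is preferred and kept; everything after the best-ranked entry is queued for deletion); this illustrates the apparent intent, not the project's implementation:

def rank(prefixes, paths):
    # Hypothetical: lower score = higher preference. Paths matching no
    # prefix score past the end of the prefix list, so a copy under a
    # listed prefix is always preferred over one that is not.
    for path in paths:
        score = len(prefixes)
        for i, prefix in enumerate(prefixes):
            if str(path).startswith(prefix):
                score = i
                break
        yield score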
Code example #3
File: sync.py Project: gmega/gdsyncpy
def _latest_sync_state(service) -> SyncState:
    path = getcwd()
    states = sorted(list(SyncState.sync_states(path, service)),
                    key=lambda x: x.timestamp,
                    reverse=True)
    if len(states) > 1:
        eprint('More than one sync state found. Will use the most recent.')
    elif len(states) == 0:
        eprint('No sync state found in %s. Nothing to resume.' % path)
        sys.exit(-1)
    return states[0]
Code example #4
def connect():
    flow = _connect_flow(path.join(path.dirname(__file__), GAPI_ID))
    payload, need_auth = next(flow)
    if need_auth:
        eprint(
            'No valid authentication token found. Authorization required. Open the '
            'following URL in your browser: \n' + payload)
        payload, _ = flow.send(
            input('And enter the Google Drive auth key here:'))

    return payload
Code example #5
def store(self):
    eprint(
        'Saving sync state at %s. Use _resume_ to resume sync in case of failures.'
        % self.full_path)
    try:
        self.stored = True
        Path(self.full_path).write_text(
            json.dumps(SyncStateSchema().dump(self).data),
            encoding='utf-8')
    except Exception:
        self.stored = False
        raise
Code example #6
File: sync.py Project: gmega/gdsyncpy
def _cacheable_exclusions(service, args) -> Snapshot:
    ss = Snapshot()
    # Stored snapshots and remote folders are cached.
    for _snapshot in args.exclude_snapshot:
        eprint('Reading stored snapshot at <%s>' % _snapshot)
        ss = ss.merge(load_snapshot(service, _snapshot))

    for remote_folder in args.exclude_folder:
        eprint('Examining contents of Google Drive folder <%s>' %
               remote_folder)
        ss = ss.merge(snapshot(service, [remote_folder]))
    return ss
Code example #7
File: sync.py Project: gmega/gdsyncpy
def _new_sync_state(service, args) -> SyncState:
    # Computes MD5s for local files and fetches remote entries from the snapshot.
    eprint('Computing MD5 hashes for files under %s' % args.local)

    local = []
    for local_file in _local_files(args.local):
        if not _mime_allow(local_file, args.include_pictures_only):
            eprint('Exclude file %s with MIME type \'%s\'' %
                   (local_file.path, local_file.mime_type))
        else:
            local.append(local_file)

    if not args.allow_duplicates:
        _check_duplicates(_by_md5(local))

    return SyncState(_cacheable_exclusions(service, args), args, local,
                     getcwd())
Code example #8
File: sync.py Project: gmega/gdsyncpy
def _sync(service, state: SyncState):
    args = state.args
    local = _by_md5(state.local_files)

    # Read dynamic exclusions.
    state.snapshot = state.snapshot.merge(
        _uncacheable_exclusions(service, state.args))

    remote_folder = unique(ResourcePath.from_name(service, args.remote),
                           args.remote)
    remote = set(entry.md5Checksum for entry in state.snapshot.entries
                 if isinstance(entry, DriveFile))

    # We sync whatever is missing in the remote MD5 set according to the exclusion lists.
    to_sync = local.keys() - remote
    n_sync = len(to_sync)
    eprint('There are %d local files, %d remote files' %
           (len(local), len(remote)))
    eprint('%d files will be synced' % n_sync)

    # Uploads are better handled sequentially: they're unsupported by the batch API
    # (https://developers.google.com/drive/api/v3/batch) and are apparently handled sequentially on Google's side
    # (https://stackoverflow.com/questions/10311969/what-is-the-limit-on-google-drive-api-usage).
    # Indeed, since the main bottleneck is probably the client's upload speed anyway, handling concurrent upload
    # requests is probably pointless.
    runner = ErrorHandlingRunner(service, delegate=SequentialRequestRunner)

    for i, key in enumerate(to_sync, start=1):
        local_files = local[key]
        runner.add(request_id=local_files[0].path,
                   request=remote_folder.create_file(local_files[0].path))

    if not state.stored:
        state.store()

    eprint('Uploading %d files.' % n_sync)
    if state.args.dry_run:
        eprint('Dry run: no changes made.')
        return

    for i, result in enumerate(runner.execute(), start=1):
        eprint('%s successfully uploaded to %s (%d of %d)' %
               (result.id, state.args.remote, i, n_sync))

    state.clear()
Code example #9
def snapshot(service, folder_paths):
    folders = [
        u(ResourcePath.from_name(service, path), path) for path in folder_paths
    ]

    entries = []
    for folder in folders:
        if not isinstance(folder, DriveFolder):
            eprint('<%s> is not a folder. Aborting.' % str(folder))
            sys.exit(-1)

        entries.extend(folder.list(recurse=True))

    # Some entries may be picked up more than once if the root folders
    # share subfolders (which would imply a folder has more than one
    # parent). We discard these spurious duplicates by keeping only
    # entries with unique ids.
    unique = set(entries)

    eprint('There were %d entries, %d unique.' % (len(entries), len(unique)))

    return Snapshot(list(unique))
Code example #10
File: sync.py Project: gmega/gdsyncpy
def _local_files(local_folder, recurse=True) -> List[LocalFile]:
    file_paths = os.listdir(local_folder)
    files = []
    folders = []
    for file_path in file_paths:
        full_path = os.path.join(local_folder, file_path)
        if os.path.isdir(full_path):
            folders.append(full_path)
            continue

        eprint('Analyzing %s' % full_path)
        files.append(
            LocalFile(path=full_path,
                      mime_type=detector.from_file(full_path),
                      md5_checksum=hashlib.md5(
                          Path(full_path).read_bytes()).hexdigest()))

    if recurse:
        for folder in folders:
            eprint('Recursing into %s' % folder)
            files.extend(_local_files(folder, recurse))

    return files
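Path(full_path).read_bytes() hashes each file after loading it entirely into memory, which can get expensive for large files. An alternative sketch (not gdsyncpy code) that hashes in fixed-size chunks:

import hashlib

def _md5_of_file(full_path, chunk_size=1024 * 1024):
    # Alternative sketch: feed the file to MD5 in 1 MiB chunks so memory
    # usage stays flat regardless of file size.
    digest = hashlib.md5()
    with open(full_path, 'rb') as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()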
Code example #11
def _credentials(apisecret):
    flow = client.flow_from_clientsecrets(
        apisecret,
        scope='https://www.googleapis.com/auth/drive',
        redirect_uri='urn:ietf:wg:oauth:2.0:oob')

    storage = ofile.Storage('.credentials.json')
    exists = path.exists('.credentials.json')
    if exists:
        credentials = storage.get()

    if not exists or credentials is None or credentials.invalid:
        auth_uri = flow.step1_get_authorize_url()
        code = yield (auth_uri, True)
        credentials = flow.step2_exchange(code)
        storage.put(credentials)

    if credentials is None:
        raise ValueError('Failed to obtain access credentials.')

    eprint('Google Drive authentication successful.')

    yield (credentials, False)
Code example #12
File: sync.py Project: gmega/gdsyncpy
def _check_duplicates(local):
    duplicates = {k: v for k, v in local.items() if len(v) > 1}
    if len(duplicates) == 0:
        return

    eprint(
        'Error: duplicates found in the local folder. Re-run with --allow-duplicates to '
        'run the synchronization anyway. Duplicates are listed as follows.\n')

    for md5, entries in duplicates.items():
        eprint('------ %s -------' % md5)
        for entry in entries:
            eprint(entry)
        eprint('')

    sys.exit(-1)
Code example #13
File: http.py Project: gmega/gdsyncpy
    def execute(self) -> Generator[RequestResult, None, None]:
        pending = {str(request.id): request for request in self.requests}
        warnings = set()
        attempts = 1
        backoff = self.min_backoff

        start = time.monotonic()
        while ((time.monotonic() - start) < self.timeout) and (attempts <= self.max_retries):
            for result in self._run_requests(list(pending.values())):
                if result.error is None:
                    # No errors with the current request, just return the element.
                    del pending[result.id]
                    yield result
                    # Resets the retry and backoff counters as at least one request got through.
                    attempts = 1
                    backoff = self.min_backoff
                    continue

                # Got an error. Let's see what policies we have for that.
                policy, error_code = self.policies.matching(result.error)

                # Case 1: Don't know how to handle this. Just bubble it up.
                if policy is None:
                    raise result.error

                # Prints error-specific warning.
                if policy.print_always or (error_code not in warnings):
                    policy.print_warning(rid=result.id)
                    warnings.add(error_code)

                # Case 2: Should skip the current entry as if it were already satisfied.
                if policy.action == HttpErrorPolicy.SKIP:
                    del pending[result.id]
                    continue

                # Case 3: Should retry after we're done.
                elif policy.action == HttpErrorPolicy.RETRY:
                    pass

                # Case 4: After having printed the warning message, we should fail.
                elif policy.action == HttpErrorPolicy.FAIL:
                    raise result.error

                # Case 5: Should never happen, so we throw an error if it does.
                else:
                    raise Exception('Don\'t know how to handle action %d.' % policy.action)

            # All requests satisfied: we're done.
            if not pending:
                return

            # If there are still pending requests left, we have to retry them, backing off first.
            eprint('Some of the requests could not be fulfilled. Backing off and retrying.')

            # Backs off.
            time.sleep(backoff)
            backoff = backoff * 2
            attempts += 1

        raise Exception(
            'Could not process request(s). %s' % (
                'Too many retry attempts.' if attempts > self.max_retries else 'Operation timed out.'
            )
        )
Code example #14
File: sync.py Project: gmega/gdsyncpy
def _uncacheable_exclusions(service, args) -> Snapshot:
    ss = Snapshot()
    eprint('Examining contents of Google Drive folder <%s>' % args.remote)
    ss = ss.merge(snapshot(service, [args.remote]))
    return ss
Code example #15
def read(path, service) -> 'SyncState':
    eprint('Reading sync state from %s.' % path)
    sss = SyncStateSchema()
    sss.context = {'service': service}
    return sss.load(json.loads(
        Path(path).read_text(encoding='utf-8'))).data
Code example #16
def clear(self):
    eprint('Removing sync state %s' % self.full_path)
    os.remove(self.full_path)
Code example #17
File: http.py Project: gmega/gdsyncpy
def print_warning(self, rid):
    eprint(self.warning.format(rid=rid))
Code example #18
File: dedup.py Project: gmega/gdsyncpy
def dedup_list(service, args):
    eprint('Computing duplicates and resolving resource paths.')
    duplicates, paths = compute_duplicates(service, get_snapshot(service, args))
    summary = {
        md5: [
            {
                'id': duplicate.id,
                'path': str(paths[duplicate])
            }
            for duplicate in entries
        ] for md5, entries in duplicates.items()
    }

    if len(duplicates) == 0:
        eprint('Hooray! There are no duplicates in the snapshot.')
    else:
        eprint('Duplicates were found.')

    if args.json:
        print(json.dumps(summary, indent=3))
    else:
        for md5, entries in summary.items():
            eprint('------ %s -------' % md5)
            for entry in entries:
                eprint('%s (%s)' % (entry['path'], entry['id']))
            eprint('')