Example #1
0
def main():
    """Command-line entry point: dispatch ``sys.argv[1]`` to a DVC command.

    When no command is given, or the command is not in the known list, the
    usage text is printed and the process exits with status -1. Commands
    that are listed but have no handler yet (e.g. ``data``, ``cloud``,
    ``test``, ``test-cloud``) reach the final ``else`` branch, which reports
    them as unimplemented and also exits with status -1.
    """
    # Every command with a handler below must be listed here, otherwise the
    # early check rejects it before the dispatch chain is reached.
    cmds = ['--version', 'init', 'run', 'sync', 'repro', 'data', 'remove',
            'import', 'import-file', 'lock', 'cloud', 'cloud-run',
            'cloud-instance-create', 'cloud-instance-remove',
            'cloud-instance-describe', 'test', 'test-aws', 'test-gcloud',
            'test-cloud']

    if len(sys.argv) < 2 or sys.argv[1] not in cmds:
        if len(sys.argv) >= 2:
            print('Unimplemented or unrecognized command: ' +
                  ' '.join(sys.argv[1:]))
        print_usage()
        sys.exit(-1)

    cmd = sys.argv[1]

    if cmd == '--version':
        print('dvc version {}'.format(VERSION))
    elif cmd == 'init':
        Runtime.run(CmdInit, parse_config=False)
    elif cmd == 'run':
        Runtime.run(CmdRun)
    elif cmd == 'repro':
        Runtime.run(CmdRepro)
    elif cmd == 'sync':
        Runtime.run(CmdDataSync)
    elif cmd == 'import':
        Runtime.run(CmdImportBulk)
    elif cmd == 'import-file':
        Runtime.run(CmdImportFile)
    elif cmd == 'remove':
        Runtime.run(CmdDataRemove)
    elif cmd == 'lock':
        Runtime.run(CmdLock)
    elif cmd == 'cloud-run':
        print('cloud-run unimplemented')
    elif cmd == 'cloud-instance-create':
        print('cloud-instance-create unimplemented')
    elif cmd == 'cloud-instance-remove':
        print('cloud-instance-remove unimplemented')
    elif cmd == 'cloud-instance-describe':
        print('cloud-instance-describe unimplemented')
    elif cmd == 'test-aws':
        print('TODO: test aws credentials')
    elif cmd == 'test-gcloud':
        Runtime.run(CmdTest)
    else:
        # Known command name without a handler. Join the argument list
        # (not the characters of the command string) for the message.
        print('Unimplemented or unrecognized command: ' +
              ' '.join(sys.argv[1:]))
        print_usage()
        sys.exit(-1)
Example #2
0
            cmd = CmdImportFile(self.settings)
            cmd.set_git_action(not self.no_git_actions)
            cmd.set_locker(False)

            output = self.parsed_args.output
            for input in self.parsed_args.input:
                if not os.path.isdir(input):
                    cmd.import_and_commit_if_needed(input, output,
                                                    self.parsed_args.lock)
                else:
                    input_dir = os.path.basename(input)
                    for root, dirs, files in os.walk(input):
                        for file in files:
                            filename = os.path.join(root, file)

                            rel = os.path.relpath(filename, input)
                            out = os.path.join(output, input_dir, rel)

                            out_dir = os.path.dirname(out)
                            if not os.path.exists(out_dir):
                                os.mkdir(out_dir)

                            cmd.import_and_commit_if_needed(
                                filename, out, self.parsed_args.lock)
                pass
        pass


# Script entry point: when this module is executed directly (not imported),
# hand the CmdImportBulk command class to Runtime.run.
if __name__ == '__main__':
    Runtime.run(CmdImportBulk)
Example #3
0
                if state.locked and target:
                    Logger.warn('Data item {} is already locked'.format(
                        data_item.data.relative))
                elif not state.locked and not target:
                    Logger.warn('Data item {} is already unlocked'.format(
                        data_item.data.relative))
                else:
                    state.locked = target
                    Logger.debug('Saving status file for data item {}'.format(
                        data_item.data.relative))
                    state.save()
                    Logger.info('Data item {} was {}ed'.format(
                        data_item.data.relative, cmd))
            except Exception as ex:
                error += 1
                Logger.error('Unable to {} {}: {}'.format(cmd, file, ex))

        if error > 0 and not self.no_git_actions:
            Logger.error(
                'Errors occurred. One or more repro cmd was not successful.')
            self.not_committed_changes_warning()
        else:
            self.commit_if_needed('DVC lock: {}'.format(' '.join(self.args)))

        return 0


# Script entry point: when this module is executed directly (not imported),
# hand the CmdLock command class to Runtime.run.
# NOTE(review): the positional False is presumably Runtime.run's
# parse_config flag -- confirm against Runtime.run's signature.
if __name__ == '__main__':
    Runtime.run(CmdLock, False)
Example #4
0
        return not error

    def data_items_from_args(self, argv):
        """Build data items for every argument that names an existing file.

        Arguments that are not regular files are ignored. Arguments for
        which the path factory raises ``NotInDataDirError`` are skipped
        silently (best effort).

        :param argv: iterable of command-line argument strings.
        :return: list of data items, in argument order.
        """
        collected = []

        for candidate in argv:
            try:
                if not os.path.isfile(candidate):
                    continue
                collected.append(
                    self.settings.path_factory.data_item(candidate))
            except NotInDataDirError:
                # Deliberately ignored: such arguments are simply not
                # treated as data items.
                continue

        return collected

    def _data_items_from_params(self, files, param_text):
        if not files:
            return []

        data_items, external = self.settings.path_factory.to_data_items(files)
        if external:
            raise RunError(
                '{} should point to data items from data dir: {}'.format(
                    param_text, ', '.join(external)))
        return data_items


# Script entry point: when this module is executed directly (not imported),
# hand the CmdRun command class to Runtime.run.
if __name__ == '__main__':
    Runtime.run(CmdRun)
        r'(?:/?|[/?]\S+)$',
        re.IGNORECASE)

    @staticmethod
    def is_url(url):
        """Return True when ``url`` matches ``CmdDataImport.URL_REGEX``.

        Uses ``match``, so the pattern is anchored at the start of the
        string.
        """
        matched = CmdDataImport.URL_REGEX.match(url)
        return matched is not None

    @staticmethod
    def download_file(from_url, to_file):
        """Stream the content at ``from_url`` into the local file ``to_file``.

        The body is requested in chunks of up to 100 KiB and written as it
        arrives. A debug message with the running total is logged each time
        at least 100 MiB have been received since the previous message.

        :param from_url: URL to download.
        :param to_file: path of the file to (over)write in binary mode.

        NOTE(review): the HTTP status code is not checked, so the body of an
        error response would be saved as the file. Consider calling
        ``r.raise_for_status()`` if callers can handle the exception.
        """
        chunk_size = 1024 * 100
        report_bucket = 100 * 1024 * 1024
        downloaded = 0
        unreported = 0

        r = requests.get(from_url, stream=True)
        try:
            with open(to_file, 'wb') as f:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    if not chunk:  # filter out keep-alive new chunks
                        continue
                    # Count the bytes actually received: a chunk may be
                    # shorter than chunk_size (the last one usually is).
                    received = len(chunk)
                    downloaded += received
                    unreported += received
                    if unreported >= report_bucket:
                        unreported = 0
                        Logger.debug('Downloaded {}'.format(
                            sizeof_fmt(downloaded)))
                    f.write(chunk)
        finally:
            # With stream=True the connection stays checked out until the
            # body is consumed or the response is closed; release it even
            # when writing fails part-way.
            r.close()


# Script entry point: when this module is executed directly (not imported),
# hand the CmdDataImport command class to Runtime.run.
if __name__ == '__main__':
    Runtime.run(CmdDataImport)
Example #6
0
        if not force and not was_source_code_changed and not were_input_files_changed:
            Logger.debug('Data item "{}" is up to date'.format(
                self._data_item.data.relative))
            return False

        return self.reproduce_data_file()

    @property
    def dependencies(self):
        """Data items for every entry in ``self.state.input_files``.

        :return: list of data items, in the order of the input files.
        :raises ReproError: if an input file is not a data item
            (``NotInDataDirError``) or if resolving it fails for any other
            reason.
        """
        resolved = []
        for input_file in self.state.input_files:
            try:
                factory = self._cmd_obj.settings.path_factory
                item = factory.data_item(input_file)
            except NotInDataDirError:
                raise ReproError(
                    u'The dependency file "{}" is not a data item'.format(
                        input_file))
            except Exception as ex:
                raise ReproError(
                    u'Unable to reproduced the dependency file "{}": {}'.
                    format(input_file, ex))
            else:
                resolved.append(item)

        return resolved


# Script entry point: when this module is executed directly (not imported),
# hand the CmdRepro command class to Runtime.run.
if __name__ == '__main__':
    Runtime.run(CmdRepro)
Example #7
0
            if blob.md5_hash == b64_encoded_md5:
                Logger.debug('checksum %s matches.  Skipping upload' % data_item.cache.relative)
                return
            Logger.debug('checksum %s mismatch.  re-uploading' % data_item.cache.relative)

        Logger.info('uploading cache file "{} to gc "{}"'.format(data_item.cache.relative, blob_name))

        blob = bucket.blob(blob_name)
        blob.upload_from_filename(data_item.resolved_cache.relative)
        Logger.info('uploading %s completed' % data_item.resolved_cache.relative)

    def sync_from_cloud(self, item):
        """Dispatch a from-cloud sync of ``item`` to the configured provider.

        The provider name is read from ``self.settings.config.cloud`` and
        must be ``'aws'`` or ``'gcp'`` (checked with an ``assert``).

        :return: whatever the provider-specific method returns.
        """
        provider = self.settings.config.cloud
        assert provider in ('aws', 'gcp'), 'unknown cloud %s' % provider

        if provider == 'aws':
            return self._sync_from_cloud_aws(item)
        if provider == 'gcp':
            return self._sync_from_cloud_gcp(item)

    def sync_to_cloud(self, data_item):
        """Dispatch a to-cloud sync of ``data_item`` to the configured provider.

        The provider name is read from ``self.settings.config.cloud`` and
        must be ``'aws'`` or ``'gcp'`` (checked with an ``assert``).

        :return: whatever the provider-specific method returns.
        """
        provider = self.settings.config.cloud
        assert provider in ('aws', 'gcp'), 'unknown cloud %s' % provider

        if provider == 'aws':
            return self._sync_to_cloud_aws(data_item)
        if provider == 'gcp':
            return self._sync_to_cloud_gcp(data_item)


# Script entry point: when this module is executed directly (not imported),
# hand the CmdDataSync command class to Runtime.run.
if __name__ == '__main__':
    Runtime.run(CmdDataSync)
Example #8
0
            data_dir_path.name, cache_dir_path.name, state_dir_path.name))

        conf_file = open(conf_file_name, 'wt')
        conf_file.write(
            self.CONFIG_TEMPLATE.format(data_dir_path.name,
                                        cache_dir_path.name,
                                        state_dir_path.name))
        conf_file.close()

        self.modify_gitignore(cache_dir_path.name)

        message = 'DVC init. data dir {}, cache dir {}, state dir {}'.format(
            data_dir_path.name, cache_dir_path.name, state_dir_path.name)
        return self.commit_if_needed(message)

    def modify_gitignore(self, cache_dir_name):
        """Append the cache directory and lock file names to ``.gitignore``.

        The ``.gitignore`` file lives in ``self.git.git_dir`` and is created
        (with an info log message) when it does not exist yet. Two lines are
        appended, each preceded by a newline: ``cache_dir_name`` and the base
        name of ``self.git.lock_file``.

        :param cache_dir_name: name of the cache directory to ignore.
        """
        ignore_path = os.path.join(self.git.git_dir, '.gitignore')

        already_there = os.path.exists(ignore_path)
        if not already_there:
            # Opening in append mode creates the empty file.
            with open(ignore_path, 'a'):
                pass
            Logger.info('File .gitignore was created')

        with open(ignore_path, 'a') as ignore_fd:
            ignore_fd.write('\n{}'.format(cache_dir_name))
            lock_name = os.path.basename(self.git.lock_file)
            ignore_fd.write('\n{}'.format(lock_name))

        Logger.info(
            'Directory {} was added to .gitignore file'.format(cache_dir_name))


# Script entry point: when this module is executed directly (not imported),
# hand the CmdInit command class to Runtime.run.
# NOTE(review): the positional False is presumably Runtime.run's
# parse_config flag -- confirm against Runtime.run's signature.
if __name__ == '__main__':
    Runtime.run(CmdInit, False)
Example #9
0
                            self.config.aws_secret_access_key)
        bucket_name = self.config.storage_bucket
        bucket = conn.lookup(bucket_name)
        if bucket:
            key = bucket.get_key(aws_file_name)
            if not key:
                Logger.warn(
                    '[Cmd-Remove] S3 remove warning: file "{}" does not exist in S3'
                    .format(aws_file_name))
            else:
                key.delete()
                Logger.info(
                    '[Cmd-Remove] File "{}" was removed from S3'.format(
                        aws_file_name))
        pass

    def remove_dir_file_by_file(self, target):
        """Empty the directory ``target`` entry by entry, then remove it.

        Sub-directories are processed recursively by this same method; every
        other entry is passed to ``self.remove_file``. Finally ``target``
        itself is removed with ``os.rmdir``.

        :param target: path of the directory to remove.
        """
        for entry in os.listdir(target):
            entry_path = os.path.join(target, entry)
            if not os.path.isdir(entry_path):
                self.remove_file(entry_path)
                continue
            self.remove_dir_file_by_file(entry_path)

        os.rmdir(target)


# Script entry point: when this module is executed directly (not imported),
# hand the CmdDataRemove command class to Runtime.run.
if __name__ == '__main__':
    Runtime.run(CmdDataRemove)
Example #10
0
        print_usage()
        sys.exit(-1)

    cmd = sys.argv[1]
    subcmd = None
    if cmd in cmds_expand:
        if (len(sys.argv) < 3 or sys.argv[2] not in cmds_expand[cmd]):
            print('for command %s, eligible actions are %s' %
                  (cmd, cmds_expand[cmd]))
            print_usage()
            sys.exit(-1)
        else:
            subcmd = sys.argv[2]

    if cmd == 'init':
        Runtime.run(CmdRun, args_start_loc=2)
    elif cmd == 'run':
        Runtime.run(CmdDataImport, args_start_loc=2)
    elif cmd == 'repro':
        Runtime.run(CmdRepro, args_start_loc=2)
    elif cmd == 'data-sync' or (cmd == 'data' and subcmd == 'sync'):
        Runtime.run(CmdDataSync, args_start_loc=2)
    elif cmd == 'data-import' or (cmd == 'data' and subcmd == 'import'):
        Runtime.run(CmdDataImport, args_start_loc=2)
    elif cmd == 'cloud-run' or (cmd == 'cloud' and subcmd == 'run'):
        print('cloud-run unimplemented')
    elif cmd == 'cloud-instance-create' or (cmd == 'cloud'
                                            and subcmd == 'instance-create'):
        print('cloud-instance-create unimplemented')
    elif cmd == 'clould-instance-remove' or (cmd == 'cloud'
                                             and subcmd == 'instance-remove'):