Example #1
0
def check_git_repo(args):
    """Warn about untracked or modified files and let the user abort.

    Paths starting with ``.dvc_cc`` are ignored. If ``args.yes`` is set, no
    check is performed and nothing is asked.

    Args:
        args: Parsed command line arguments; only ``args.yes`` is read.

    Raises:
        SystemExit: With exit code 1 if the user answers the confirmation
            prompt with something that starts with "n".
    """
    # ``exit`` is only injected by the ``site`` module and is meant for the
    # interactive shell; ``sys.exit`` is always available.
    import sys

    gitrepo = GITRepo('.')
    if args.yes:
        return

    untracked_files = [f for f in gitrepo.untracked_files if not f.startswith('.dvc_cc')]
    if untracked_files:
        print('Warning: Some files are untracked: ' + str(untracked_files))

    # Paths reported by ``index.diff(None)`` (presumably index vs. working
    # tree, i.e. modified but not yet staged files - TODO confirm).
    changed_files = [d.a_path for d in gitrepo.index.diff(None) if not d.a_path.startswith('.dvc_cc')]
    if changed_files:
        print('Warning: Some files are changed: ' + str(changed_files))

    if untracked_files or changed_files:
        user_answer = input("Do you want continue? (y/n): ")
        if user_answer.lower().strip().startswith('n'):
            print('You abort this command. You could use "git add -A", "git commit -m \'some message\'" and "git push" to commit this file.')
            sys.exit(1)

    # A former "Your branch is ahead" check was disabled here on purpose:
    # this script pushes the results and the not yet pushed commits as well.
    return
Example #2
0
def get_main_git_directory_Path():
    """Return the directory that contains the repository's ``.git`` entry.

    The path is derived from ``GITRepo('.').common_dir`` by cutting it at the
    first path component that is exactly ``.git``. A plain substring search
    for ``'/.git'`` would also match components such as ``.github`` or
    ``.gitlab`` and cut the path too early.

    Returns:
        str: Everything before the ``/.git`` component, or ``common_dir``
        unchanged if it has no such component.
    """
    common_dir = GITRepo('.').common_dir
    components = common_dir.split('/')
    for position, component in enumerate(components):
        # position > 0: a leading '/' in front of ".git" is required, as before.
        if position > 0 and component == '.git':
            return '/'.join(components[:position])
    return common_dir
Example #3
0
def main():
    """Start every experiment in ``.dvc_cc/cc_agency_experiments.yml`` without an ID.

    The current branch and all tags are pushed first. For each experiment
    whose ``id`` is ``None``, its files and ``.dvc_cc/cc_config.yml`` are
    concatenated into ``.dvc_cc/tmp.red.yml``, ``faice exec`` is run on that
    file and the last whitespace separated token of its output is stored as
    the experiment ID. If at least one experiment was started, the updated
    YAML file is written, committed and pushed.

    Raises:
        SystemExit: With exit code 1 if the experiments file is not valid YAML.
    """
    # ``exit`` is only injected by ``site``; ``sys.exit`` is always available.
    import sys

    parser = ArgumentParser(description=DESCRIPTION)
    parser.parse_args()

    # The results are not used. The calls are kept because they presumably
    # fail early when the working directory is not a git/DVC repository
    # - TODO confirm.
    get_main_git_directory_Path()
    GITRepo('.')
    DVCRepo('.')

    subprocess.call(['git', 'push'])
    subprocess.call(['git', 'push', 'origin', '--tags'])

    experiments_file = str(Path('.dvc_cc/cc_agency_experiments.yml'))
    if not os.path.exists(experiments_file):
        print(
            'Warning you did not define a job with dvc-cc run --no-exec. So there is no job to start.'
        )
        return

    with open(experiments_file, 'r') as stream:
        try:
            experiments = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)
            sys.exit(1)

    # An empty YAML document is loaded as None; treat it as "no experiments".
    if not experiments:
        experiments = {}

    tmp_red_file = str(Path('.dvc_cc/tmp.red.yml'))
    start_an_experiment = False
    last_path = None  # last experiment file that was read, used in the commit message
    for name, experiment in experiments.items():
        if experiment['id'] is not None:
            continue

        print('Start job ' + name)
        start_an_experiment = True
        try:
            # write all to tmp.red.yml
            with open(tmp_red_file, 'w') as f:
                print("batches:", file=f)
                for path in experiment['files']:
                    last_path = path
                    with open(str(Path(path)), "r") as r:
                        print(r.read(), file=f)
                with open(str(Path('.dvc_cc/cc_config.yml')), "r") as r:
                    print(r.read(), file=f)

            # execute faice
            output = subprocess.Popen(
                ['faice', 'exec', '.dvc_cc/tmp.red.yml'],
                stdout=subprocess.PIPE)
            cc_id = output.communicate()[0].decode().split()[-1]
        finally:
            # Never leave the temporary red file behind, even on failure.
            if os.path.exists(tmp_red_file):
                os.remove(tmp_red_file)
        print('The experiment ID is: ' + cc_id)

        # write cc_id to cc_agency_experiments.yml
        experiment['id'] = cc_id

    if not start_an_experiment:
        # Nothing changed, so there is nothing to write, commit or push.
        print('There is no new job to start.')
        return

    with open(experiments_file, 'w') as outfile:
        yaml.dump(experiments, outfile, default_flow_style=False)

    # push the ids
    message = 'Update cc_agency_experiments.yml'
    if last_path is not None:
        message += ': ' + last_path
    subprocess.call(['git', 'add', '.dvc_cc/cc_agency_experiments.yml'])
    # The literal quotes around the message are kept so that commit messages
    # stay identical to the ones created so far.
    subprocess.call(['git', 'commit', '-m', '\'' + message + '\''])
    subprocess.call(['git', 'push'])
Example #4
0
def main():
    """Define an experiment on a new input branch and start it on the cluster.

    Steps, in order:

    1. Parse the command line and check the git state (``check_git_repo``).
    2. Refuse to run on ``rcc_`` branches and ask for confirmation on ``cc_``
       branches.
    3. Normalize the experiment name and create/push the input branch
       ``<exp_id>_<experimentname>``.
    4. Optionally convert notebooks to py files and/or create a papermill DVC
       file.
    5. Collect the hyperopt variables, create one ``dvc/<rcc_branch_name>``
       folder per draw, build the CC red yml and commit/push everything.
    6. Execute the jobs and store the returned ID in ``.dvc_cc/cc_ids.yml``.
    7. Always (``finally``): return to the start branch, remove temporary
       files and merge the IDs into ``.dvc_cc/cc_all_ids.yml``.

    Raises:
        ValueError: If there is no DVC file to execute (and papermill is not
            used), or if papermill is requested without a notebook in the
            current directory.
        SystemExit: If the user cancels, or if started on an ``rcc_`` branch.
    """
    # ``exit`` is only injected by ``site``; ``sys.exit`` is always available.
    import sys

    parser = ArgumentParser(description=DESCRIPTION)
    parser.add_argument('experimentname', help='The name of the experiment that should be used. This can help you to search between all files.')
    parser.add_argument('-ne','--no-exec', help='If true the experiment get defined, but it will not run at a server. Warning: You should not use this command.', default=False, action='store_true')
    # TODO: parser.add_argument('-l','--local', help='Run the experiment locally!', default=False, action='store_true')
    # TODO: parser.add_argument('-q','--question', help='A question that you want to answer with that experiment.')
    # TODO: parser.add_argument('--use_only_a_tag', help='If you don't have any Hyperopt-DVC-CC files or just set one set of fixed parameters you can create a tag instead of a new branch.', default=False, action='store_true')
    parser.add_argument('-f','--dvc-files', help='The DVC files that you want to execute. If this is not set, it will search for all DVC files in this repository and use this. You can set multiple dvc files with: "first_file.dvc,second_file.dvc" or you can use "first_file.dvc|second_file.dvc" to run in a row the files in the same branch.')
    parser.add_argument('-y','--yes', help='If this paramer is set, than it will not ask if some files are not commited or it the remote is not on the last checkout. Warning: Untracked changes could be lost!', default=False, action='store_true')
    parser.add_argument('-r','--num-of-repeats', type=int, help='If you want to repeat the job multiple times, than you can set this value to a larger value than 1.', default=1)
    parser.add_argument('--not-ipynb-to-py', help='If this paramer is set, than it will NOT convert all jupyter notebook files to py files.', default=False,
                        action='store_true')
    parser.add_argument('-l','--live_output_files',
                        help='Comma separated string list of files that should be included to the live output for example: "tensorboard,output.json" This could track a tensorboard folder and a output.json file.')
    parser.add_argument('-lf','--live_output_update_frequence', type=int,
                        help='The update frequence of the live output in seconds.',
                        default=60)
    parser.add_argument('--keyring-service', type=str,
                        help='The default name of the keyring service that is used. For more information visit: '
                             'https://www.curious-containers.cc/docs/red-format-protecting-credentials',
                        default= None)

    parser.add_argument('-p','--papermill',help='Use papermill to run the jupyter notebook on the server and save the results in the jupyter notebook. If this parameter is set, no jupyter notebook will be converted to py files.',
                        default=False, action='store_true')


    parser.add_argument('-de','--delay-execution',help='If this parameter is true, than it will create first ALL input branches and than execute it once.',
                        default=False, action='store_true')
    parser.add_argument('--optuna',
                        help='This script to this parameter is in progress. It will create two directories beside your main git repository folder. In the first you find a generated script to '
                             'run a hyperoptimization with optuna. In the second you find a script that copy metrics from different result branches to the other folder. You need to start '
                             'both script manually.',
                        default=False, action='store_true')


    args = parser.parse_args()

    # Currently unused; kept together with the disabled chdir below.
    project_dir = get_main_git_directory_Path()
    #os.chdir(str(Path(project_dir)))

    gitrepo = GITRepo('.')

    startbranch = gitrepo.active_branch.name

    # Check if all files are checked and pushed.
    check_git_repo(args)

    exp_id = create_new_exp_id()

    ##########################################
    # WARN, if you are on a cc or rcc branch #
    ##########################################
    if startbranch.startswith('cc_'):
        print(bcolors.WARNING+'WARNING: You are on a DVC-CC branch.'+bcolors.ENDC)
        # TODO if the rerun command exists throw an error!
        #   print('         You should use the dvc rerun command')
        user_input = input('Do you want to continue? [y,N]')
        if not user_input.lower().startswith('y'):
            print('You can switch to a other branch with "git checkout THE_BRANCH_NAME".')
            sys.exit(0)
    elif startbranch.startswith('rcc_'):
        print(bcolors.FAIL+'ERROR: you are on a DVC-CC-RESULT branch. It is not allowed to execute DVC-CC here. To run a job take a look at the readme of this repository.'+ bcolors.ENDC)
        sys.exit(1)

    ############################
    # Check the Experimentname #
    ############################
    # Every separator becomes a space; multi-word names are then CamelCased.
    # A single backslash is a separator too: the name ends up in a git branch
    # name, where a backslash is not allowed. (Previously only a doubled
    # backslash was replaced; that case still yields the same result.)
    name_parts = args.experimentname
    for separator in ('/', '_', '\\', ';', '.'):
        name_parts = name_parts.replace(separator, ' ')
    name_parts = name_parts.split(' ')
    if len(name_parts) == 1:
        args.experimentname = name_parts[0]
    else:
        args.experimentname = ''.join([e.capitalize() for e in name_parts])

    #################################################################################
    # Do a DVC-checkout to delete all files that was not created with DVC repro/run #
    #   TODO: This does not work currently! https://github.com/iterative/dvc/issues/2146
    #################################################################################
    # subprocess.call reports failures through its return value; it only
    # raises (OSError) when the executable itself cannot be started.
    try:
        dvc_checkout_failed = subprocess.call(['dvc', 'checkout']) != 0
    except OSError:
        dvc_checkout_failed = True
    if dvc_checkout_failed:
        print('Some files are missing.')

    #############################################################
    # Find all hyperopt files and leafs to execute the pipeline #
    #############################################################
    dvc_files, list_of_hyperopt_files, Gs = find_all_dvc_leafs(args.dvc_files)

    ####################################
    # Error if no DVC-file was defined #
    ####################################
    if len(dvc_files) == 0 and not args.papermill:
        raise ValueError('There exist no job to execute! Create DVC-Files with "dvc run --no-exec ..." to define the jobs. Or check the .dvc_cc/dvc_cc_ignore file. All DVC-Files that are defined there are ignored from this script.')

    def _rm_quietly(*rm_args):
        """Run ``rm`` without any output; return True only if it succeeded."""
        try:
            return subprocess.call(['rm'] + list(rm_args), stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL) == 0
        except OSError:
            return False

    # Both are read in the ``finally`` block, so they must exist even if the
    # ``try`` block is left early.
    loaded_yml = None
    rcc_branch_names = []

    try:
        ###########################
        # Create an input branch! #
        ###########################
        exp_name = exp_id + '_' + args.experimentname
        print(bcolors.BOLD+'Create an input git-branch: ' + exp_name + bcolors.ENDC)
        subprocess.call(['git', 'checkout','-q', '-b', exp_name])
        #print(['git', 'push', '-u', 'origin', exp_name+':'+exp_name])
        #TODO: THIS THROWS ALWAYS A MERGE REQUEST ????
        subprocess.call(['git', 'push', '-q','-u', 'origin', exp_name+':'+exp_name])

        #############################
        # CONVERT Jupyter Notebooks #
        #############################
        if not args.not_ipynb_to_py and len(Gs) > 0:
            created_pyfiles_from_jupyter = all_jupyter_notebook_to_py_files(Gs)
        else:
            created_pyfiles_from_jupyter = []
        for f in created_pyfiles_from_jupyter:
            subprocess.call(['git', 'add', f]) #TODO: build quite mode!
            print(bcolors.BOLD + 'The following file was created from a jupyter notebook: ' + f + bcolors.ENDC)
        if not args.not_ipynb_to_py:
            subprocess.call(['git', 'commit','-q','-m', 'Convert Jupyter Notebooks to Py-File.'])
            subprocess.call(['git', 'push', '-q', '-u', 'origin', exp_name + ':' + exp_name])

        ######################################
        # Use papermill to create a dvc file #
        ######################################
        if args.papermill:
            ipynb_files_in_main_dir = [f for f in os.listdir() if f.endswith('.ipynb')]  # Todo: Allow different location of the ipynb file
            if len(ipynb_files_in_main_dir) == 0:
                raise ValueError('To use papermill you need a jupyter notebook to run on the server in the main git '
                                 'directory.')
            # Only the first notebook that was found is used.
            notebook = ipynb_files_in_main_dir[0]
            # Strip the 6 characters of '.ipynb' to build the output file name.
            notebook_output = notebook[:-6] + '_output.ipynb'
            print(bcolors.BOLD + 'Create a DVC file for executing ' + notebook + ' with papermill.' +
                  bcolors.ENDC)
            from dvc_cc.run import papermill_helper
            parameters = papermill_helper.read_parameters_from_parametercell(notebook)
            outputs = papermill_helper.read_definitions_from_parametercell(notebook, 'outputs')
            metrics = papermill_helper.read_definitions_from_parametercell(notebook, 'metrics')

            cmd = 'papermill ' + notebook + ' ' + notebook_output + ' --log-output -k python'
            for p in parameters:
                cmd = cmd +' -p ' + p[0] + ' {{' + p[0] + ':' + p[1] + ':None}}'

            dvc_cc_command = ['dvc-cc', 'hyperopt', 'new', '-d', notebook, '-o',
                              notebook_output, '-f','papermill.dvc']
            for o in outputs:
                dvc_cc_command = dvc_cc_command + ['-o', o[1]]
            for m in metrics:
                dvc_cc_command = dvc_cc_command + ['-M', m[1]]
            dvc_cc_command = dvc_cc_command + [cmd]

            subprocess.call(dvc_cc_command)
            subprocess.call(['git', 'add', 'dvc/.hyperopt/papermill.hyperopt'])  # TODO: build quite mode!
            subprocess.call(['git', 'commit','-q','-m', 'Create a dvc file for papermill.'])
            subprocess.call(['git', 'push', '-q', '-u', 'origin', exp_name + ':' + exp_name])
            dvc_files, list_of_hyperopt_files, Gs = find_all_dvc_leafs(args.dvc_files)

        ##########################
        # Get All Hyperparemters #
        ##########################
        vc = VariableCache()

        for f in list_of_hyperopt_files:
            f = str(Path('dvc/.hyperopt/' + f))
            vc.register_dvccc_file(f)

        if args.optuna:
            from dvc_cc.run import optuna_scripts
            rcc_branch_names = [exp_name+'_XXXXXXXXXX']
            create_cc_config(dvc_files, exp_name, rcc_branch_names, args.num_of_repeats,
                                args.live_output_files, args.live_output_update_frequence)
            optuna_scripts.create_optuna_directories(exp_name, vc)
            return

        ###################################
        # DEFINE ALL Hyperopt-Experiments #
        ###################################
        if len(vc.list_of_all_variables) > 0:
            hyperopt_draws = create_hyperopt_variables(vc)
            user_input = input(
                'You defined ' + str(len(hyperopt_draws)) + ' * ' + str(args.num_of_repeats) + ' = ' + str(
                    len(
                        hyperopt_draws) * args.num_of_repeats) + ' hyperoptimization pairs. Do you want to continue and start the job? [y,n]: ')
            if not user_input.lower().startswith('y'):
                print('The job was canceled')
                # The ``finally`` block below still returns to the start branch.
                sys.exit(0)
        else:
            hyperopt_draws = [[]]

        #####################################################
        # TODO: SAVE the Hyperopt-Values and the VC!        #
        #   WITH THIS It is possible to get rerun the code. #
        #####################################################

        ######################################
        # TODO: DEFINE THE NEW BRANCH NAMES! #
        ######################################
        rcc_branch_names = define_the_rcc_branch_names(exp_name, hyperopt_draws, vc.list_of_all_variables)

        #################################
        # Loop each Hyperopt-Experiment #
        #################################
        print(bcolors.BOLD + 'DVC-CC: Generate all dvc files.' + bcolors.ENDC)
        for draw, rcc_branch_name in zip(hyperopt_draws, rcc_branch_names):

            os.mkdir('dvc/'+str(rcc_branch_name))
            vc.set_values_for_hyperopt_files(draw, dvc_save_path='dvc/'+str(rcc_branch_name))

            subprocess.call(['git', 'add', 'dvc/'+str(rcc_branch_name)])

        print(bcolors.BOLD + 'DVC-CC: Build CC red yml.' + bcolors.ENDC)
        create_cc_config(dvc_files, exp_name, rcc_branch_names, args.num_of_repeats,
                                args.live_output_files, args.live_output_update_frequence)

        subprocess.call(['git', 'add', 'cc_execution_file.red.yml'])

        print(bcolors.BOLD + 'DVC-CC: Save DVC and CC files to git.' + bcolors.ENDC)
        subprocess.call(['git', 'commit', '-q', '-m', 'DVC-CC: created DVC and the CC red yml file.'])
        subprocess.call(['git', 'push', '-q', '-u', 'origin', exp_name + ':' + exp_name])

        print(bcolors.BOLD + 'DVC-CC: Execute jobs.' + bcolors.ENDC)
        cc_id = exec_branch(args.keyring_service)

        if os.path.exists(str(Path('.dvc_cc/cc_ids.yml'))):
            with open(str(Path('.dvc_cc/cc_ids.yml')), 'r') as f:
                # An empty YAML file is loaded as None.
                loaded_yml = yaml.safe_load(f) or {}
        else:
            loaded_yml = {}

        if exp_name in loaded_yml:
            loaded_yml[exp_name].append(cc_id)
        else:
            loaded_yml[exp_name] = [cc_id]

        with open(str(Path('.dvc_cc/cc_ids.yml')), 'w') as f:
            yaml.dump(loaded_yml, f)

        print(bcolors.BOLD + 'DVC-CC: Save the ID to git.' + bcolors.ENDC)
        subprocess.call(['git', 'add', '.dvc_cc/cc_ids.yml'])
        subprocess.call(['git', 'commit', '-q', '-m', 'DVC-CC: Start the jobs.'])
        subprocess.call(['git', 'push', '-q', '-u', 'origin', exp_name + ':' + exp_name])

    finally:
        ##########################
        # Return to START-Branch #
        ##########################
        subprocess.call(['git', 'checkout', startbranch])

        # Only report a removal that actually happened.
        if _rm_quietly('-R', '.dvc/lock'):
            print('Information: ".dvc/lock" was removed.')

        if _rm_quietly('cc_execution_file.red.yml'):
            print('Information: delete "cc_execution_file.red.yml')

        for rcc_branch_name in rcc_branch_names:
            if os.path.isdir('dvc/'+str(rcc_branch_name)):
                _rm_quietly('-fR', 'dvc/'+str(rcc_branch_name))

        # loaded_yml is only set once the jobs were executed successfully.
        if loaded_yml is not None:
            if os.path.exists(str(Path('.dvc_cc/cc_all_ids.yml'))):
                with open(str(Path('.dvc_cc/cc_all_ids.yml')), 'r') as f:
                    # An empty YAML file is loaded as None.
                    loaded_yml2 = yaml.safe_load(f) or {}
            else:
                loaded_yml2 = {}
            loaded_yml2.update(loaded_yml)
            with open(str(Path('.dvc_cc/cc_all_ids.yml')), 'w') as f:
                yaml.dump(loaded_yml2, f)

            subprocess.call(['git', 'add', '.dvc_cc/cc_all_ids.yml'])
            subprocess.call(['git', 'commit', '-m', 'Update .dvc_cc/cc_all_ids.yml'])
            # A rejected push shows up as a non-zero return value, not as an
            # exception.
            try:
                push_failed = subprocess.call(['git', 'push']) != 0
            except OSError:
                push_failed = True
            if push_failed:
                print(bcolors.WARNING+'WARNING: It could not push the ID\' to git. This can happen if your branch is behind the remote branch. '
                      'You need to run "git pull" and "git push" to save the ID\'s in the git repository.'+bcolors.ENDC)
Example #5
0
def main():
    """Collect the cluster settings and set up DVC and DVC-CC for this repository.

    Without ``--htw-student`` / ``--htw-staff`` every setting (GPUs, RAM,
    docker image, batch concurrency limit, engine, DVC server, DVC folder and
    DVC user) is asked interactively; with one of the flags fixed defaults are
    used and only the user name is asked.

    Afterwards the function
      * removes ``.dvc/config`` and ``.dvc/config.local`` and (re)creates the
        DVC repository object, falling back to ``dvc init``,
      * registers the default DVC remote ``dvc_connection`` (ssh),
      * tries to create the remote folder via ``ssh`` and set its permissions,
      * creates ``.dvc_cc`` and writes the config through
        ``create_cc_config_file`` and stages ``.dvc_cc/cc_config.yml`` in git.
    """
    parser = ArgumentParser(description=DESCRIPTION)
    parser.add_argument('--htw-student', help='If this parameter is set, it will not ask the user to set the values. '
                                             'All values will set by default values.',default=False, action='store_true')
    parser.add_argument('--htw-staff', help='If this parameter is set, it will not ask the user to set the values. '
                                             'All values will set by default values.',default=False, action='store_true')

    parser.add_argument('--stderr-in-same-file',
                        help='If you do not want a own file for stdout and stderr you need to set this flag. If this flag is set, it will use the same file for both stdout and stderr.',
                        default=False, action='store_true')

    args = parser.parse_args()

    # The three values are only used to build the default remote DVC folder.
    gitrepo,gitowner,gitname = get_gitinformation()

    if not args.htw_student and not args.htw_staff:
        # Interactive mode: ask for every setting.
        print('These settings refer to the required hardware resources in the cluster.')
        print('If you do not set an argument it will take the default values.')

        print()
        print('Please enter the number of GPUs that you want on the cluster. Hint: In the most Deep Learning '
              'scripts, you want to use 1 GPU in the docker container.')
        # Repeat the question until the answer is empty (default) or a number.
        num_of_gpus = None
        while num_of_gpus is None:
            num_of_gpus = input(bcolors.OKBLUE+'\tNumber of GPUs'+bcolors.ENDC+' (default 0): ')
            if num_of_gpus == '':
                num_of_gpus = 0
            elif num_of_gpus.isdigit():
                num_of_gpus = int(num_of_gpus)
            else:
                print(bcolors.FAIL + '\tWarning: Did not understand your answer. Please use integer values i.e. 0,1,2,3,...' + bcolors.ENDC)
                num_of_gpus = None

        print()
        print('Please enter the RAM that you want on the cluster.')
        # The answer is given in GB and stored multiplied by 1000
        # (presumably megabytes - TODO confirm).
        ram = None
        while ram is None:
            ram = input(bcolors.OKBLUE+'\tRAM in GB'+bcolors.ENDC+' (default 20): ')
            if ram == '':
                ram = 20000 # 20 GB
            elif ram.isdigit():
                ram = int(ram)*1000
            else:
                print(bcolors.FAIL + '\tWarning: Did not understand your answer. Please use integer values i.e. 10,100,...'+bcolors.ENDC)
                ram = None

        print()
        print('Please enter the Docker Image in which your script gets executed at the cluster.')
        print('   You can choose from the following:')
        print('     - "large", if you want to work with PyTorch 1.2 or/and TensorFlow 2.')
        print('   You can also enter a URL to your own Docker Image.')
        print('   If you need more informations take a look at the following site: https://bit.ly/2mgbiVK')
        docker_image = input(bcolors.OKBLUE+'\tDocker Image'+bcolors.ENDC+' (default: "large"): ')
        if docker_image == '' or docker_image.lower() == 'large':
            docker_image = 'docker.io/deepprojects/dvc-cc-large:10.2'
            docker_image_needs_credentials = False
        else:
            # Custom image: additionally ask whether it needs credentials.
            docker_image_needs_credentials = None
            while docker_image_needs_credentials is None:
                docker_image_needs_credentials = input('\tDoes this docker image needs '
                                                       ''+bcolors.OKBLUE+'credentials'+bcolors.ENDC+'? [y,n]:')
                if docker_image_needs_credentials.lower().startswith('y'):
                    docker_image_needs_credentials = True
                elif docker_image_needs_credentials.lower().startswith('n'):
                    docker_image_needs_credentials = False
                else:
                    print(bcolors.FAIL+'\tWarning: Did not understand your answer. Please use y or n.'+bcolors.ENDC)
                    docker_image_needs_credentials = None
        print('You will use the Docker Image: '+ docker_image)
        print()
        batch_concurrency_limit = None
        print('The batch concurrency limit describes how many jobs you can start in parallel.')
        print('You can lower the number to 1, if you do not want the jobs from one experiment runs in parallel.')
        while batch_concurrency_limit is None:
            batch_concurrency_limit = input(bcolors.OKBLUE+'\tBatch concurrency limit'+bcolors.ENDC+' (default 12): ')
            if batch_concurrency_limit == '':
                batch_concurrency_limit = 12
            elif batch_concurrency_limit.isdigit():
                batch_concurrency_limit = int(batch_concurrency_limit)
            else:
                print(bcolors.FAIL+'\tWarning: Did not understand your answer. Please use integer values i.e. 1,4,12,...'+bcolors.ENDC)
                batch_concurrency_limit = None

        print()
        print('The name of the engine you want to use. This describes the cluster that you want to use.')
        print('At the HTW we have the engines "dt", "cc" and "cctest".')

        # The known short names all map to the engine "ccagency" with a
        # different URL; any other answer is kept as the engine name and the
        # URL is asked separately.
        engine = input('\tThe '+bcolors.OKBLUE+'engine'+bcolors.ENDC+' you want to use (default: dt): ')
        if engine == '' or engine == 'dt':
            engine = 'ccagency'
            engine_url = 'https://agency.f4.htw-berlin.de/dt'
        elif engine == 'cc':
            engine = 'ccagency'
            engine_url = 'https://agency.f4.htw-berlin.de/cc'
        elif engine == 'cctest':
            engine = 'ccagency'
            engine_url = 'https://agency.f4.htw-berlin.de/cctest'
        else:
            print('\tThis engine is unknown. Please specify the engine-url:')
            engine_url = input('The ' + bcolors.OKBLUE + 'engine-url' + bcolors.ENDC + ' you want to use: ')

        print('You will use the engine "' +engine+'" with the url "'+engine_url+'".')

        print()
        print('All large files created by your script and defined as output files by DVC are stored on the DVC server.')
        print('At the HTW we have the storage server "dt1" and "avocado01".')
        dvc_remote_server = input('\tThe remote '+bcolors.OKBLUE+'DVC server'+bcolors.ENDC+' that you want use ('
                                                                                          'default: dt1): ')
        # Expand the known short names to full host names; any other answer is
        # used unchanged as the server name.
        if dvc_remote_server == '' or dvc_remote_server.lower() == 'dt' or dvc_remote_server.lower() == 'dt1':
            dvc_remote_server = 'dt1.f4.htw-berlin.de'
        elif dvc_remote_server.lower() == 'avocado' or dvc_remote_server.lower() == 'avocado01':
            dvc_remote_server = 'avocado01.f4.htw-berlin.de'
        print('You will use the following DVC server "' + dvc_remote_server + '".')


        print()
        print('Here you can enter the folder where you want to store the DVC files on the DVC Storage Server.')
        if dvc_remote_server == 'avocado01.f4.htw-berlin.de':
            dvc_folder_default_value = '/data/ldap/Data-Version-Control-Cache/' + gitrepo + '/' + gitowner + '/' + \
                                       gitname
        else:
            # A leading '~' is expanded to a server specific home directory
            # further below.
            dvc_folder_default_value = '~/' + gitrepo + '/' + gitowner + '/' + gitname
        dvc_remote_path = input('\tThe remote '+bcolors.OKBLUE+'DVC folder'+bcolors.ENDC+' that you want use ('
                                                                                         'default: '+dvc_folder_default_value+'): ')
        if dvc_remote_path == '':
            dvc_remote_path = dvc_folder_default_value

        print()
        print('The username with that you can access the DVC storage server "'+dvc_remote_server+'".')
        dvc_remote_user = input('\tThe '+bcolors.OKBLUE+'username'+bcolors.ENDC+' for the remote DVC folder: ')
        if dvc_remote_user == '':
            # NOTE(review): the confirmation answer is stored in
            # dvc_remote_user itself. If the user confirms with "y", the user
            # name becomes that answer instead of staying empty, so the
            # "no credentials" branch below is not reached - looks like a bug,
            # confirm.
            dvc_remote_user = input('Do you really want to use the connection to the remote dvc folder without credentials? [n,y]')
            if not dvc_remote_user.lower().startswith('y'):
                dvc_remote_user = input('The username for the remote DVC folder: ')
        print()
    elif args.htw_student:
        # set default values
        num_of_gpus = 1 ##
        ram = 60000
        docker_image = 'docker.io/deepprojects/dvc-cc-large:10.2'
        docker_image_needs_credentials = False
        batch_concurrency_limit = 12
        engine = 'ccagency'
        engine_url = 'https://agency.f4.htw-berlin.de/dt'
        dvc_remote_server = 'dt1.f4.htw-berlin.de'
        dvc_remote_path = '~/' + gitrepo + '/' + gitowner + '/' + gitname

        valid_matriculation_number = False

        print(bcolors.OKBLUE+'Information: The matriculation number is used to access the dt1-storage server and the curious containers '
              'agency. If you get asked for dt1_f4_htw_berlin_de_username or agency_username, please use your matriculation number. '
              'The password for agency_password and dt1_f4_htw_berlin_de_password is the password you received to access '
              'the curious containers agency.'+bcolors.ENDC)

        # Ask until the answer is "s0" followed by digits only.
        while valid_matriculation_number == False:
            dvc_remote_user = input('\tPlease fill in your matriculation number (i.e. s0XXXXXX): ').strip()
            if dvc_remote_user.startswith('s0') and dvc_remote_user[2:].isdigit():
                valid_matriculation_number = True
            else:
                print('This is not a valid matriculation number.')
    else:
        # set default values
        num_of_gpus = 1 ##
        ram = 180000
        docker_image = 'docker.io/deepprojects/dvc-cc-large:10.2'
        docker_image_needs_credentials = False
        batch_concurrency_limit = 12
        engine = 'ccagency'
        engine_url = 'https://agency.f4.htw-berlin.de/cc'
        dvc_remote_server = 'avocado01.f4.htw-berlin.de'
        dvc_remote_path = '/data/ldap/Data-Version-Control-Cache/' + gitrepo + '/' + gitowner + '/' + gitname

        # NOTE(review): not read again in this branch.
        valid_matriculation_number = False

        # NOTE(review): the next line is not valid Python. Part of the
        # statement appears to have been replaced by '******' (looks like an
        # automatic redaction); the original text has to be restored from the
        # upstream source.
        dvc_remote_user = input('\tPlease fill in your ldap username: '******'.')
    # Start from a clean DVC configuration; fall back to the command line
    # ``dvc init`` if anything in here fails.
    try:
        if os.path.exists(str(Path('.dvc/config'))):
            os.remove('.dvc/config')
        if os.path.exists(str(Path('.dvc/config.local'))):
            os.remove('.dvc/config.local')

        dvcrepo = DVCRepo('.')

        #TODO: this can be removed!?
        if not os.path.exists('.dvc'):
            dvcrepo.init()
    except:
        subprocess.call(['dvc', 'init'])
        dvcrepo = DVCRepo('.')

    # Expand a leading '~' by hand: the home directory depends on the server.
    if dvc_remote_path.startswith('~'):
        if dvc_remote_server == 'dt1.f4.htw-berlin.de':
            dvc_remote_path = '/mnt/md0/' + dvc_remote_user + dvc_remote_path[1:]
        else:
            dvc_remote_path = '/home/'+ dvc_remote_user + dvc_remote_path[1:]


    # set remote dvc connection
    if dvc_remote_user == '':
        # No user name: ssh URL without user, password prompt disabled.
        subprocess.call(
            ['dvc', 'remote', 'add', '--force', '-d', 'dvc_connection', 'ssh://' + dvc_remote_server + ':' + dvc_remote_path])
        subprocess.call(['dvc', 'remote', 'modify', 'dvc_connection', 'ask_password', 'false'])
    else:
        subprocess.call(['dvc', 'remote', 'add', '--force', '-d', 'dvc_connection',
                         'ssh://' + dvc_remote_user + '@' + dvc_remote_server + ':' + dvc_remote_path])
        subprocess.call(['dvc', 'remote', 'modify', 'dvc_connection', 'ask_password', 'true'])

    # Create the remote folder, make it group writable and set default ACLs.
    # NOTE(review): subprocess.call returns the exit status instead of raising
    # when the remote command fails, so the warning below is presumably only
    # printed if ssh itself cannot be started - confirm.
    # NOTE(review): with an empty dvc_remote_user the target is '@<server>'.
    try:
        subprocess.call(['ssh', dvc_remote_user + '@' + dvc_remote_server, "mkdir -p "+dvc_remote_path+" ; chmod 774 "+dvc_remote_path+" ; setfacl -d -m u::rwX,g::rwX,o::- "+dvc_remote_path])
    except:
        print(bcolors.WARNING+'Warning: Currently acl is not installed on the server! You will maybe have problems by sharing the same remote dvc folder!'+bcolors.ENDC)


    # create the main folder of the dvc_cc software package.
    if not os.path.exists('.dvc_cc'):
        os.mkdir('.dvc_cc')
    
    # create the config file (an existing one is removed first).
    if os.path.exists(str(Path('.dvc_cc/cc_config.yml'))):
        os.remove('.dvc_cc/cc_config.yml')

    create_cc_config_file(num_of_gpus,ram,docker_image, docker_image_needs_credentials, batch_concurrency_limit,
                          engine, engine_url, args.stderr_in_same_file)
    subprocess.call(['git', 'add', '.dvc_cc/cc_config.yml'])