def setUp(self):
    """Build the VarNaming train/test fixtures.

    Locates the .gml graph files for the seen-repos train and test splits,
    constructs a VarNamingTask from each split, and pickles the tasks next to
    their graphs.

    Side effects: creates the train/test output dataset directories and writes
    'TrainVarNamingTask.pkl' and 'TestVarNamingTask.pkl' to disk.
    """
    self.train_gml_dir = os.path.join(test_s3shared_path, 'test_dataset', 'seen_repos', 'train_graphs')
    self.test_gml_dir = os.path.join(test_s3shared_path, 'test_dataset', 'seen_repos', 'test_graphs')
    self.train_output_dataset_dir = os.path.join(test_s3shared_path, 'train_model_dataset')
    os.makedirs(self.train_output_dataset_dir, exist_ok=True)
    self.test_output_dataset_dir = os.path.join(test_s3shared_path, 'test_model_dataset')
    os.makedirs(self.test_output_dataset_dir, exist_ok=True)
    # Log dirs are timestamped so repeated runs don't collide.
    self.train_log_dir = os.path.join(test_s3shared_path, 'train_logs', get_time())
    self.test_log_dir = os.path.join(test_s3shared_path, 'test_logs', get_time())

    def _gml_files_in(directory):
        # Absolute paths of all .gml files directly inside `directory`
        # (non-recursive, matching the original behavior).
        return [os.path.abspath(os.path.join(directory, name))
                for name in os.listdir(directory)
                if name.endswith('.gml')]

    self.train_gml_files = _gml_files_in(self.train_gml_dir)
    self.test_gml_files = _gml_files_in(self.test_gml_dir)

    train_task = VarNamingTask.from_gml_files(self.train_gml_files)
    self.train_task_filepath = os.path.join(self.train_gml_dir, 'TrainVarNamingTask.pkl')
    train_task.save(self.train_task_filepath)
    test_task = VarNamingTask.from_gml_files(self.test_gml_files)
    self.test_task_filepath = os.path.join(self.test_gml_dir, 'TestVarNamingTask.pkl')
    test_task.save(self.test_task_filepath)
def setUp(self):
    """Prepare the FITB test fixture.

    Collects every .gml graph from the repositories directory, builds an
    FITBTask from them, and pickles the task as 'FITBTask.pkl' alongside the
    graphs. Also creates the output dataset directory and a timestamped log
    directory path.
    """
    self.gml_dir = os.path.join(test_s3shared_path, 'test_dataset', 'repositories')
    self.output_dataset_dir = os.path.join(test_s3shared_path, 'FITB_test_dataset')
    self.log_dir = os.path.join(test_s3shared_path, 'test_logs', get_time())
    os.makedirs(self.output_dataset_dir, exist_ok=True)
    self.test_gml_files = [
        os.path.abspath(os.path.join(self.gml_dir, name))
        for name in os.listdir(self.gml_dir)
        if name[-4:] == '.gml'
    ]
    task = FITBTask.from_gml_files(self.test_gml_files)
    self.task_filepath = os.path.join(self.gml_dir, 'FITBTask.pkl')
    task.save(self.task_filepath)
def setUp(self):
    """Prepare a small VarNaming fixture for minibatch-memorization tests.

    Takes the first `n_graphs_for_minibatch` entries of the repositories
    directory, keeps the .gml files among them, builds a VarNamingTask, and
    pickles it as 'VarNamingTask.pkl' alongside the graphs.

    NOTE(review): the slice is applied BEFORE the .gml filter, so fewer than
    `n_graphs_for_minibatch` graphs are selected if any of the first entries
    are not .gml files — behavior preserved as-is; confirm intent.
    """
    self.gml_dir = os.path.join(test_s3shared_path, 'test_dataset', 'repositories')
    self.output_dataset_dir = os.path.join(test_s3shared_path, 'VarNaming_minibatch_memorize_test_dataset')
    self.log_dir = os.path.join(test_s3shared_path, 'test_logs', get_time())
    os.makedirs(self.output_dataset_dir, exist_ok=True)
    self.n_graphs_for_minibatch = 5
    self.minibatch_size = 20
    candidates = os.listdir(self.gml_dir)[:self.n_graphs_for_minibatch]
    self.test_gml_files = [
        os.path.abspath(os.path.join(self.gml_dir, name))
        for name in candidates
        if name[-4:] == '.gml'
    ]
    task = VarNamingTask.from_gml_files(self.test_gml_files)
    self.task_filepath = os.path.join(self.gml_dir, 'VarNamingTask.pkl')
    task.save(self.task_filepath)
def run_command_on_remote(ec2_instance_id: str, function: Callable, kwargs: dict) -> None:
    """Run `function(**kwargs)` inside a detached tmux session, either locally
    or on an EC2 instance via AWS SSM.

    The call is serialized (via `serialize_call`) and re-invoked through
    `python -m experiments.run_command_on_remote` inside tmux, so the work
    survives SSH/SSM disconnects.

    Args:
        ec2_instance_id: EC2 instance id, or the literal string 'local' to run
            the same command sequence on this machine.
        function: the callable to execute remotely; only its __name__ and its
            serialized form are used here.
        kwargs: keyword arguments serialized along with `function`.

    Side effects: spawns subprocesses; when 'local', also chdirs this process
    to `project_root_path`.
    """
    # Unique-ish session name: timestamp string + epoch seconds + function name.
    session_name = '_'.join([get_time(), str(int(time.time())), function.__name__])
    # Shell command sequence, in the exact format AWS-RunShellScript expects
    # ({"commands": [...]}): create a detached tmux session on a shared socket,
    # then feed it keystrokes to become the ubuntu user, cd into the project,
    # start an ssh-agent, add the git deploy key, pull latest, export the
    # configured environment variables, and finally launch the serialized call.
    remote_commands = dict(commands=["tmux -S /tmp/socket new-session -d -s {}".format(session_name),
                                     # 777 so other users (e.g. ubuntu after `sudo su`) can attach to the socket.
                                     "chmod 777 /tmp/socket",
                                     "tmux -S /tmp/socket send-keys -t {} 'sudo su ubuntu -l' C-m".format(session_name),
                                     "tmux -S /tmp/socket send-keys -t {} 'cd {}' C-m".format(session_name,
                                                                                              aws_config[
                                                                                                  'remote_project_root']),
                                     "tmux -S /tmp/socket send-keys -t {} 'eval $(ssh-agent -s)' C-m".format(
                                         session_name),
                                     "tmux -S /tmp/socket send-keys -t {} 'ssh-add {}' C-m".format(session_name,
                                                                                                   aws_config[
                                                                                                       'git_ssh_key_loc']),
                                     "tmux -S /tmp/socket send-keys -t {} 'git pull' C-m".format(session_name),
                                     # One keystroke line containing all `export K=V; ` pairs from config.
                                     "tmux -S /tmp/socket send-keys -t {} '{}' C-m".format(session_name, ' '.join(
                                         ['export {}={}; '.format(k, v) for k, v in
                                          aws_config['environment_variables'].items()])),
                                     "tmux -S /tmp/socket send-keys -t {} '{} -m experiments.run_command_on_remote {}' C-m".format(
                                         session_name, sys.executable, serialize_call(function, kwargs))])
    if ec2_instance_id == 'local':
        # Local mode: run the same command list directly through the shell.
        # NOTE(review): shell=True with formatted strings is deliberate here
        # (tmux keystroke commands); inputs come from trusted local config.
        os.chdir(project_root_path)
        command = remote_commands['commands']
        for c in command:
            logger.info('Running command {}'.format(c))
            subprocess.run(c, shell=True, env=os.environ.copy())
    else:
        # Remote mode: ship the whole command list to the instance via SSM's
        # AWS-RunShellScript document; prints only the resulting CommandId.
        command = ['aws', 'ssm', 'send-command', '--instance-ids', ec2_instance_id, '--document-name',
                   'AWS-RunShellScript', '--parameters', json.dumps(remote_commands), '--output', 'text', '--query',
                   'Command.CommandId', '--profile', aws_config['remote_config_profile_name']]
        logger.info('Running command {}'.format(' '.join(command)))
        subprocess.run(command, env=os.environ.copy())
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. import time from experiments import aws_config from experiments.run_command_on_remote import run_command_on_remote from experiments.train_model_for_experiment import train_model_for_experiment from experiments.utils import get_time if __name__ == '__main__': experiment_run_log_id = get_time() instance_ids_train_kwargs = [ ( aws_config['remote_ids']['box1'], dict(dataset_name='18_popular_mavens', experiment_name='FITB_vocab_comparison', experiment_run_log_id=experiment_run_log_id, seed=5145, gpu_ids=(0, 1, 2, 3), model_name='FITBClosedVocabGGNN', model_label='all_edge', model_kwargs=dict(hidden_size=64, type_emb_size=30, name_emb_size=31, n_msg_pass_iters=8), init_fxn_name='Xavier', init_fxn_kwargs=dict(), loss_fxn_name='FITBLoss', loss_fxn_kwargs=dict(), optimizer_name='Adam', optimizer_kwargs={'learning_rate': .0002}, val_fraction=0.15,