Exemple #1
0
    def setUp(self):
        """Prepare paths, GML file lists and pickled tasks for train and test.

        Creates the two output dataset directories if they are missing,
        collects the absolute path of every ``.gml`` file found in the train
        and test graph directories, then builds a ``VarNamingTask`` from each
        list and saves it alongside the graphs it was built from.
        """
        seen_repos_dir = os.path.join(test_s3shared_path, 'test_dataset', 'seen_repos')
        self.train_gml_dir = os.path.join(seen_repos_dir, 'train_graphs')
        self.test_gml_dir = os.path.join(seen_repos_dir, 'test_graphs')

        self.train_output_dataset_dir = os.path.join(test_s3shared_path, 'train_model_dataset')
        os.makedirs(self.train_output_dataset_dir, exist_ok=True)
        self.test_output_dataset_dir = os.path.join(test_s3shared_path, 'test_model_dataset')
        os.makedirs(self.test_output_dataset_dir, exist_ok=True)

        # The log directories are only named here; nothing in this method creates them.
        self.train_log_dir = os.path.join(test_s3shared_path, 'train_logs', get_time())
        self.test_log_dir = os.path.join(test_s3shared_path, 'test_logs', get_time())

        self.train_gml_files = [
            os.path.abspath(os.path.join(self.train_gml_dir, entry))
            for entry in os.listdir(self.train_gml_dir)
            if entry.endswith('.gml')
        ]
        self.test_gml_files = [
            os.path.abspath(os.path.join(self.test_gml_dir, entry))
            for entry in os.listdir(self.test_gml_dir)
            if entry.endswith('.gml')
        ]

        # Each task pickle is written into the same directory as its graphs.
        self.train_task_filepath = os.path.join(self.train_gml_dir, 'TrainVarNamingTask.pkl')
        VarNamingTask.from_gml_files(self.train_gml_files).save(self.train_task_filepath)
        self.test_task_filepath = os.path.join(self.test_gml_dir, 'TestVarNamingTask.pkl')
        VarNamingTask.from_gml_files(self.test_gml_files).save(self.test_task_filepath)
Exemple #2
0
    def setUp(self):
        """Prepare paths, the GML file list and a pickled ``FITBTask``.

        Ensures the output dataset directory exists, gathers the absolute
        paths of all ``.gml`` files in the repositories directory, and saves
        a ``FITBTask`` built from them into that same directory.
        """
        self.gml_dir = os.path.join(test_s3shared_path, 'test_dataset', 'repositories')
        self.output_dataset_dir = os.path.join(test_s3shared_path, 'FITB_test_dataset')
        # The log directory is only named here; this method does not create it.
        self.log_dir = os.path.join(test_s3shared_path, 'test_logs', get_time())
        os.makedirs(self.output_dataset_dir, exist_ok=True)

        self.test_gml_files = [
            os.path.abspath(os.path.join(self.gml_dir, entry))
            for entry in os.listdir(self.gml_dir)
            if entry.endswith('.gml')
        ]

        self.task_filepath = os.path.join(self.gml_dir, 'FITBTask.pkl')
        FITBTask.from_gml_files(self.test_gml_files).save(self.task_filepath)
Exemple #3
0
    def setUp(self):
        """Prepare paths and a small pickled ``VarNamingTask`` for minibatch tests.

        Ensures the output dataset directory exists, selects up to
        ``n_graphs_for_minibatch`` ``.gml`` files from the repositories
        directory, and saves a ``VarNamingTask`` built from them into that
        same directory.
        """
        self.gml_dir = os.path.join(test_s3shared_path, 'test_dataset', 'repositories')
        self.output_dataset_dir = os.path.join(test_s3shared_path, 'VarNaming_minibatch_memorize_test_dataset')
        # The log directory is only named here; this method does not create it.
        self.log_dir = os.path.join(test_s3shared_path, 'test_logs', get_time())
        os.makedirs(self.output_dataset_dir, exist_ok=True)
        self.n_graphs_for_minibatch = 5
        self.minibatch_size = 20

        # Filter to .gml files BEFORE truncating. Slicing the raw directory
        # listing first lets non-graph entries (e.g. the task pickle written
        # below on a previous run) use up slots, yielding fewer graphs than
        # requested. Sorting makes the selection reproducible, because
        # os.listdir returns entries in arbitrary order.
        gml_names = sorted(entry for entry in os.listdir(self.gml_dir)
                           if entry.endswith('.gml'))
        self.test_gml_files = [
            os.path.abspath(os.path.join(self.gml_dir, entry))
            for entry in gml_names[:self.n_graphs_for_minibatch]
        ]

        task = VarNamingTask.from_gml_files(self.test_gml_files)
        self.task_filepath = os.path.join(self.gml_dir, 'VarNamingTask.pkl')
        task.save(self.task_filepath)
def run_command_on_remote(ec2_instance_id: str,
                          function: Callable,
                          kwargs: dict):
    """Start ``function`` with ``kwargs`` inside a detached tmux session.

    A list of shell commands is built that creates a tmux session on the
    ``/tmp/socket`` socket, switches to the ``ubuntu`` user, changes into the
    configured project root, loads the git SSH key, pulls, exports the
    configured environment variables, and finally runs
    ``<sys.executable> -m experiments.run_command_on_remote`` followed by the
    output of ``serialize_call(function, kwargs)``.

    Args:
        ec2_instance_id: ``'local'`` to run the commands one by one in a local
            shell (after changing the working directory to the project root);
            any other value is passed to ``aws ssm send-command`` as the
            target instance id.
        function: The callable to launch; its ``__name__`` is part of the
            tmux session name.
        kwargs: Keyword arguments handed to ``serialize_call`` together with
            ``function``.

    Returns:
        None. Commands that exit with a non-zero status are logged as
        warnings; they do not raise and do not stop later commands.
    """
    session_name = '_'.join([get_time(), str(int(time.time())), function.__name__])

    def send_keys(keys):
        # Type ``keys`` into the tmux session and press Enter (C-m).
        return "tmux -S /tmp/socket send-keys -t {} '{}' C-m".format(session_name, keys)

    remote_commands = dict(commands=[
        "tmux -S /tmp/socket new-session -d -s {}".format(session_name),
        "chmod 777 /tmp/socket",
        send_keys('sudo su ubuntu -l'),
        send_keys('cd {}'.format(aws_config['remote_project_root'])),
        send_keys('eval $(ssh-agent -s)'),
        send_keys('ssh-add {}'.format(aws_config['git_ssh_key_loc'])),
        send_keys('git pull'),
        send_keys(' '.join('export {}={}; '.format(k, v)
                           for k, v in aws_config['environment_variables'].items())),
        send_keys('{} -m experiments.run_command_on_remote {}'.format(
            sys.executable, serialize_call(function, kwargs))),
    ])

    if ec2_instance_id == 'local':
        os.chdir(project_root_path)
        for shell_command in remote_commands['commands']:
            logger.info('Running command {}'.format(shell_command))
            completed = subprocess.run(shell_command, shell=True, env=os.environ.copy())
            # Keep going on failure, but make the failure visible in the log.
            if completed.returncode != 0:
                logger.warning('Command exited with status {}: {}'.format(
                    completed.returncode, shell_command))
    else:
        command = ['aws',
                   'ssm',
                   'send-command',
                   '--instance-ids', ec2_instance_id,
                   '--document-name', 'AWS-RunShellScript',
                   '--parameters', json.dumps(remote_commands),
                   '--output', 'text',
                   '--query', 'Command.CommandId',
                   '--profile', aws_config['remote_config_profile_name']]

        logger.info('Running command {}'.format(' '.join(command)))
        completed = subprocess.run(command, env=os.environ.copy())
        if completed.returncode != 0:
            logger.warning('aws ssm send-command exited with status {}'.format(
                completed.returncode))
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
import time

from experiments import aws_config
from experiments.run_command_on_remote import run_command_on_remote
from experiments.train_model_for_experiment import train_model_for_experiment
from experiments.utils import get_time

if __name__ == '__main__':
    experiment_run_log_id = get_time()
    instance_ids_train_kwargs = [
        (
            aws_config['remote_ids']['box1'],
            dict(dataset_name='18_popular_mavens',
                 experiment_name='FITB_vocab_comparison',
                 experiment_run_log_id=experiment_run_log_id,
                 seed=5145,
                 gpu_ids=(0, 1, 2, 3),
                 model_name='FITBClosedVocabGGNN',
                 model_label='all_edge',
                 model_kwargs=dict(hidden_size=64,
                                   type_emb_size=30,
                                   name_emb_size=31,
                                   n_msg_pass_iters=8),
                 init_fxn_name='Xavier',
                 init_fxn_kwargs=dict(),
                 loss_fxn_name='FITBLoss',
                 loss_fxn_kwargs=dict(),
                 optimizer_name='Adam',
                 optimizer_kwargs={'learning_rate': .0002},
                 val_fraction=0.15,