Example #1
def main():
    ensure_config()
    wsserver_ip, wsserver_port = config["wsserver_addr"].split(':')
    queue = multiprocessing.Queue()
    proc = multiprocessing.Process(target=connectionManager.init,
                                   args=(queue, wsserver_ip,
                                         int(wsserver_port)))
    proc.start()

    create_logger()
    flask_init(queue)
    server_ip, server_port = config["server_addr"].split(':')
    start_flask(server_ip, int(server_port))
Example #2
def cli(model_family_dir, model_name, data_dir, batch_size, max_seq_len,
        file_limit, gpu_id):

    model_family_dir = Path(model_family_dir)
    model_dir = model_family_dir / model_name
    device = torch.device(f'cuda:{gpu_id}' if gpu_id is not None else 'cpu')
    log = util.create_logger('test', os.path.join(model_dir, 'testing.log'),
                             True)

    log.info(f"Loading data from {data_dir}.")
    fields = data.load_fields(model_family_dir)
    fields['text'].include_lengths = True
    test_data = data.load_data(data_dir, fields, 'test', max_seq_len,
                               file_limit)

    vocab_size = len(fields['text'].vocab.itos)
    comm_vocab_size = len(fields['community'].vocab.itos)
    comms = fields['community'].vocab.itos
    pad_idx = fields['text'].vocab.stoi['<pad>']
    log.info(f"Loaded {len(test_data)} test examples.")

    model_args = json.load(open(model_dir / 'model_args.json'))
    lm = model.CommunityConditionedLM.build_model(**model_args).to(device)
    lm.load_state_dict(torch.load(model_dir / 'model.bin'))
    lm.to(device)
    lm.eval()
    log.debug(str(lm))

    test_iterator = tt.data.BucketIterator(test_data,
                                           device=device,
                                           batch_size=batch_size,
                                           sort_key=lambda x: -len(x.text),
                                           shuffle=True,
                                           train=False)

    def batchify_comm(comm, batch_size):
        comm_idx = fields['community'].vocab.stoi[comm]
        return torch.tensor(comm_idx).repeat(batch_size).to(device)

    with torch.no_grad(), open(model_dir / 'nll.csv', 'w') as f:
        meta_fields = ['community', 'example_id', 'length']
        data_fields = comms if lm.use_community else ['nll']
        writer = csv.DictWriter(f, fieldnames=meta_fields + data_fields)
        writer.writeheader()
        for i, batch in enumerate(test_iterator):
            nlls_batch = [
                dict(zip(meta_fields, meta_values)) for meta_values in zip(
                    [comms[i] for i in batch.community.tolist()],
                    batch.example_id.tolist(), batch.text[1].tolist())
            ]
            for comm in comms:
                if lm.use_community:
                    batch_comm = batchify_comm(comm, batch.batch_size)
                else:
                    batch_comm = None
                nlls_comm = batch_nll(lm, batch, pad_idx, comm=batch_comm)
                for j, nll in enumerate(nlls_comm):
                    nlls_batch[j][comm] = nll.item()
            writer.writerows(nlls_batch)
            log.info(f"Completed {i+1}/{len(test_iterator)}")
Example #3
 def __init__(self, sess, dataset, conf):
     super(NGCF, self).__init__(dataset, conf)
     self.logger = create_logger(conf)
     self.learning_rate = conf["learning_rate"]
     self.learner = conf["learner"]
     self.batch_size = conf["batch_size"]
     self.emb_dim = conf["embedding_size"]
     self.weight_size = conf["layer_size"]
     self.n_layers = len(self.weight_size)
     self.num_epochs = conf["epochs"]
     self.reg = conf["reg"]
     self.adj_type = conf["adj_type"]
     self.alg_type = conf["alg_type"]
     self.node_dropout_flag = conf["node_dropout_flag"]
     self.node_dropout_ratio = conf["node_dropout_ratio"]
     self.mess_dropout_ratio = conf["mess_dropout_ratio"]
     self.data_name = conf["data.input.dataset"]
     self.embed_init_method = conf["embed_init_method"]
     self.weight_init_method = conf["weight_init_method"]
     self.stddev = conf["stddev"]
     self.verbose = conf["verbose"]
     self.dataset = dataset
     self.num_users = dataset.num_users
     self.num_items = dataset.num_items
     self.graph = dataset.train_matrix.toarray()
     self.norm_adj = self.get_adj_mat()
     #         self.n_nonzero_elems = self.norm_adj.count_nonzero()
     self.pretrain_data = None
     self.sess = sess
     self.logger.info(conf)
def main(args):
    logger = create_logger('logger')
    # It doesn't matter which model we load here because we only do whitespace or rule-based tokenization anyway.
    nlp = spacy.load("en_core_web_sm")
    nlp.tokenizer = Tokenizer(nlp.vocab)
    matcher = Matcher(nlp.vocab)

    triggers = read_file(args.cue_list)
    setup_matcher(triggers, matcher)

    # load data
    data = load_data(args.input_file)

    tagged_sentences = []

    for seqid, _ in enumerate(data):
        observed_sequences = set()
        out, idxs = process_doc(nlp,
                                matcher,
                                ' '.join(data[seqid]['seq']),
                                split_sents=False)
        for sid, elm in enumerate(out[0]):
            if ' '.join(elm) not in observed_sequences:
                tagged_sentences.append({
                    'uid': len(tagged_sentences),
                    'seq': elm,
                    'sid': '{}_{}'.format(seqid, sid)
                })
                observed_sequences.add(' '.join(elm))
    logger.info('Writing tagged sequences to {}'.format(
        '.'.join(args.input_file.split('.')[:-1]) + '#cues.jsonl'))
    with open('.'.join(args.input_file.split('.')[:-1]) + '#cues.jsonl',
              'w') as fout:
        for elm in tagged_sentences:
            fout.write(json.dumps(elm) + '\n')
    fout.close()

    # produce html with colored negation cues
    logger.info('Writing html for visualization to {}'.format(
        '.'.join(args.input_file.split('.')[:-1]) + '#cues.html'))
    html = []
    for seq in tagged_sentences:
        seq = seq['seq']
        labels = ['O'] * len(seq)
        for i, tok in enumerate(seq):
            if tok == '[CUE]':
                labels[i] = 'CUE'
                if i < len(labels) - 1:
                    labels[i + 1] = 'CUE'
        html.append(
            html_heatmap(
                words=[elm for elm in seq if elm != '[CUE]'] + ['<br>'],
                labels=[
                    elm for i, elm in enumerate(labels) if seq[i] != '[CUE]'
                ] + ['O']))
    with open('.'.join(args.input_file.split('.')[:-1]) + '#cues.html',
              'w') as fout:
        for elm in html:
            fout.write(elm + '\n')
    fout.close()
Example #5
    def __init__(self, sess, dataset, conf):
        super(LightGCN, self).__init__(dataset, conf)
        self.logger = create_logger(conf)
        self.learning_rate = float(conf["learning_rate"])
        self.embedding_size = int(conf["embedding_size"])
        self.learner = conf["learner"]
        self.num_epochs = int(conf["epochs"])
        self.batch_size = int(conf["batch_size"])
        self.verbose = conf["verbose"]
        self.reg = float(conf["reg"])
        self.init_method = conf["init_method"]
        self.stddev = conf["stddev"]
        self.weight_size = conf["weight_size"]
        self.n_layers = len(self.weight_size)
        self.data_name = conf["data.input.dataset"]
        self.dataset = dataset
        self.num_users = dataset.num_users
        self.num_items = dataset.num_items

        self.R = dataset.train_matrix

        self.graph = dataset.train_matrix.tolil()
        self.norm_adj = self.get_adj_mat()
        self.sess = sess

        self.logger.info(conf)
Example #6
 def __init__(self):
     self.logger = create_logger(__name__)
     self.actions_file = os.getenv(self.ACTIONS_FILE,
                                   self.ACTIONS_FILE_WRITE_LOCATION)
     self.conf_file = os.getenv(self.LEGACY_CONFIG, None)
     self.projectre = re.compile(r'^[a-z0-9]([-a-z0-9]*[a-z0-9])?$')
     self.projectmaxlen = 63
Example #7
 def __init__(self):
     super().__init__()
     self.logger = util.create_logger("idea-bot")
     parent_conn, child_conn = Pipe()
     self.conn = parent_conn
     self.generator_process = GeneratorProcess(conn=child_conn)
     self.generator_process.start()
     self.loop.create_task(self.check_responses())
Example #8
 def __init__(self):
     self.logger = create_logger(__name__)
     curator_cmd = CuratorCmd()
     self.cmd_list = curator_cmd.build_cmd_list()
     self.defaults = curator_cmd.get_defaults()
     self.hour = self.defaults.get('runhour', 0)
     self.minute = self.defaults.get('runminute', 0)
     self.timezone = self.defaults.get('timezone', 'UTC')
     self.job_list = CronTab()
Example #9
def main():
    try:
        print("Starting embeddings generation")
        random_state = 1
        np.random.seed(random_state)

        # DEFINE MODEL PARAMS
        K = 45
        num_of_dims = 100
        #epochs
        bound_on_iter = 5
        omega = 0.45557
        e_release = 0.0414

        kg_root = '../dbpedia/pyke_data'
        kg_path = kg_root + '/'

        print("Path to KG: ", kg_path)

        storage_path, experiment_folder = ut.create_experiment_folder()
        logger = ut.create_logger(name='PYKE', p=storage_path)
        logger.info("Storage path: " + storage_path + "\texperiment folder: " +
                    experiment_folder)
        parser = Parser(p_folder=storage_path, k=K)
        parser.set_logger(logger)
        logger.info("Setting similarity measure")
        parser.set_similarity_measure(PPMI)
        logger.info("Model init")
        model = PYKE(logger=logger)
        logger.info("Analyzer init")
        analyser = DataAnalyser(p_folder=storage_path, logger=logger)
        # For illustration purposes, let's only process the first 5000 ntriples from each given file.
        # To reproduce reported results => parser.pipeline_of_preprocessing(kg_path)
        holder = parser.pipeline_of_preprocessing(kg_path)

        vocab_size = len(holder)
        logger.info("Vocab size: " + str(vocab_size))
        embeddings = ut.randomly_initialize_embedding_space(
            vocab_size, num_of_dims)

        learned_embeddings = model.pipeline_of_learning_embeddings(
            e=embeddings,
            max_iteration=bound_on_iter,
            energy_release_at_epoch=e_release,
            holder=holder,
            omega=omega)

        logger.info("Writing to file.")
        learned_embeddings.to_csv(storage_path + '/PYKE_100_embd.csv')
        logger.info("Done!")
        # To use memory efficiently
        del holder
        del embeddings

    except Exception as e:
        print(e)
Example #10
    def run(self):
        self.logger = util.create_logger(__name__)
        self.logger.info("Starting GeneratorProcess")

        try:
            while not self.terminate:
                request = self.conn.recv()
                self.handle_request(request)
        except KeyboardInterrupt:
            return
 def __init__(self, config_file):
     self.config_file = config_file
     self.allowed_units = {'days': 'days', 'weeks': 'weeks', 'months': 'months'}
     self.default_time_unit = 'days'
     self.default_count = int(os.getenv('CURATOR_DEFAULT_DAYS', 31))
     self.runhour = int(os.getenv('CURATOR_RUN_HOUR', 0))
     self.runminute = int(os.getenv('CURATOR_RUN_MINUTE', 0))
     self.timezone = str(os.getenv('CURATOR_RUN_TIMEZONE', 'UTC'))
     self.logger = create_logger(__name__)
     self.internal_config_yaml = {}
 def __init__(self):
     config_file = os.getenv('CURATOR_CONF_LOCATION', 'config.yaml')
     parser = Parser(config_file)
     self.conf = parser.parse()
     self.projectre = re.compile(r'^[a-z0-9]([-a-z0-9]*[a-z0-9])?$')
     self.projectmaxlen = 63
     self.allowed_operations = {'delete': 'delete'}
     self.allowed_params = {'raw_regex': self.RAW_REGEX}
     self.curator_settings = {'delete': {}}
     self.logger = create_logger(__name__)
     self.curator_log_level = os.getenv('CURATOR_LOG_LEVEL', 'INFO')  # INFO by default
     self.commands = []
 def __init__(self):
     config_file = os.getenv('CURATOR_CONF_LOCATION', '/etc/curator/settings/config.yaml')
     parser = Parser(config_file)
     self.conf = parser.parse()
     self.projectre = re.compile(r'^[a-z0-9]([-a-z0-9]*[a-z0-9])?$')
     self.projectmaxlen = 63
     self.allowed_operations = {'delete': 'delete'}
     self.allowed_params = {'raw_regex': self.RAW_REGEX}
     self.curator_settings = {'delete': {}}
     self.logger = create_logger(__name__)
     self.curator_log_level = os.getenv('CURATOR_LOG_LEVEL', 'ERROR')
     self.commands = []
Example #14
 def __init__(self):
     self.logger =   create_logger(__name__)
     curator_cmd =   CuratorCmd()
     self.cmd_list = curator_cmd.build_cmd_list()
     self.defaults = curator_cmd.get_defaults()
     self.hour =     self.defaults.get('runhour', 0)
     self.minute =   self.defaults.get('runminute', 0)
     self.timezone = self.defaults.get('timezone', 'UTC')
     self.job_list = CronTab()
     self.ca =       os.getenv('ES_CA', '/etc/curator/keys/ca')
     self.cert =     os.getenv('ES_CLIENT_CERT', '/etc/curator/keys/cert')
     self.key =      os.getenv('ES_CLIENT_KEY', '/etc/curator/keys/key')
     self.es_host =  os.getenv('ES_HOST', 'logging-es')
     self.es_port =  os.getenv('ES_PORT', '9200')
Example #15
 def __init__(self, config_file):
     self.config_file = config_file
     self.allowed_units = {
         'days': 'days',
         'weeks': 'weeks',
         'months': 'months'
     }
     self.default_time_unit = 'days'
     self.default_count = int(os.getenv('CURATOR_DEFAULT_DAYS', 31))
     self.runhour = int(os.getenv('CURATOR_RUN_HOUR', 0))
     self.runminute = int(os.getenv('CURATOR_RUN_MINUTE', 0))
     self.timezone = str(os.getenv('CURATOR_RUN_TIMEZONE', 'UTC'))
     self.logger = create_logger(__name__)
     self.internal_config_yaml = {}
 def __init__(self):
     self.logger =   create_logger(__name__)
     curator_cmd =   CuratorCmd()
     self.cmd_list = curator_cmd.build_cmd_list()
     self.defaults = curator_cmd.get_defaults()
     self.hour =     self.defaults.get('runhour', 0)
     self.minute =   self.defaults.get('runminute', 0)
     self.timezone = self.defaults.get('timezone', 'UTC')
     self.job_list = CronTab()
     self.ca =       os.getenv('ES_CA', '/etc/curator/keys/ca')
     self.cert =     os.getenv('ES_CLIENT_CERT', '/etc/curator/keys/cert')
     self.key =      os.getenv('ES_CLIENT_KEY', '/etc/curator/keys/key')
     self.es_host =  os.getenv('ES_HOST', 'logging-es')
     self.es_port =  os.getenv('ES_PORT', '9200')
     self.es_hostport = self.es_host + ':' + self.es_port
Example #17
    def __init__(self, sess, dataset, config):
        super(FastLightGCN, self).__init__(dataset, config)
        self.logger = create_logger(config)

        # argument settings
        self.model_type = config['recommender']
        self.epoch = config["epoch"]
        self.adj_type = config["adj_type"]
        self.alg_type = config["alg_type"]
        self.n_users, self.n_items, self.num_ratings, self.sparsity = dataset.num_users, dataset.num_items, dataset.num_r, dataset.sparsity
        self.R = dataset.train_matrix
        self.dataset = dataset
        self.data_name = config["data.input.dataset"]
        self.n_fold = 100
        self.lr = config["learning_rate"]
        self.emb_dim = config["embed_size"]
        self.weight_size = config["weight_size"]
        self.node_dropout_flag = config["node_dropout_flag"]
        self.node_dropout = config["node_dropout"]
        self.mess_dropout = config["mess_dropout"]
        self.n_layers = len(self.weight_size)
        self.r_alpha = config["r_alpha"]
        self.fast_reg = config["fast_reg"]
        self.logger.info(
            "\"num_users\": %d,\"num_items\":%d, \"num_ratings\":%d, \"sparsity\":%.4f"
            % (self.n_users, self.n_items, self.num_ratings, self.sparsity))
        self.logger.info(config)
        self.sess = sess

        plain_adj, norm_adj, mean_adj, pre_adj = self.get_adj_mat()

        if config["adj_type"] == 'plain':
            self.norm_adj = plain_adj
            print('use the plain adjacency matrix')
        elif config["adj_type"] == 'norm':
            self.norm_adj = norm_adj
            print('use the normalized adjacency matrix')
        elif config["adj_type"] == 'gcmc':
            self.norm_adj = mean_adj
            print('use the gcmc adjacency matrix')
        elif config["adj_type"] == 'pre':
            self.norm_adj = pre_adj
            print('use the pre adjacency matrix')
        else:
            self.norm_adj = mean_adj + sp.eye(mean_adj.shape[0])
            print('use the mean adjacency matrix')

        self.n_nonzero_elems = self.norm_adj.count_nonzero()
def main(log_dir):
    logger = util.create_logger(name='bc_training', log_dir='pretrained/ant_pi')

    device = torch.device('cuda:0')
    policy = PIStudent(
        act_dim=ACT_DIM,
        msg_dim=32,
        pos_em_dim=8,
        hidden_dim=32,
    ).to(device)

    batch_size = 8
    data = load_data(os.path.join(log_dir, 'data.npz'))
    batches = sample_batch_data(data, batch_size=batch_size)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(policy.parameters(), lr=3e-4)

    max_iter = 1000000
    noise_sd = 0.1
    for i in range(max_iter):
        batch_data = torch.from_numpy(next(batches)).float().to(device)
        optimizer.zero_grad()

        # This is only to show how BC training works; it is inefficient.
        seq_len = batch_data.shape[1]
        losses = []
        for traj in batch_data:
            pred_act = []
            policy.attention_neuron.reset()  # Reset AttentionNeuron's hx.
            for t in range(seq_len):
                prev_act, obs = traj[t][:ACT_DIM], traj[t][ACT_DIM:-ACT_DIM]
                prev_act = prev_act + torch.randn(ACT_DIM).to(device) * noise_sd
                pred_act.append(policy(obs, prev_act))
            pred_act = torch.vstack(pred_act)
            act = traj[:, -ACT_DIM:]
            losses.append(criterion(input=pred_act, target=act))
        loss = sum(losses) / batch_size
        loss.backward()
        torch.nn.utils.clip_grad_norm_(policy.parameters(), max_norm=.1)
        optimizer.step()

        logger.info('iter={}, loss={}'.format(i, loss.item()))
        if i % 1000 == 0:
            save_model(policy, i)

    save_model(policy, max_iter)
Example #19
    def __init__(self, sess, dataset, config):
        super(Local_end, self).__init__(dataset, config)
        self.logger = create_logger(config)
        self.logger.info(config)
        # argument settings
        self.model_type = config['recommender']
        self.epoch = config["epoch"]
        self.adj_type = config["adj_type"]
        self.alg_type = config["alg_type"]
        self.n_users, self.n_items = dataset.num_users, dataset.num_items
        self.R = dataset.train_matrix
        self.dataset = dataset
        self.data_name = config["data.input.dataset"]
        self.lr = config["learning_rate"]
        self.emb_dim = config["embed_size"]
        self.weight_size = config["weight_size"]
        self.node_dropout_flag = config["node_dropout_flag"]
        self.node_dropout = config["node_dropout"]
        self.mess_dropout = config["mess_dropout"]
        self.n_layers = len(self.weight_size)
        self.r_alpha = config["r_alpha"]
        self.fast_reg = config["fast_reg"]
        self.localmodel = config['localmodel']
        self.bw = config['d']
        self.sess = sess
        self.verbose = config['verbose']

        plain_adj, norm_adj, mean_adj, pre_adj, random_adj = self.get_adj_mat()
        if config["adj_type"] == 'plain':
            self.norm_adj = plain_adj
            print('use the plain adjacency matrix')
        elif config["adj_type"] == 'norm':
            self.norm_adj = norm_adj
            print('use the normalized adjacency matrix')
        elif config["adj_type"] == 'gcmc':
            self.norm_adj = mean_adj
            print('use the gcmc adjacency matrix')
        elif config["adj_type"] == 'pre':
            self.norm_adj = pre_adj
            print('use the pre adjacency matrix')
        else:
            self.pre_adj = pre_adj
            self.norm_adj = random_adj
            print('use the random adjacency matrix')
def main(config):
    logger = util.create_logger(name='test_solution', log_dir=config.log_dir)
    task = util.create_task(logger=logger)
    task.seed(config.seed)

    solution = util.create_solution(device='cpu:0')
    model_file = os.path.join(config.log_dir, config.model_filename)
    solution.load(model_file)

    rewards = []
    time_costs = []
    for ep in range(config.n_episodes):
        start_time = time.perf_counter()
        reward = task.rollout(solution=solution, evaluation=True)
        time_cost = time.perf_counter() - start_time
        rewards.append(reward)
        time_costs.append(time_cost)
        logger.info('Episode: {0}, reward: {1:.2f}'.format(ep + 1, reward))

    logger.info('Avg reward: {0:.2f}, sd: {1:.2f}'.format(
        np.mean(rewards), np.std(rewards)))
    logger.info('Time per rollout: {}s'.format(np.mean(time_costs)))
def main(log_dir):
    logger = util.create_logger(name='data_collection')

    solution = util.create_solution(device='cpu:0')
    model_file = os.path.join(log_dir, 'model.npz')
    solution.load(model_file)

    trajectories = []
    env = gym.make('AntBulletEnv-v0')

    # Collect trajectories from rollouts.
    max_ep_cnt = 1000
    traj_len = 500
    ep_saved = 0
    while ep_saved < max_ep_cnt:
        ep_reward = 0
        ep_steps = 0
        obs = env.reset()
        prev_act = np.zeros(8)
        ep_traj = []
        done = False
        while not done and ep_steps < traj_len:
            act = solution.get_action(obs)
            ep_traj.append(np.concatenate([prev_act, obs, act], axis=0))
            obs, reward, done, info = env.step(act)
            ep_reward += reward
            ep_steps += 1
        logger.info('Episode:{0}, steps:{1},  reward:{2:.2f}'.format(
            ep_saved + 1, ep_steps, ep_reward))
        if ep_steps >= traj_len:
            trajectories.append(np.vstack(ep_traj))
            ep_saved += 1
        else:
            logger.info('Trajectory too short, discard.')

    trajectories = np.stack(trajectories)
    logger.info('trajectories.shape={}'.format(trajectories.shape))
    np.savez(os.path.join(log_dir, 'data.npz'), data=trajectories)
    # create the save directory (for trained model parameters, logs, arguments)
    if not os.path.exists('models'):
        os.mkdir('models')
    if os.path.exists(save_dir):
        go_ahead = input("Overwriting files in {}. Continue? (y/n): ".format(save_dir))
        if go_ahead == 'y':
            util.rm_dir(save_dir)
        else:
            exit()
    os.mkdir(save_dir)
   
    # save the args so we can recover hyperparameters, etc.
    with open(os.path.join(save_dir, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f)
    log = util.create_logger(args.verbose, os.path.join(save_dir, 'train.log'))
    eval_model.log = log  # set the eval_model logger to go to 'train.log'

    device = torch.device('cuda:{}'.format(args.gpu_id) if args.cuda and torch.cuda.is_available() else 'cpu')
    log.info("Training on {}.".format(device))

    word_vocab, word2id = data.load_vocab(args.vocab_file)
    tag_vocab, tag2id = data.load_vocab(args.tag_vocab_file)
    n_tags = len(tag_vocab)

    # select an utt_encoder and compatible utt tokenization
    log.info("Utt encoder: {}".format(args.utt_encoder))
    log.info("DAR model uses LSTM: {}".format(args.lstm))
    if args.utt_encoder == 'wordvec-avg': 
        if args.use_glove:
            weights = torch.FloatTensor(data.load_glove(args.utt_dims, word_vocab))
Example #23
import util
from bot_issue_finder import find_issues
from repo_finder import find_repos
from repo_cloner import clone_repos
import pre_bot_issue_finder
import repo_analyser_v2

if __name__ == "__main__":
    settings = util.load_settings('settings.json')
    util.verify_loglevels(settings.get('loglevels'))
    loglevels = settings.get('loglevels')
    logoutputs = settings.get('logoutputs')

    # General logger
    logger = util.create_logger('bot_issue_finder', loglevels.get('general'),
                                logoutputs.get('general'))
    util.g_logger = logger

    logger.info("======SETTINGS======")
    util.verify_settings(settings)

    if settings.get('log-pygithub-requests'):
        util.load_gh_logger(settings.get('shorten-pygithub-requests'))

    # Load GitHub Login information
    login_settings = util.load_settings('login.json')

    token_or_username = login_settings.get('login_or_token')
    if token_or_username and login_settings.get('password'):
        # Someone logged in with their username/password combination
        logger.info(f"Logged in as {token_or_username}")
Example #24
from util import create_logger

logger = create_logger('get_words', 'log.csv')

for word in ['first', 'second', 'third']:
    print(word)
    logger.debug('debug message')
    logger.warning('warn message')
    logger.critical('critical message')
Example #25
import sys
from util import create_logger

logger = create_logger('count_words', 'ERROR', 'log.csv')

count = 0
for line in sys.stdin:
    count += 1
    message = 'counter {} for {}'.format(count, line.strip())
    logger.debug(message)
    logger.warning(message)
    logger.error(message)
print(count)
Example #26
import math
import os
import statistics
from typing import Tuple, List, Union, Optional

import numpy
from openpyxl import load_workbook
from openpyxl.cell import ReadOnlyCell
from openpyxl.workbook.defined_name import DefinedName

from util import create_logger

logger = create_logger(__name__)


class Data:

    def __init__(self, filename: str = None, locations: List[str] = None, vehicle_types: List[str] = None,
                 distance_cost: List[float] = None, time_cost: List[float] = None, pallet_capacity: List[int] = None,
                 available_vehicles: List[int] = None, hired_cost_multiplier: List[float] = None,
                 demand: List[int] = None, window_start: List[float] = None, window_end: List[float] = None,
                 average_unload_time: List[float] = None, distances: List[List[float]] = None,
                 times: List[List[float]] = None):
        """If filename provided, will read in run_data from .xlsx using OpenPyXL.
        If no filename provided, will check named parameters for values."""
        if filename:
            try:
                self.workbook = load_workbook(filename=filename, read_only=True, data_only=True)

                # one_dimension_sheet: Worksheet = self.workbook["1D"]
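The docstring above describes two construction paths for Data. A minimal usage sketch under those assumptions follows; the spreadsheet name and every value below are invented for illustration, and the keyword-argument path is assumed to behave as the signature and docstring suggest.

# Hypothetical usage of the Data class above; file name and values are made up.
data_from_xlsx = Data(filename="run_data.xlsx")

data_from_args = Data(
    locations=["Depot", "Customer A"],
    vehicle_types=["van"],
    distance_cost=[0.5],
    time_cost=[20.0],
    pallet_capacity=[10],
    available_vehicles=[3],
    hired_cost_multiplier=[1.5],
    demand=[0, 4],
    window_start=[0.0, 8.0],
    window_end=[24.0, 17.0],
    average_unload_time=[0.0, 0.25],
    distances=[[0.0, 12.3], [12.3, 0.0]],
    times=[[0.0, 0.4], [0.4, 0.0]],
)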
Example #27
def run_experiment(args):
    """
  The entry point for the dynamics extraction algorithm.
  """
    from util import create_logger

    locale.setlocale(locale.LC_ALL, '')

    policy = torch.load(args.policy)

    env_fn = env_factory(True)

    layers = [int(x) for x in args.layers.split(',')]

    env = env_fn()
    policy.init_hidden_state()
    policy(torch.tensor(env.reset()).float())
    latent_dim = get_hiddens(policy).shape[0]

    models = []
    opts = []
    for fn in [env.get_damping, env.get_mass, env.get_ipos]:
        output_dim = fn().shape[0]
        model = Model(latent_dim, output_dim, layers=layers)

        models += [model]
        opts += [optim.Adam(model.parameters(), lr=args.lr, eps=1e-5)]

        model.policy_path = args.policy

    logger = create_logger(args)

    best_loss = None
    actor_dir = os.path.split(args.policy)[0]
    create_new = True
    if os.path.exists(os.path.join(logger.dir, 'test_latents.pt')):
        x = torch.load(os.path.join(logger.dir, 'train_latents.pt'))
        test_x = torch.load(os.path.join(logger.dir, 'test_latents.pt'))

        damps = torch.load(os.path.join(logger.dir, 'train_damps.pt'))
        test_damps = torch.load(os.path.join(logger.dir, 'test_damps.pt'))

        masses = torch.load(os.path.join(logger.dir, 'train_masses.pt'))
        test_masses = torch.load(os.path.join(logger.dir, 'test_masses.pt'))

        ipos = torch.load(os.path.join(logger.dir, 'train_ipos.pt'))
        test_ipos = torch.load(os.path.join(logger.dir, 'test_ipos.pt'))

        if args.points > len(x) + len(test_x):
            create_new = True
        else:
            create_new = False

    if create_new:
        if not ray.is_initialized():
            ray.init(num_cpus=args.workers)

        print("Collecting {:4d} timesteps of data.".format(args.points))
        points_per_worker = max(args.points // args.workers, 1)
        start = time.time()

        damps, masses, ipos, x = concat(
            ray.get([
                collect_data.remote(policy, points=points_per_worker)
                for _ in range(args.workers)
            ]))

        split = int(0.8 * len(x))

        test_x = x[split:]
        x = x[:split]

        test_damps = damps[split:]
        damps = damps[:split]

        test_masses = masses[split:]
        masses = masses[:split]

        test_ipos = ipos[split:]
        ipos = ipos[:split]

        print(
            "{:3.2f} to collect {} timesteps.  Training set is {}, test set is {}"
            .format(time.time() - start,
                    len(x) + len(test_x), len(x), len(test_x)))
        torch.save(x, os.path.join(logger.dir, 'train_latents.pt'))
        torch.save(test_x, os.path.join(logger.dir, 'test_latents.pt'))

        torch.save(damps, os.path.join(logger.dir, 'train_damps.pt'))
        torch.save(test_damps, os.path.join(logger.dir, 'test_damps.pt'))

        torch.save(masses, os.path.join(logger.dir, 'train_masses.pt'))
        torch.save(test_masses, os.path.join(logger.dir, 'test_masses.pt'))

        torch.save(ipos, os.path.join(logger.dir, 'train_ipos.pt'))
        torch.save(test_ipos, os.path.join(logger.dir, 'test_ipos.pt'))

    for epoch in range(args.epochs):

        random_indices = SubsetRandomSampler(range(len(x) - 1))
        sampler = BatchSampler(random_indices,
                               args.batch_size,
                               drop_last=False)

        for j, batch_idx in enumerate(sampler):
            batch_x = x[batch_idx]  #.float()
            batch = [damps[batch_idx], masses[batch_idx], ipos[batch_idx]]

            losses = []
            for model, batch_y, opt in zip(models, batch, opts):
                loss = 0.5 * (batch_y - model(batch_x)).pow(2).mean()

                opt.zero_grad()
                loss.backward()
                opt.step()

                losses.append(loss.item())

            print("Epoch {:3d} batch {:4d}/{:4d}      ".format(
                epoch, j,
                len(sampler) - 1),
                  end='\r')

        train_y = [damps, masses, ipos]
        test_y = [test_damps, test_masses, test_ipos]
        order = ['damping', 'mass', 'com']

        with torch.no_grad():
            print("\nEpoch {:3d} losses:".format(epoch))
            for model, y_tr, y_te, name in zip(models, train_y, test_y, order):
                loss_total = 0.5 * (y_tr - model(x)).pow(2).mean().item()

                preds = model(test_x)
                test_loss = 0.5 * (y_te - preds).pow(2).mean().item()
                pce = torch.mean(torch.abs((y_te - preds) / (y_te + 1e-5)))
                err = torch.mean(torch.abs((y_te - preds)))

                logger.add_scalar(logger.arg_hash + '/' + name + '_loss',
                                  test_loss, epoch)
                logger.add_scalar(logger.arg_hash + '/' + name + '_percenterr',
                                  pce, epoch)
                logger.add_scalar(logger.arg_hash + '/' + name + '_abserr',
                                  err, epoch)
                model.dyn_parameter = name
                torch.save(model,
                           os.path.join(logger.dir, name + '_extractor.pt'))
                print(
                    "\t{:16s}: train loss {:7.6f} test loss {:7.6f}, err {:5.4f}, percent err {:3.2f}"
                    .format(name, loss_total, test_loss, err, pce))
Example #28
__game_state__ = "playing"

__fov_recompute__ = True
__lookmode__ = False

__msgs__ = []
__msg_history__ = []
MSG_WIDTH = 50
MSG_COUNT = 10

__hp_warning__ = 0.5
__trap_low_hp_warning__ = 0.3

__show_chapter__ = False
__chapter_text__ = 'Chapter 1. Departure'
log = util.create_logger()
# DEBUG - debug only (wiz mode), NONE - general info, WARN - warning (hp/mp),
# CRITICAL - critical hits, stepping on traps, critical hp level,
# INFO - general info on skill level-ups, etc.
message_levels = {'DEBUG': 0, 'NONE': 1, 'WARN': 2, 'CRITICAL': 3, 'INFO': 4, 'DAMAGE': 5}
prev_message = None
prev_message_count = 1
def message(text, level = 1):
    log.info(text)
    global prev_message, prev_message_count
    if isinstance(level, str):
        level = message_levels[level]
    wraped_msg = textwrap.wrap(text, MSG_WIDTH)
    if prev_message == hash(text):
        prev_message_count += 1
        for line in wraped_msg:
from collections import Iterable
import os
from random import randrange, random, choice, shuffle
from features import features
import thirdparty.libtcod.libtcodpy as libtcod
import util
import des
from items import Item, items
from types import FunctionType
from critters import mobs
from maputils import replace_feature, replace_feature_atxy, find_feature, square_search_nearest

logger = util.create_logger('DG')
ft = util.NamedMap(features)

default_map_chars = {
    'X': ft.fixed_wall,
    '#': ft.rock_wall,
    ' ': ft.none,
    '.': ft.floor,
    ',': ft.grass,
    '+': ft.door,
    '0': ft.window,
    '{': ft.fountain,
    '<': ft.stairs_up,
    '>': ft.stairs_down,
    'h': ft.chair,
    'T': ft.table,
    '8': ft.bed,
}
Example #30
dev_file = "data/dev.csv"
test_file = "data/test.csv"

mode = sys.argv[1]
if mode != "train" and mode != "test":
    raise ValueError

# Running
with open(vocab_file, 'r') as f:
    vocab = json.load(f)
glove_emb = torch.load(glove_emb_file)
model = HARM_Model(len(vocab), glove_emb).to(device)

if mode == "train":
    best_mrr = 0.
    logger = create_logger("log/", "train.log")

    optimizer = torch.optim.Adadelta(model.parameters(), lr=lr)
    clock = LossClock(["loss"], interval=20)

    ds_train = TrainLoader(train_file, vocab, device)
    ds_dev = TestLoader(dev_file, vocab, device)

    for epoch in range(total_epochs):
        # train
        logger.info("=" * 30 + f"Train epoch {epoch}" + "=" * 30)
        for query, docs in ds_train():
            r = model(query, docs)
            margin_loss = max_margin_loss(r[:1].expand(r[1:].size(0)), r[1:])
            # update
            optimizer.zero_grad()
Example #31
import unittest
import random
import itertools
import logging

import util
import model
import train

import torch.optim as optim
import torch.nn as nn

log = util.create_logger(logging.DEBUG)
train.log = log

# synthetic training data
random.seed(777)
input_vocab = list(range(50))  # 50 token vocab
label_vocab = list(range(7))  # 7 DA tags
data = [
    [
        random.choices(input_vocab,
                       k=random.randint(3, 10))  # utts of 3-10 tokens
        for r in range(random.randint(2, 15))
    ]  # 2-15 utts per dialogue
    for i in range(5)
]  # 5 dialogues
labels = [random.choices(label_vocab, k=len(convo)) for convo in data]

utt_dims = 250
n_hidden = 50
from collections import Iterable
import os
from random import randrange, random, choice, shuffle
from features import features
import thirdparty.libtcod.libtcodpy as libtcod
import util
import des
from items import Item, items
from types import FunctionType
from critters import mobs
from maputils import replace_feature, replace_feature_atxy, find_feature, square_search_nearest

logger = util.create_logger('DG')
ft = util.NamedMap(features)

default_map_chars = {
    'X': ft.fixed_wall,
    '#': ft.rock_wall,
    ' ': ft.none,
    '.': ft.floor,
    ',': ft.grass,
    '+': ft.door,
    '0': ft.window,
    '{': ft.fountain,
    '<': ft.stairs_up,
    '>': ft.stairs_down,
    'h': ft.chair,
    'T': ft.table,
    '8': ft.bed,
}
def main(args):
    config = configparser.ConfigParser(
        interpolation=configparser.ExtendedInterpolation())
    config.read(args.config)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # check if output dir exists. if so, assign a new one
    if os.path.isdir(args.outdir):
        # create new output dir
        outdir = os.path.join(args.outdir, str(uuid.uuid4()))
    else:
        outdir = args.outdir

    # make the output dir
    os.makedirs(outdir)
    if args.save_best:
        os.makedirs(os.path.join(outdir, 'best_model'))

    # create a logger
    logger = create_logger(__name__,
                           to_disk=True,
                           log_file='{}/{}'.format(outdir, args.logfile))
    tasks = []
    for task_name in args.tasks.split(','):
        task = load_task(
            os.path.join(args.task_spec, '{}.yml'.format(task_name)))
        tasks.append(task)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device)

    if not args.load_checkpoint:
        tokenizer = setup_customized_tokenizer(model=args.tokenizer,
                                               do_lower_case=False,
                                               config=config,
                                               tokenizer_class=BertTokenizer)
    else:
        tokenizer = BertTokenizer.from_pretrained(args.checkpoint)

    train_datasets = {}
    dev_dataloaders = {}
    test_dataloaders = {}

    for task_id, task in enumerate(tasks):
        task.set_task_id(task_id)
        logger.info('Task {}: {} on {}'.format(task_id, task.task_type,
                                               task.dataset))
        if 'train' in task.splits:
            train_datasets[task_id] = get_data(task=task,
                                               split='train',
                                               config=config,
                                               tokenizer=tokenizer)
            train_datasets[task_id].set_task_id(task_id)
            task.set_label_map(train_datasets[task_id].label_map)

        if 'dev' in task.splits:
            dev_data = get_data(task=task,
                                split='dev',
                                config=config,
                                tokenizer=tokenizer)
            dev_data.set_task_id(task_id)
            dev_dataloader = DataLoader(dev_data,
                                        shuffle=False,
                                        batch_size=8,
                                        collate_fn=dev_data.collate_fn)
            dev_dataloaders[task_id] = dev_dataloader

        if 'test' in task.splits:
            test_data = get_data(task=task,
                                 split='test',
                                 config=config,
                                 tokenizer=tokenizer)
            test_data.set_task_id(task_id)
            if task.dataset == 'iulaconv':
                import json
                #with open('iulaconv_test.json.analsyis', 'w') as f:
                #    for elm in test_data:
                #        f.write(json.dumps(elm) + '\n')
                #f.close()

            test_dataloader = DataLoader(test_data,
                                         shuffle=False,
                                         batch_size=8,
                                         collate_fn=test_data.collate_fn)
            test_dataloaders[task_id] = test_dataloader

    padding_label = train_datasets[0].padding_label

    sorted_train_datasets = [ds for _, ds in sorted(train_datasets.items())]

    mtl_dataset = MultiTaskDataset(sorted_train_datasets)
    multi_task_batch_sampler = MultiTaskBatchSampler(
        sorted_train_datasets,
        batch_size=args.bs,
        mix_opt=args.mix_opt,
        extra_task_ratio=args.extra_task_ratio,
        annealed_sampling=args.annealed_sampling,
        max_epochs=args.epochs)
    mtl_train_dataloader = DataLoader(mtl_dataset,
                                      batch_sampler=multi_task_batch_sampler,
                                      collate_fn=mtl_dataset.collate_fn,
                                      pin_memory=False)

    model = MTLModel(bert_encoder=args.bert_model,
                     device=device,
                     tasks=tasks,
                     padding_label_idx=padding_label,
                     load_checkpoint=args.load_checkpoint,
                     checkpoint=os.path.join(args.checkpoint, 'model.pt'),
                     tokenizer=tokenizer)

    # get optimizer
    # TODO: in case of loading from checkpoint, initialize optimizer using saved optimizer state dict
    optimizer = get_optimizer(optimizer_name='adamw',
                              model=model,
                              lr=args.lr,
                              eps=args.eps,
                              decay=args.decay)

    # get lr schedule
    total_steps = (len(mtl_dataset) /
                   args.grad_accumulation_steps) * args.epochs
    warmup_steps = args.warmup_frac * total_steps
    logger.info(
        'Bs_per_device={}, gradient_accumulation_steps={} --> effective bs= {}'
        .format(args.bs, args.grad_accumulation_steps,
                args.bs * args.grad_accumulation_steps))
    logger.info('Total steps: {}'.format(total_steps))
    logger.info('Scheduler: {} with {} warmup steps'.format(
        'warmuplinear', warmup_steps))

    scheduler = get_scheduler(optimizer,
                              scheduler='warmuplinear',
                              warmup_steps=warmup_steps,
                              t_total=total_steps)

    model.fit(tasks,
              optimizer,
              scheduler,
              gradient_accumulation_steps=args.grad_accumulation_steps,
              train_dataloader=mtl_train_dataloader,
              dev_dataloaders=dev_dataloaders,
              test_dataloaders=test_dataloaders,
              epochs=args.epochs,
              evaluation_step=args.evaluation_steps,
              save_best=args.save_best,
              outdir=outdir,
              predict=args.predict)
Example #34
import torch
from model import fn_cls
import util
from transformers import BertTokenizer, BertModel
import torch.nn as nn
import numpy as np
import time
import argparse

parser = argparse.ArgumentParser("Text-cls training task.")
parser.add_argument("--epoch", type=int, default=10)
logger = util.create_logger("Bert Training", log_file='train.log')
args = parser.parse_args()


def main():
    logger.info("Loading data...")
    tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
    train_loader = util.profile('train.txt', tokenizer)
    # test_loader = util.profile('test.txt', tokenizer)
    dev_loader = util.profile('dev.txt', tokenizer)
    logger.info("Build model...")
    bert = BertModel.from_pretrained('bert-base-chinese')
    model = fn_cls(bert)
    criterion = nn.BCELoss()
    sigmoid = nn.Sigmoid()
    max_epoch = args.epoch
    model.cuda()
    model.train()
    optim = torch.optim.Adam(model.parameters(), lr=1e-5)
Example #35
def main(config):
    logger = util.create_logger(name='train_log', log_dir=config.log_dir)
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir, exist_ok=True)
    util.save_config(config.log_dir, config.config)
    logger.info('Logs and models will be saved in {}.'.format(config.log_dir))

    rnd = np.random.RandomState(seed=config.seed)
    solution = util.create_solution(device='cpu:0')
    num_params = solution.get_num_params()
    if config.load_model is not None:
        solution.load(config.load_model)
        print('Loaded model from {}'.format(config.load_model))
        init_params = solution.get_params()
    else:
        init_params = None
    solver = cma.CMAEvolutionStrategy(
        x0=np.zeros(num_params) if init_params is None else init_params,
        sigma0=config.init_sigma,
        inopts={
            'popsize': config.population_size,
            'seed': config.seed if config.seed > 0 else 42,
            'randn': np.random.randn,
        },
    )

    best_so_far = -float('Inf')
    ii32 = np.iinfo(np.int32)
    repeats = [config.reps] * config.population_size

    device_type = 'cpu' if args.num_gpus <= 0 else 'cuda'
    num_devices = mp.cpu_count() if args.num_gpus <= 0 else args.num_gpus
    with mp.get_context('spawn').Pool(
            initializer=worker_init,
            initargs=(args.config, device_type, num_devices),
            processes=config.num_workers,
    ) as pool:
        for n_iter in range(config.max_iter):
            params_set = solver.ask()
            task_seeds = [rnd.randint(0, ii32.max)] * config.population_size
            fitnesses = []
            ss = 0
            while ss < config.population_size:
                ee = ss + min(config.num_workers, config.population_size - ss)
                fitnesses.append(
                    pool.map(func=get_fitness,
                             iterable=zip(params_set[ss:ee], task_seeds[ss:ee],
                                          repeats[ss:ee])))
                ss = ee
            fitnesses = np.concatenate(fitnesses)
            if isinstance(solver, cma.CMAEvolutionStrategy):
                # CMA minimizes.
                solver.tell(params_set, -fitnesses)
            else:
                solver.tell(fitnesses)
            logger.info(
                'Iter={0}, '
                'max={1:.2f}, avg={2:.2f}, min={3:.2f}, std={4:.2f}'.format(
                    n_iter, np.max(fitnesses), np.mean(fitnesses),
                    np.min(fitnesses), np.std(fitnesses)))

            best_fitness = max(fitnesses)
            if best_fitness > best_so_far:
                best_so_far = best_fitness
                model_path = os.path.join(config.log_dir, 'best.npz')
                save_params(solver=solver,
                            solution=solution,
                            model_path=model_path)
                logger.info(
                    'Best model updated, score={}'.format(best_fitness))

            if (n_iter + 1) % config.save_interval == 0:
                model_path = os.path.join(config.log_dir,
                                          'iter_{}.npz'.format(n_iter + 1))
                save_params(solver=solver,
                            solution=solution,
                            model_path=model_path)
# -*- coding: utf-8 -*-

from __future__ import print_function
from node import Node
from random import choice
from twisted.internet import reactor
from twisted.web.error import Error
from twisted.web.resource import NoResource
from twisted.web.resource import Resource
from twisted.web.server import NOT_DONE_YET
from twisted.web.server import Site
import cgi
import json
import util

logger = util.create_logger("node_api.log")


class NodeAPI(Resource):
    """
    Handles the basic routing of requests to the appropriate resource.
    '/{register, unregister}/<service_name>' maps to the 'Register' resource.
    '/<service_name>' maps to the 'Service' resource.
    """

    def __init__(self, node, web_port):
        Resource.__init__(self)
        self.node = node

        reactor.listenTCP(web_port, Site(self))
        logger.info("API listening on: {}".format(web_port))
Example #37
import os
import sys
from npc import *
import world
from random import choice, shuffle, randrange
from itertools import combinations, chain
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
import util
import items
import acquire
logger = util.create_logger('plot')
#3d10 + 20 roll for mNPC
MNPC_ROLL = (3, 10, 20)
DEITY_ROLL = (2, 3, 2)
#1d4 + 4 (from 6-10 quests)
QUEST_GIVER_ROLL =  (2, 3, 4)
#1d4 - 1 roll for immobile NPCs (means 0-3).
IMMOBILE_NPC_ROLL = (1, 4, -1)
#uniques roll 5d2 + 3  (from 8 to 13)
UNIQUES_ROLL = (5, 2, 3)
# traders roll; we want at least 10 of them, 20 at max.
TRADERS_ROLL = (2, 6, 8)
#min artefacts count
ARTEFACTS_COUNT = (4, 3, 4)
CITIES_COUNT = 6
# the chance of a certain NPC becoming the holder of some artefact
ARTEFACT_OWNER_CHANCE = 4

debug_names = True
def assign_random_name_from_list(instances, names, demon = False):
    for npc in world.mNPC:
        if npc.name is not None:
            continue
 def __init__(self):
     self.logger = create_logger(__name__)
     self.actions_file = os.getenv(self.ACTIONS_FILE, self.ACTIONS_FILE_WRITE_LOCATION)
     self.conf_file = os.getenv(self.LEGACY_CONFIG, None)
     self.projectre = re.compile(r'^[a-z0-9]([-a-z0-9]*[a-z0-9])?$')
     self.projectmaxlen = 63