Code example #1
File: main.py  Project: congchan/language-model
def train(load_best_loss):
    '''If the gluon trainer recognizes multiple devices,
    it will automatically aggregate the gradients and synchronize the parameters.'''

    logging.info('-' * 50 + "Begin training" + '-' * 50)
    # Loop over epochs.
    best_loss = float("Inf")
    not_improves_times = 0
    best_epoch = 0
    for epoch in range(start_epoch, args.epochs):

        cur_lr = trainer.learning_rate
        tic = time.time()
        train_one_epoch(epoch, cur_lr)
        val_loss, val_time = evaluate(val_data, eval_batch_size)
        toc = time.time()
        trainer.set_learning_rate(cur_lr)

        logging.info('-' * 120)
        logging.info(
            '| end of epoch {:3d} with lr {:2.4f} | train time: {:5.2f}s | val time: {:5.2f}s | '
            ' valid loss {:5.3f} | valid ppl {:8.2f}'.format(
                epoch, cur_lr, toc - tic, val_time, val_loss,
                math.exp(val_loss)))
        epoch_info.append(
            [epoch, cur_lr, toc - tic, val_time, val_loss,
             math.exp(val_loss)])
        utils.save_info(epoch_info, epoch_file)
        logging.info('-' * 120)
        ''' If no pre-trained model is loaded, load_best_loss is float("Inf"),
                so any improvement will be saved and load_best_loss updated.
            If a pre-trained model is loaded, load_best_loss holds a real value,
                but val_loss is still needed to find a good downward direction along the loss surface.
        '''
        if val_loss < best_loss:
            if val_loss < load_best_loss:
                load_best_loss = val_loss
                model.save_parameters(os.path.join(path, 'model.params'))
                utils.read_kvstore(trainingfile, update={'lr': cur_lr})
                logging.info('Performance improved; saving the best model!')
            else:
                logging.info('Performance improved, but not better than the loaded best.')
            best_loss = val_loss
            best_epoch = epoch
            not_improves_times = 0
        else:
            not_improves_times += 1
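            # after args.early_stop consecutive non-improving epochs, roll back to the
            # best saved model and anneal the learning rate by args.schedual_rate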
            if not_improves_times == args.early_stop and args.schedual_rate:
                not_improves_times = 0
                load_model()
                new_lr = args.schedual_rate * cur_lr
                trainer.set_learning_rate(new_lr)
                logging.info(
                    'No improvement, anneal lr to {:2.4f}, rolling back to epoch {}'
                    .format(new_lr, best_epoch))
                epoch_info.append(
                    ["roll_back_to", None, None, None, None, None, best_epoch])
                batch_info.append(["roll_back_to", best_epoch])
        utils.read_kvstore(trainingfile, update={'epoch': epoch})
Code example #2
def main():
    page = 0

    while True:
        page += 1

        payload = {
            'ss': 1,
            'page': page,
        }
        user_agent = generate_user_agent()
        headers = {
            'User-Agent': user_agent,
        }

        print(f'PAGE: {page}')
        response = requests.get(HOST + ROOT_PATH,
                                params=payload,
                                headers=headers)
        response.raise_for_status()
        # random_sleep()

        # if response.status_code != 200:
        #     print('something wrong!')
        #     break

        html = response.text

        soup = BeautifulSoup(html, 'html.parser')

        class_ = 'card card-hover card-visited wordwrap job-link'
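        # "hot" vacancies carry an extra js-hot-block class, so retry with that
        # selector when the plain card class matches nothing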
        cards = soup.find_all('div', class_=class_)
        if not cards:
            cards = soup.find_all('div', class_=class_ + ' js-hot-block')

        result = []
        if not cards:
            break

        for card in cards:
            tag_a = card.find('h2').find('a')
            title = tag_a.text
            href = tag_a['href']
            result.append([title, href])
            # get vacancy full info

        save_info(result)
Code example #3
                elif leave == 'y':
                    cap.release()
                    cv2.destroyAllWindows()
                    print("Exit label program")
                    sys.exit(1)
                elif leave == 'n':
                    break

        if leave == 'y':
            cap.release()
            cv2.destroyAllWindows()
            print("Exit label program")
            sys.exit(1)

    elif key == ord('s'):
        saved_success = save_info(info, video_path)

    elif key == ord('n'):
        if frame_no >= n_frames - 1:
            print("This is the last frame")
            continue
        frame_no += 1
        image = go2frame(cap, frame_no, info)
        print("Frame No.{}".format(frame_no))

    elif key == ord('p'):
        if frame_no == 0:
            print("This is the first frame")
            continue
        frame_no -= 1
        image = go2frame(cap, frame_no, info)
Code example #4
def run_episode(client: carla.Client, controller: Controller,
                args) -> (dict, dict):
    '''
    Runs a single episode. Configures the world and agent, spawns it on the map and controls it from the start point
    to a termination state.

    :param client: carla.Client, client object connected to the Carla Server
    :param actor: carla.Vehicle
    :param controller: inherits abstract Controller class
    :param spawn_points: original or inverted list of spawn points
    :param writer: SummaryWriter, logger for tensorboard
    :param viz: visdom.Vis, other logger #refactor to one dictionary
    :param args: argparse.args, config #refactor to dict
    :return: status:str, success
             actor_dict -> speed, wheels turn, throttle, reward -> can be taken from actor?
             env_dict -> consecutive locations of actor, distances to closest spawn point, starting spawn point
             array[np.array] -> photos
    '''

    NUM_STEPS = args.num_steps
    spawn_points_df = pd.read_csv(f'{DATA_PATH}/spawn_points/{args.map}.csv')
    spawn_points = df_to_spawn_points(spawn_points_df,
                                      n=10000,
                                      invert=args.invert)
    environment = Environment(client=client)
    world = environment.reset_env(args)

    agent_config = {
        'world': world,
        'controller': controller,
        'vehicle': VEHICLES[args.vehicle],
        'sensors': SENSORS,
        'spawn_points': spawn_points,
        'invert': args.invert
    }
    environment.init_agents(no_agents=args.no_agents,
                            agent_config=agent_config)

    if len(environment.agents) < 1:
        return dict({}), []

    spectator = world.get_spectator()
    spectator.set_transform(
        numpy_to_transform(spawn_points[environment.agents[0].spawn_point_idx -
                                        30]))

    environment.stabilize_vehicles()

    #INITIALIZE SENSORS
    environment.initialize_agents_sensors()

    for i in range(DATA_POINTS):
        world.tick()

    environment.initialize_agents_reporting()
    for agent in environment.agents:
        agent._release_control()
        print(f'{agent} control released')

    save_paths = [agent.save_path for agent in environment.agents]
    status = dict(
        {str(agent): 'Max steps exceeded'
         for agent in environment.agents})
    slow_frames = [0 for i in range(len(environment.agents))]

    for step in range(NUM_STEPS):

        states = [
            agent.get_state(step, retrieve_data=True)
            for agent in environment.agents
        ]
        actions = [
            agent.play_step(state)
            for agent, state in zip(environment.agents, states)
        ]

        world.tick()

        next_states = [{
            'velocity': agent.velocity,
            'location': agent.location
        } for agent in environment.agents]

        rewards = []
        for agent, state, next_state in zip(environment.agents, states,
                                            next_states):
            reward = environment.calc_reward(points_3D=agent.waypoints,
                                             state=state,
                                             next_state=next_state,
                                             gamma=GAMMA,
                                             step=step,
                                             punishment=EXTRA_REWARD /
                                             agent.initial_distance)
            rewards.append(reward)

        # iterate over a snapshot of the agent list so that removing finished agents
        # below does not skip entries of the zip iterator
        for idx, (state, action, reward, agent) in enumerate(
                zip(states, actions, rewards, list(environment.agents))):
            if agent.distance_2finish < 50:
                print(
                    f'agent {str(agent)} finished the race in {step} steps car {args.vehicle}'
                )
                step_info = save_info(path=agent.save_path,
                                      state=state,
                                      action=action,
                                      reward=EXTRA_REWARD * GAMMA**step)
                status[str(agent)] = 'Finished'
                terminal_state = agent.get_state(step=step + 1,
                                                 retrieve_data=False)
                save_terminal_state(path=agent.save_path,
                                    state=terminal_state,
                                    action=action)
                agent.destroy(data=True, step=step)
                environment.agents.remove(agent)
                continue

            elif agent.collision > 0:
                print(
                    f'failed, collision {str(agent)} at step {step}, car {args.vehicle}'
                )
                step_info = save_info(path=agent.save_path,
                                      state=state,
                                      action=action,
                                      reward=reward - EXTRA_REWARD *
                                      (GAMMA**step))
                status[str(agent)] = 'Collision'
                terminal_state = agent.get_state(step=step + 1,
                                                 retrieve_data=False)
                save_terminal_state(path=agent.save_path,
                                    state=terminal_state,
                                    action=action)
                agent.destroy(data=True, step=step)
                environment.agents.remove(agent)
                continue

            if state['velocity'] < 10:
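                # count frames spent below the speed threshold; after more than 100 the
                # agent is treated as stuck and its episode ends with a penalty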
                if slow_frames[idx] > 100:
                    print(
                        f'agent {str(agent)} stuck, finish on step {step}, car {args.vehicle}'
                    )
                    step_info = save_info(path=agent.save_path,
                                          state=state,
                                          action=action,
                                          reward=reward - EXTRA_REWARD *
                                          (GAMMA**step))
                    status[str(agent)] = 'Stuck'
                    terminal_state = agent.get_state(step=step + 1,
                                                     retrieve_data=False)
                    terminal_state['collisions'] = 2500
                    save_terminal_state(path=agent.save_path,
                                        state=terminal_state,
                                        action=action)
                    agent.destroy(data=True, step=step)
                    environment.agents.remove(agent)
                    continue
                slow_frames[idx] += 1

            step_info = save_info(path=agent.save_path,
                                  state=state,
                                  action=action,
                                  reward=reward)

        if len(environment.agents) < 1:
            print('fini')
            break

    if len(environment.agents) > 1:
        for agent in environment.agents:
            agent.destroy(data=True, step=NUM_STEPS)

    for (agent, info), path in zip(status.items(), save_paths):
        df = pd.read_csv(f'{path}/episode_info.csv')
        if args.controller == 'MPC':
            idx = 26
            df.loc[:idx, 'steer'] = 0.
            df.loc[:idx, 'state_steer'] = 0.
        if info == 'Max steps exceeded':
            idx = len(df) - 1
            df.loc[idx, 'steer'] = 0.
            df.loc[idx, 'gas_brake'] = 0.
            df.loc[idx, 'reward'] = 0.
            df.loc[idx, 'done'] = 1.
        #Update qvalues
        df['q'] = [sum(df['reward'][i:]) for i in range(df.shape[0])]
        df.to_csv(f'{path}/episode_info.csv', index=False)

    world.tick()
    world.tick()

    return status, save_paths
Code example #5
File: CMan.py  Project: vinhsuhi/LBSN2Vec
                                friendship_old_ori,
                                friendship_new,
                                k=10,
                                maps_OritP=maps_OritP,
                                maps_PtOri=maps_PtOri)
        exit()

    # --------------------------------------------- #
    sentences = random_walk(friendship_old_persona, n_users, args,
                            user_location, center_ori_maps)
    # --------------------------------------------- #

    neg_user_samples, neg_checkins_samples = sample_neg(
        friendship_old_persona, persona_checkins)
    embs_ini = initialize_emb(args, n_nodes_total)
    save_info(args, sentences, embs_ini, neg_user_samples,
              neg_checkins_samples, train_user_checkins)

    learn.apiFunction("temp/processed/", args.learning_rate, args.K_neg,
                      args.win_size, args.num_epochs, args.workers,
                      args.mobility_ratio)
    embs_file = "temp/processed/embs.txt"
    embs = read_embs(embs_file)
    embs_user = embs[:offset1]

    print("Current ACC")
    friendship_pred_persona(embs_user,
                            friendship_old_ori,
                            friendship_new,
                            k=10,
                            maps_OritP=maps_OritP,
                            maps_PtOri=maps_PtOri)
Code example #6
def run_episode(client:carla.Client, controller:Controller, buffer:ReplayBuffer,
                writer:SummaryWriter, global_step:int, args) -> (ReplayBuffer, dict, dict):
    '''
    Runs a single episode. Configures the world and agent, spawns it on the map and controls it from the start point
    to a termination state.

    :param client: carla.Client, client object connected to the Carla Server
    :param actor: carla.Vehicle
    :param controller: inherits abstract Controller class
    :param spawn_points: original or inverted list of spawn points
    :param writer: SummaryWriter, logger for tensorboard
    :param viz: visdom.Vis, other logger #refactor to one dictionary
    :param args: argparse.args, config #refactor to dict
    :return: status:str, success
             actor_dict -> speed, wheels turn, throttle, reward -> can be taken from actor?
             env_dict -> consecutive locations of actor, distances to closest spawn point, starting spawn point
             array[np.array] -> photos
    '''

    NUM_STEPS = args.num_steps
    spawn_points_df = pd.read_csv(f'{DATA_PATH}/spawn_points/{args.map}.csv')
    spawn_points = df_to_spawn_points(spawn_points_df, n=10000, invert=args.invert)
    environment = Environment(client=client)
    world = environment.reset_env(args)
    agent_config = {'world':world, 'controller':controller, 'vehicle':VEHICLES[args.vehicle],
                    'sensors':SENSORS, 'spawn_points':spawn_points, 'invert':args.invert}
    environment.init_agents(no_agents=args.no_agents, agent_config=agent_config)
    if len(environment.agents) < 1:
        return buffer, dict({}), []
    args_path = '/'.join(environment.agents[0].save_path.split('/')[:-1])
    os.makedirs(args_path, exist_ok=True)
    json.dump({'global_step':global_step, **vars(args)}, fp=open(f'{args_path}/simulation_global_step_{global_step}_args.json', 'a'), indent=4)

    spectator = world.get_spectator()
    spectator.set_transform(numpy_to_transform(
        spawn_points[environment.agents[0].spawn_point_idx-30]))

    environment.stabilize_vehicles()

    environment.initialize_agents_sensors()

    for i in range(args.no_data):
        world.tick()
        for agent in environment.agents:
            agent.retrieve_data()

    environment.initialize_agents_reporting()
    for agent in environment.agents:
        agent._release_control()
        print(f'{agent} control released')

    save_paths = [agent.save_path for agent in environment.agents]
    status = dict({str(agent): 'Max steps exceeded' for agent in environment.agents})
    slow_frames = [0 for i in range(len(environment.agents))]

    episode_actor_loss_v = 0
    episode_critic_loss_v = 0
    local_step = 0
    agents_2pop = []
    for step in range(NUM_STEPS):
        local_step = step
        states = [agent.get_state(step, retrieve_data=True) for agent in environment.agents]
        actions = [agent.play_step(state) for agent, state in zip(environment.agents, states)]

        world.tick()
        for agent in environment.agents:
            agent.retrieve_data()
        next_states = [{'velocity': agent.velocity,'location': agent.location} for agent in environment.agents]

        rewards = []
        for agent, state, next_state in zip(environment.agents, states, next_states):
            reward = environment.calc_reward(points_3D=agent.waypoints, state=state, next_state=next_state,
                                             gamma=GAMMA, step=step, punishment=EXTRA_REWARD / agent.initial_distance)
            rewards.append(reward)

        for idx, (state, action, reward, agent) in enumerate(zip(states, actions, rewards, environment.agents)):
            if agent.distance_2finish < 50:
                print(f'agent {str(agent)} finished the race in {step} steps car {args.vehicle}')

                step_info = save_info(path=agent.save_path, state=state, action=action, reward=reward)
                buffer.add_step(path=agent.save_path, step=step_info)
                status[str(agent)] = 'Finished'
                terminal_state = agent.get_state(step=step+1, retrieve_data=False)
                save_terminal_state(path=agent.save_path, state=terminal_state, action=action)

                agent.destroy(data=True, step=step)
                agents_2pop.append(idx)
                continue

            elif agent.collision > 0:
                print(f'failed, collision {str(agent)} at step {step}, car {args.vehicle}')
                step_info = save_info(path=agent.save_path, state=state, action=action,
                                      reward=reward - EXTRA_REWARD * (GAMMA ** step))
                buffer.add_step(path=agent.save_path, step=step_info)
                status[str(agent)] = 'Collision'
                terminal_state = agent.get_state(step=step+1, retrieve_data=False)
                save_terminal_state(path=agent.save_path, state=terminal_state, action=action)

                agent.destroy(data=True, step=step)
                agents_2pop.append(idx)
                continue

            if state['velocity'] < 10:
                if slow_frames[idx] > SLOW_FRAMES:
                    print(f'agent {str(agent)} stuck, finish on step {step}, car {args.vehicle}')
                    step_info = save_info(path=agent.save_path, state=state, action=action,
                                          reward=reward - EXTRA_REWARD * (GAMMA ** (step-0.8*SLOW_FRAMES)))
                    buffer.add_step(path=agent.save_path, step=step_info)
                    status[str(agent)] = 'Stuck'
                    terminal_state = agent.get_state(step=step+1, retrieve_data=False)
                    terminal_state['collisions'] = 2500
                    save_terminal_state(path=agent.save_path, state=terminal_state, action=action)
                    agent.destroy(data=True, step=step)
                    agents_2pop.append(idx)
                    continue
                slow_frames[idx] += 1

            step_info = save_info(path=agent.save_path, state=state, action=action, reward=reward)
            buffer.add_step(path=agent.save_path, step=step_info)

        if args.controller == 'NN' and len(environment.agents) > 0 and len(buffer) > 1e4:
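            # one actor/critic update per remaining agent on a batch sampled from the
            # replay buffer; losses are averaged for tensorboard logging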
            actor_loss_avg = 0
            critic_loss_avg = 0
            for i in range(len(environment.agents)):
                batch = buffer.sample()
                actor_loss_v, critic_loss_v, q_ref_v = controller.train_on_batch(batch=batch, gamma=GAMMA)
                actor_loss_avg += actor_loss_v
                critic_loss_avg += critic_loss_v
            episode_actor_loss_v += actor_loss_avg / (buffer.batch_size * len(environment.agents))
            episode_critic_loss_v += critic_loss_avg / (buffer.batch_size * len(environment.agents))
            writer.add_scalar('local/actor_loss_v', scalar_value=actor_loss_avg/(buffer.batch_size * len(environment.agents)), global_step=global_step+local_step)
            writer.add_scalar('local/critic_loss_v', scalar_value=critic_loss_avg/(buffer.batch_size * len(environment.agents)), global_step=global_step+local_step)

        for idx in sorted(agents_2pop, reverse=True):
            environment.agents.pop(idx)
            agents_2pop.remove(idx)

        if len(environment.agents) < 1:
            print('fini')
            break

    if len(environment.agents) > 1:
        for agent in environment.agents:
            agent.destroy(data=True, step=NUM_STEPS)
    
    episode_q = 0
    
    for (agent, info), path in zip(status.items(), save_paths):
        df = pd.read_csv(f'{path}/episode_info.csv')
        if args.controller == 'MPC':
            idx = 13
            df.loc[:idx, 'steer'] = 0.
            df.loc[:idx, 'state_steer'] = 0.
        if info == 'Max steps exceeded':
            idx = len(df)-1
            df.loc[idx-1,'reward'] = -EXTRA_REWARD * (GAMMA ** NUM_STEPS) #TODO -> discuss if necessary
            df.loc[idx,'steer'] = 0.
            df.loc[idx,'gas_brake'] = 0.
            df.loc[idx,'reward'] = 0. #TODO -> discuss if necessary
            df.loc[idx,'done'] = 1.
        #Update qvalues
        df['q'] = [sum(df['reward'][i:]) for i in range(df.shape[0])]
        episode_q += sum(df['reward'])
        df.to_csv(f'{path}/episode_info.csv', index=False)

    episode_q /= len(save_paths)
    episode_info = {
        'episode_q':episode_q,
        'episode_actor_loss_v': episode_actor_loss_v / local_step,
        'episode_critic_loss_v': episode_critic_loss_v / local_step
    }

    world.tick()
    world.tick()

    return episode_info, buffer, status, save_paths, (global_step+local_step), local_step
Code example #7
def main():
    page = 0

    while True:
        page += 1

        payload = {
            'ss': 1,
            'page': page,
        }

        user_agent = generate_user_agent()
        headers = {
            'User-Agent': user_agent,
        }

        print(f'PAGE: {page}')
        response = requests.get(HOST + ROOT_PATH,
                                params=payload,
                                headers=headers)
        response.raise_for_status()
        random_sleep()

        html = response.text

        soup = BeautifulSoup(html, 'html.parser')

        class_ = 'card card-hover card-visited wordwrap job-link'
        cards = soup.find_all('div', class_=class_)
        if not cards:
            cards = soup.find_all('div', class_=class_ + ' js-hot-block')

        result = []
        if not cards:
            break

        for card in cards:
            tag_a = card.find('h2').find('a')
            title = tag_a.text
            href = tag_a['href']
            result.append([title, href])
            vac_response = requests.get(HOST + href, headers=headers)
            vac_html = vac_response.text
            vac_soup = BeautifulSoup(vac_html, 'html.parser')

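            # the card href ends with a trailing slash, so the numeric vacancy id is
            # taken as the second-to-last path segment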
            workua_id = int(href.split('/')[-2])

            vacancy = vac_soup.find('h1', id='h1-name').text

            address = vac_soup.find(
                'p', class_='text-indent add-top-sm').text.strip()
            address = address.split('\n')[0]

            # company and salary live in separate "text-muted" blocks; initialise
            # both so a missing field does not raise NameError below
            company = None
            salary = None
            blocks = vac_soup.find_all(
                'p', class_='text-indent text-muted add-top-sm')
            for block in blocks:
                if block.find('a') is not None:
                    company = block.find('a').find('b').text
                elif block.find('b') is not None:
                    salary = block.find('b').text
                    # strip narrow no-break / thin spaces used as thousands separators
                    salary = salary.replace('\u202f', '')
                    salary = salary.replace('\u2009', '')

            data = (workua_id, vacancy, company, address, salary)
            cur.execute('''INSERT INTO jobs VALUES (?, ?, ?, ?, ?)''', data)

            db.commit()

        save_info(result)

    db.close()
Code example #8
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from generate_seeds_ositas import search_seeds_ostias
from setting import setting_info
from build_vessel_tree import build_vessel_tree, TreeNode, dfs_search_tree
from utils import save_info
import os

res_seeds, res_ostia = search_seeds_ostias()
seeds_gen_info_to_save = os.path.join(setting_info["seeds_gen_info_to_save"], "seeds.csv")
ostias_gen_info_to_save = os.path.join(setting_info["ostias_gen_info_to_save"], "ostias.csv")
infer_line_to_save = setting_info["infer_line_to_save"]
fig_to_save = setting_info["fig_to_save"]
reference_path = setting_info["reference_path"]
save_info(res_seeds, path=seeds_gen_info_to_save)
save_info(res_ostia, path=ostias_gen_info_to_save)
seeds = pd.read_csv(seeds_gen_info_to_save)[["x", "y", "z"]].values

ostias = []
head_node_list = pd.read_csv(ostias_gen_info_to_save)[["x", "y", "z"]].values
ostias_thr = 10
node_first = head_node_list[0]
ostias.append(node_first.tolist())
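# the first detected point is taken as one ostium; the first candidate farther than
# ostias_thr from it is taken as the other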
for node in head_node_list:
    if np.linalg.norm(node - node_first) > ostias_thr:
        ostias.append(node.tolist())
        break
if len(ostias) < 2:
    print("did not find 2 ostia points")
else:
Code example #9
File: main.py  Project: congchan/language-model
def train_one_epoch(epoch, cur_lr):
    ''' Train all the batches within one epoch.
    costs is the container created once and reused for efficiency.'''

    total_loss = 0
    states = [model.begin_state(batch_size=m, ctx=ctx) for ctx in ctxs]

    # Loop all batches
    batch, cursor = 0, 0
    tic_log_interval = time.time()
    while cursor < train_data.shape[0] - 1 - 1:
        #######################################################################
        # Control seq_len as in the original paper
        random_bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Normal distribution (mean, std): prevent extreme sequence lengths
        seq_len = max(5, int(np.random.normal(random_bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + args.max_seq_len_delta)
        # Rescale learning rate depending on the variable length w.r.t bptt
        trainer.set_learning_rate(cur_lr * seq_len / args.bptt)
        ########################################################################
        '''Each batch has shape (seq_len, batch_size); split the data across devices.
        m is the number of samples per device, divided along the batch_size axis.'''
        Xs, Ys = get_batch(train_data, cursor, args, seq_len=seq_len)
        assert args.batch_size == Xs.shape[1], 'data shape[1] should be batch_size'
        Xs = gluon.utils.split_and_load(Xs, ctxs, 1)
        Ys = gluon.utils.split_and_load(Ys, ctxs, 1)
        tic_b = time.time()

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        states = detach(states)
        loss_list = []
        with autograd.record():  # train_mode
            for i, X in enumerate(Xs):
                output, states[i], encoded_raw, encoded_dropped = model(
                    X, states[i])  # state(num_layers, bsz, hidden_size)
                device_loss = joint_loss(output, Ys[i], encoded_raw,
                                         encoded_dropped)
                loss_list.append(device_loss.as_in_context(ctxs[0]) / X.size)
        for l in loss_list:
            l.backward()
        ''' trainer.allreduce_grads()
            For each parameter, reduce the gradients from different contexts.
            Should be called after autograd.backward(), outside of record() scope, and before trainer.update().
            For normal parameter updates, step() should be used, which internally calls allreduce_grads() and then update().
            However, in gradient clipping, manually call allreduce_grads() and update() separately.
        '''
        # trainer.allreduce_grads()
        # grads = [p.grad(ctxs[0]) for p in parameters]
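        # gather the gradient arrays from every device so the global norm is computed
        # over all replicas before clipping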
        grads = [p.grad(ctx) for ctx in ctxs for p in parameters]
        gluon.utils.clip_global_norm(grads, args.clipping_theta)
        trainer.step(1)
        # trainer.update(1)

        batch_loss = sum([nd.sum(l).asscalar() for l in loss_list]) / len(ctxs)
        toc_b = time.time()
        batch_info.append([
            epoch, batch, trainer.learning_rate, seq_len,
            (toc_b - tic_b) * 1000,
            args.batch_size * seq_len // (toc_b - tic_b), batch_loss,
            math.exp(batch_loss)
        ])

        total_loss += batch_loss

        if batch % args.log_interval == 0 and batch > 0:
            utils.save_info(batch_info, batch_file)

            toc_log_interval = time.time()
            total_loss = total_loss / args.log_interval

            logging.info(
                '| epoch {:4d} ({:5.2f}%)| batch {:4d} | lr {:7.4f} | seq_len {:2d} | {:4.0f} ms/batch | '
                '{:5d} tokens/s | loss {:6.3f} | ppl {:5.2f}'.format(
                    epoch, cursor / train_data.shape[0] * 100, batch,
                    trainer.learning_rate, seq_len,
                    (toc_log_interval - tic_log_interval) * 1000 /
                    args.log_interval,
                    int(args.batch_size * args.log_interval * seq_len /
                        (toc_log_interval - tic_log_interval)), total_loss,
                    math.exp(total_loss)))

            total_loss = 0
            tic_log_interval = time.time()

        batch += 1
        cursor += seq_len

        global parameters_count
        if not parameters_count:
            logging.info('Parameters (except embedding): {}'.format(
                sum(p.data(ctxs[0]).size for p in parameters)))
            parameters_count = 1

    nd.waitall()  # synchronize batch data
Code example #10
File: main.py  Project: congchan/language-model
        load_model()
        load_best_loss, val_time = evaluate(val_data, eval_batch_size)
        load_best_ppl = math.exp(load_best_loss)
        logging.info("Loaded model: val_time {:5.2f}, valid loss {}, ppl {}".format(
            val_time, load_best_loss, load_best_ppl))

    # At any point you can hit Ctrl + C to break out of training early.
    # logging.info(model.summary(nd.zeros((args.bptt, m))))

    try:
        if not args.predict_only:
            # set the header of csv logging files
            epoch_info = []
            epoch_file = os.path.join(path, 'epoch_results.csv')
            utils.save_info([
                'epoch', 'lr', 'train_time(s)', 'val_time(s)', 'val_loss',
                'perplexity'
            ], epoch_file)

            batch_info = []
            batch_file = os.path.join(path, 'batch_results.csv')
            utils.save_info([
                'epoch', 'batch', 'learning_rate', 'seq_len', 'ms/batch',
                'tokens/s', 'val_loss', 'perplexity'
            ], batch_file)
            parameters = model.collect_params().values()
            parameters_count = 0
            train(load_best_loss)
    except KeyboardInterrupt:
        logging.info('-' * 89)
        logging.info('Exiting from training early')
    finally:
Code example #11
File: parse_workua.py  Project: AnnaSnigur/parser
def main():
    page = 0

    while True:
        page += 1

        payload = {
            'ss': 1,
            'page': page,
        }
        user_agent = generate_user_agent()
        headers = {
            'User-Agent': user_agent,
        }

        print(f'PAGE: {page}')
        response = requests.get(HOST + ROOT_PATH,
                                params=payload,
                                headers=headers)
        random_sleep()
        response.raise_for_status()

        html = response.text

        soup = BeautifulSoup(html, 'html.parser')

        class_ = 'card card-hover card-visited wordwrap job-link'
        cards = soup.find_all('div', class_=class_)
        if not cards:
            cards = soup.find_all('div', class_=class_ + ' js-hot-block')

        result = []

        if not cards:
            break

        for card in cards:
            tag_a = card.find('h2').find('a')
            title = tag_a.text
            href = tag_a['href']
            vac_response = requests.get(HOST + href, headers=headers)
            vac_html = vac_response.text
            vac_soup = BeautifulSoup(vac_html, 'html.parser')
            company = vac_soup.find(class_='').find('b').text

            try:
                salary = vac_soup.find(class_='').find('b').text
            except AttributeError:
                salary = 'No information'

            try:
                description = vac_soup.find(id='job-description').find_all(
                    ['p', 'b', 'li'])
                # join the tag texts so description is a plain string for .replace() below
                description = ' '.join(tag.text for tag in description)
            except AttributeError:
                description = 'No information'

            result.append([
                f'Ссылка: {href},\n'
                f'Вакансия: {title},\n'
                f'Компания: {company},\n'
                f'Зарплата: {salary},\n'
                f'Описание: {description}\n'
            ])
            save_db(href, title, salary, company, description)

            json_db = {
                'Ссылка': href,
                'Вакансия': title,
                'Компания': company,
                'Зарплата': salary,
                'Описание': description.replace('\n', ''),
            }
            json_save(json_db)
        save_info(result)