def extract_subtitles_batch(data):
    for settings in data:
        val_to_utf8_str(settings, ['input_dir', 'output_dir'])
        check_dirs(settings)
        input_lang = 'zh-TW'
        output_lang = 'zh-TW'
        extension = 'vtt'
        if 'input_lang' in settings:
            input_lang = to_utf8_str(settings['input_lang'])
        if 'output_lang' in settings:
            output_lang = to_utf8_str(settings['output_lang'])
        if 'extension' in settings:
            extension = to_utf8_str(settings['extension'])
        get_it_path, get_ot_path = gen_path_tools(settings,
                                                  ['input_dir', 'output_dir'])
        input_names = list(filter(filter_media, listdir(settings['input_dir'])))
        input_paths = list(map(get_it_path, input_names))
        output_paths = list(map(get_ot_path, input_names))
        output_paths = list(map(gen_extend_path('-', extension), output_paths))
        final_settings = []
        for i in range(len(input_names)):
            final_settings.append({
                'input_path': input_paths[i],
                'output_path': output_paths[i],
                'input_lang': input_lang,
                'output_lang': output_lang,
                'extension': extension,
            })
        show_info('Input', settings['input_dir'])
        start = time.time()
        extract_subtitles(final_settings)
        show_time(start)
        show_info('Input', settings['input_dir'], is_end=True)
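gen_extend_path is used above as a curried helper mapped over the output paths. Its implementation is not part of this example; a minimal sketch, assuming it simply rewrites each path to the requested subtitle extension (the real helper may also use the separator argument to insert a language suffix):

# sketch of a gen_extend_path-style helper (assumption, not the project's code)
from os import path

def gen_extend_path(sep, extension):
    def extend_path(p):
        base, _old_ext = path.splitext(p)
        return '{}.{}'.format(base, extension)
    return extend_path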
Example 2
    def deploy():
        """
        Remotely deploys the latest master branch changes to the production server.

        Returns:
            None
        """
        try:
            print()
            info_print(
                "This script will only succeed if you have configured the "
                "production server ssh port to match the one in "
                "config/production.py and have proper ssh access."
            )
            warning_print(
                "This process will update the production server with the "
                "latest master branch changes."
            )
            option = input("Do you want to continue? (y/n)\n")
            if option != "y" and option != "Y":
                sys.exit()
            t0 = time.time()
            remote_production_task("update_project")
            remote_production_task("install_python_requirements")
            remote_production_task("collect_static_files")
            remote_production_task("make_migrations")
            remote_production_task("migrate")
            remote_production_task("restart_server")
            t1 = time.time()
            show_time(t0, t1)
        except KeyboardInterrupt:
            error_print("Process aborted. Bye!")
            sys.exit()
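show_time is called here with the start and end timestamps of the deployment. Its implementation is not shown in this example; a minimal sketch of such a two-argument variant, assuming it only formats and prints the elapsed wall-clock time:

import time

def show_time(t0, t1):
    # sketch only: report elapsed wall-clock time between two time.time() stamps
    minutes, seconds = divmod(t1 - t0, 60)
    print("Elapsed time: {:.0f} min {:.1f} s".format(minutes, seconds))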
Example 3
async def keepalive(msg, sockfd, txt='', interval=20):
    while True:
        # second = datetime.datetime.now().strftime('%m%d%H%M-%S')[-2:]
        # if int(second) % 20 == 0:
        # if async:
        await asyncio.sleep(interval)
        # time.sleep(interval)
        show_time(txt)
        rmsg = query(msg, sockfd)
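keepalive is an infinite coroutine, so the caller has to schedule and cancel it explicitly. A hypothetical usage sketch (msg and sockfd would come from the surrounding networking code; note that query itself is a blocking socket call):

import asyncio

async def run_with_keepalive(msg, sockfd):
    # schedule the pinger in the background, do other work, then cancel it
    task = asyncio.create_task(keepalive(msg, sockfd, txt='demo', interval=20))
    await asyncio.sleep(60)
    task.cancel()

# asyncio.run(run_with_keepalive(msg, sockfd))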
Example 4
    def run(self):
        try:
            while not thread_end:
                show_time("[P2P >keepalive] Start waiting")

                self.waiter.wait(timeout=self.interval)
                print('[P2P >keepalive] {} greetings from {} with thread {}'.format(show_time(), self.txt, self.name))

                rmsg = query(self.msg, self.sockfd)
        finally:
            print("[P2P >keepalive] {} internally ended".format(self.name))
Example 5
def setup_development_machine():
    """
    Remotely creates a new Ubuntu virtual machine ready for working on the project.

    Returns:
        None
    """
    try:
        print()
        info_print("You need to have Vagrant installed in your host machine.")
        warning_print(
            "This process will clean all data on your previous virtual "
            "machine and build it again from scratch."
        )
        option = input("Do you want to continue? (y/n)\n")
        if option != "y" and option != "Y":
            sys.exit()
        t0 = time.time()
        pretty_print("\nCleaning previous installation ...")
        os.chdir("vagrant")
        run("vagrant destroy --force")
        run("vagrant box update")
        run("vagrant up")
        os.chdir("../")
        remote_development_task("update_repositories")
        remote_development_task("create_app_group_and_user")
        remote_development_task("install_postgresql")
        remote_development_task("create_postgresql_user_and_db")
        remote_development_task("install_jpeg_libraries_for_pillow")
        remote_development_task("install_git")
        remote_development_task("download_project")
        remote_development_task("install_virtualenv")
        remote_development_task("create_virtualenv")
        remote_development_task("export_project_keys")
        remote_development_task("install_python_requirements")
        remote_development_task("make_migrations")
        remote_development_task("migrate")
        remote_development_task("install_npm_and_grunt")
        remote_development_task("install_npm_packages")
        remote_development_task("install_ruby_and_sass")
        remote_development_task("install_unity")
        remote_development_task("create_test_data")
        remote_development_task("install_pycharm")
        remote_development_task("configure_pycharm")
        os.chdir("vagrant")
        run("vagrant halt")
        os.chdir("../")
        local_development_task("show_last_tips")
        t1 = time.time()
        show_time(t0, t1)
    except KeyboardInterrupt:
        error_print("Process aborted. Bye!")
        sys.exit()
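run is the local shell helper used for the vagrant commands above. A minimal sketch, assuming it simply executes the command in the current working directory and raises on a non-zero exit code:

import shlex
import subprocess

def run(command):
    # sketch only: split the command like a shell would and fail loudly on error
    subprocess.check_call(shlex.split(command))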
Example 6
    def setup_production_server():
        """
        Remotely performs the initial setup and deployment of the production server.

        Returns:
            None
        """
        try:
            print()
            info_print(
                "This script will only succeed if you have configured the "
                "production server ssh port to match the one in "
                "config/production.py and have proper ssh access."
            )
            info_print(
                "After this setup, you should configure the admin allowed IP "
                "in the nginx configuration file."
            )
            warning_print(
                "This process will clean all data on the production server "
                "and build up everything again from scratch."
            )
            option = input("Do you want to continue? (y/n)\n")
            if option != "y" and option != "Y":
                sys.exit()
            t0 = time.time()
            remote_production_task("clean_production_server")
            remote_production_task("update_repositories")
            remote_production_task("create_app_group_and_user")
            remote_production_task("install_postgresql")
            remote_production_task("create_postgresql_user_and_db")
            remote_production_task("install_jpeg_libraries_for_pillow")
            remote_production_task("install_git")
            remote_production_task("download_project")
            remote_production_task("install_virtualenv")
            remote_production_task("create_virtualenv")
            remote_production_task("export_project_keys")
            remote_production_task("install_python_requirements")
            remote_production_task("create_runtime_files_and_dirs")
            remote_production_task("collect_static_files")
            remote_production_task("make_migrations")
            remote_production_task("migrate")
            remote_production_task("install_supervisor")
            remote_production_task("configure_supervisor")
            remote_production_task("install_nginx")
            remote_production_task("configure_nginx")
            remote_production_task("restart_server")
            t1 = time.time()
            show_time(t0, t1)
        except KeyboardInterrupt:
            error_print("Process aborted. Bye!")
            sys.exit()
Example 7
def gen_subclip(input_dir, output_dir, input_name, intervals):
    input_path = path.join(input_dir, input_name)
    for idx, x in enumerate(intervals):
        output_path = get_output_path(output_dir, input_name, idx, x)
        cmd = ['pipenv', 'run', 'python', 'cut.py']
        args = [
            input_path,
            str(x['start_time']),
            str(x['end_time']), output_path
        ]
        show_info('', output_path, level=2)
        st_time = time.time()
        subprocess.call(cmd + args)
        show_time(st_time)
        show_info('', output_path, is_end=True, level=2)
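get_output_path is defined elsewhere in the project; a minimal sketch, assuming it derives each clip name from the source file, the interval index and the start/end times (the naming scheme here is hypothetical):

from os import path

def get_output_path(output_dir, input_name, idx, interval):
    base, ext = path.splitext(input_name)
    return path.join(output_dir, '{}-{:02d}-{}-{}{}'.format(
        base, idx, interval['start_time'], interval['end_time'], ext))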
Example 8
def query(msg_str, sockfd, recv_size=1000):
    expected = {
        "L:": "G:",
        "J:": "M:",
    }
    msg = str.encode(msg_str)

    rmsg = "[Nothing]"
    for _ in range(10):
        sockfd.send(msg)
        rmsg = sockfd.recv(recv_size).decode("utf-8")
        # compare the request header with the header of this reply
        check_header = (msg_str[:2], rmsg[:2])
        if not ((msg_str[:2] in expected) and
                (check_header not in expected.items())):
            break
        else:
            print("[inter >query] retrying, attempt {}".format(_ + 1))

    if rmsg.startswith("F:"):
        # if rmsg == 'F:error message::\r\n':
        print("[Error] {} F error message: {}, for input: {}".format(
            show_time(), rmsg, msg_str))

    return rmsg
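The retry loop keeps re-sending the request until the reply header matches the pair expected for the request header ("L:" expects "G:", "J:" expects "M:"). A small self-contained demo with a stub socket (class name and messages here are hypothetical):

class StubSocket:
    # minimal stand-in for a TCP socket: returns canned replies in order
    def __init__(self, replies):
        self.replies = list(replies)

    def send(self, data):
        pass

    def recv(self, size):
        return self.replies.pop(0).encode("utf-8")

# the first reply has an unexpected header, so query() retries and returns the "M:" one
print(query("J:", StubSocket(["X:unexpected::\r\n", "M:ok::\r\n"])))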
Example 9
def build_tfidf_sim_features(df_all):
    start_time = time.time()
    print('building features 1: tf-idf between search_term and product title...')
    df_all['tf-idf_term_title'] = build_similarity(df_all['search_term'], df_all['product_title'])
    print("-- use %s minutes --" % show_time(start_time))

    print('building features 2: tf-idf between search_term and product description...')
    df_all['tf-idf_term_desc'] = build_similarity(df_all['search_term'], df_all['product_description'])
    print("-- use %s minutes --" % show_time(start_time))

    print('building features 3: tf-idf between search_term and brand...')
    df_all['tf-idf_term_brand'] = build_similarity(df_all['search_term'], df_all['brand'])
    print("-- use %s minutes --" % show_time(start_time))

    print('tf-idf features build finished')
    print("-- use %s minutes --" % show_time(start_time))

    return df_all
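build_similarity is not shown in this example; a minimal sketch of what it presumably computes, i.e. the row-wise cosine similarity between the tf-idf vectors of two text columns (the original implementation may differ):

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

def build_similarity(queries, documents):
    queries = queries.astype(str)
    documents = documents.astype(str)
    tfidf = TfidfVectorizer().fit(list(queries) + list(documents))
    q = tfidf.transform(queries)
    d = tfidf.transform(documents)
    dots = np.asarray(q.multiply(d).sum(axis=1)).ravel()
    norms = (np.sqrt(np.asarray(q.multiply(q).sum(axis=1)).ravel()) *
             np.sqrt(np.asarray(d.multiply(d).sum(axis=1)).ravel()))
    return dots / np.maximum(norms, 1e-12)  # one similarity per row pair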
Example 10
def main():
    """
  # Examples to run this script
  ## Example 1: feed arguments from a yaml/yml file
    * Step 1. Edit your yaml file, 
      ```
      $ vim my_settings.yaml
      ```
      The following is the yaml/yml file example
      ```
      ---
      data:
      - input_dir: input_videos_1
        output_dir: output_videos_1
        # input_lang: zh-TW
        # output_lang: zh-TW
        # extension: vtt # output extension format
      - input_dir: input_videos_2
        output_dir: output_videos_2
        # input_lang: zh-TW
        # output_lang: zh-TW
        # extension: vtt # output extension format
      ...
      ```
    * Step 2. Run the command
      ```
      $ pipenv run python extract_subtitles_batch.py my_settings.yaml
      ```
    If you don't provide a yaml file and run the command like this:
    ```
    $ pipenv run python extract_subtitles_batch.py
    ```
    "extract_subtitles_batch.py" would automatically use "extract_subtitles_batch.yaml" as default;
    if "extract_subtitles_batch.yaml" does not exist, the program would raise error.
  """
    yaml_path = get_yaml_path('extract_subtitles_batch.yaml')
    config = read_yaml(yaml_path)
    global_start = time.time()
    extract_subtitles_batch(config['data'])
    show_time(global_start, prefix='Total')
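get_yaml_path and read_yaml are shared utilities of this project; a minimal sketch consistent with the behaviour described in the docstring, assuming PyYAML (the real helpers may differ):

import sys
import yaml

def get_yaml_path(default_name):
    # use the yaml path given on the command line, else fall back to the default
    return sys.argv[1] if len(sys.argv) > 1 else default_name

def read_yaml(yaml_path):
    with open(yaml_path) as f:  # raises if the default file does not exist
        return yaml.safe_load(f)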
Example 11
def main():
    args = get_args()
    if args.inspect_result:
        get_results()
        return

    combinations = get_combinations()  # 1296
    combination_set = combinations[args.start_ix:args.end_ix]

    for parameter_set in combination_set:
        uid = show_time()
        cmd = 'python train.py -save_dir {save_dir} ' \
              '-emb_dropout {} ' \
              '-lstm_n_layer {} ' \
              '-lstm_dropout {} ' \
              '-lstm_dim {} ' \
              '-linear_dropout {} ' \
              '-weight_decay {} ' \
            .format(save_dir=uid, *parameter_set)
        os.system(cmd)
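get_combinations presumably enumerates a hyperparameter grid whose Cartesian product has 1296 entries; a sketch with hypothetical value lists (6 x 2 x 3 x 6 x 3 x 2 = 1296), in the same order as the placeholders in cmd:

import itertools

def get_combinations():
    grid = [
        [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],   # emb_dropout
        [1, 2],                           # lstm_n_layer
        [0.1, 0.3, 0.5],                  # lstm_dropout
        [50, 100, 150, 200, 250, 300],    # lstm_dim
        [0.3, 0.5, 0.7],                  # linear_dropout
        [1e-5, 1e-4],                     # weight_decay
    ]
    return list(itertools.product(*grid))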
Example 12
def do_Quit():
    global my_tcp_server, my_tcp_conns, thread_end
    CmdWin.insert(1.0, "\nPress Quit")

    if my_tcp_server is not None:
        my_tcp_server.close()
        for conn in my_tcp_conns:
            conn.close()
        print("[Info] Closed tcp_conn, tcp_server")

    # for p in multiproc:
    #     p.terminate()
    #     p.join()
    # print("[Info] Closed multiprocessing")

    # sys.exit(0)

    thread_end = True
    # for t in multithread:
    # 	t.raise_exception()
    multithread_dict = {t.name: t for t in multithread}
    show_time("[Info] multithread_dict:{}".format(multithread_dict))

    for t_name in list(sorted(multithread_dict.keys())):
        t = multithread_dict[t_name]
        show_time("[----Info] {name} has joined".format(name=t.name))
        if t_name == 'keep alive thread':
            t.waiter.set()
        elif t_name == 'forwardlink thread':
            thread_event.set()
        t.raise_exception()
        t.join()
        show_time("[++++Info] {name} has joined".format(name=t.name))
    show_time("[Info] Closed multithreading")

    # last to close roomchat_sock because server_thread requires roomchat_sock
    roomchat_sock.close()
    print("[Info] Closed socket")

    sys.exit(0)
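The worker threads here expose a raise_exception() method that is not shown in this excerpt. A common ctypes-based sketch of how such a method is typically implemented (an assumption, consistent with the ctypes import seen in Example 19):

import ctypes
import threading

class StoppableThread(threading.Thread):
    def raise_exception(self):
        # asynchronously raise SystemExit inside this thread (CPython-specific)
        tid = ctypes.c_long(self.ident)
        ctypes.pythonapi.PyThreadState_SetAsyncExc(
            tid, ctypes.py_object(SystemExit))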
Example 13
def main(args):
    # Load dataset
    print('> Loading dataset ...')
    Dataset = MemoryFriendlyLoader(origin_img_dir=args.gt_dir,
                                   edited_img_dir=args.train_dir,
                                   pathlistfile=args.filelist)
    loader_train = torch.utils.data.DataLoader(dataset=Dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=8,
                                               pin_memory=True)
    print('\t# of training samples: %d\n' % int(len(Dataset)))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    model = Alignment(cuda_flag=True).cuda()
    model = torch.nn.DataParallel(model)

    criterion = nn.L1Loss().cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    plotx = []
    ploty = []

    checkpoint_path = args.ckp_dir + 'checkpoint_%depoch_%d_pretrain.ckpt'\
                      % (args.start_epoch, args.sigma)
    if args.use_checkpoint:
        model, optimizer, start_epoch, ploty = load_checkpoint(
            model, optimizer, checkpoint_path)
        model = torch.nn.DataParallel(model)
        print('cps loaded!')
        plotx = list(range(len(ploty)))

    # Training
    for epoch in range(args.start_epoch, args.epochs):
        losses = 0
        # train over all data in the epoch
        for step, (x, y, path_code) in enumerate(loader_train):

            # Pre-training step
            model.train()
            model.zero_grad()
            optimizer.zero_grad()

            frames_input = x
            reference = y

            frames_input = Variable(frames_input.cuda())
            reference = Variable(reference.cuda())

            # Evaluate model and optimize it
            out_train = model(frames_input)
            loss = 0
            index = [0, 1, 2, 4, 5, 6]
            for i in index:
                loss += criterion(out_train[:, i, :, :, :], reference)
            losses += loss.item()
            loss.backward()
            optimizer.step()

            if step % 100 == 0:
                # Results
                model.eval()
                psnr_train = 0
                for i in index:
                    psnr_train += batch_PSNR(out_train[:, i, :, :, :],
                                             reference, 1.)
                psnr_train = psnr_train / 6
                print('%s  [epoch %d][%d/%d]  loss: %f  PSNR_train: %.4fdB' % \
                    (show_time(datetime.datetime.now()), epoch + 1, step + 1, len(loader_train), losses / (step+1), psnr_train))

        # save loss pic
        plotx.append(epoch + 1)
        ploty.append(losses / (step + 1))
        if epoch // 1 == epoch / 1:
            plt.plot(plotx, ploty)
            plt.savefig(args.loss_pic)
        # save loss.txt
        with open(args.savetxt, 'a') as f:
            f.write('epoch %d loss: %f, val_psnr: %f\n' %
                    ((epoch + 1), losses / (step + 1), psnr_train))
        # save checkpoint
        if not os.path.exists(args.ckp_dir):
            os.mkdir(args.ckp_dir)
        save_checkpoint(
            model, optimizer, epoch + 1, ploty, args.ckp_dir +
            'checkpoint_%depoch_%d_pretrain.ckpt' % (epoch + 1, args.sigma))
        # save align.pkl
        torch.save(model,
                   os.path.join(args.save_dir, 'align_%d.pkl' % args.sigma))
        torch.save(model.module.state_dict(),
                   os.path.join(args.save_dir, 'align_%d.pth' % args.sigma))
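batch_PSNR is imported from the project's utilities; a minimal sketch, assuming it returns the mean PSNR over a batch of images scaled to [0, data_range]:

import torch

def batch_PSNR(img, target, data_range):
    mse = torch.mean((img - target) ** 2, dim=[1, 2, 3])  # per-sample MSE
    psnr = 10.0 * torch.log10(data_range ** 2 / mse)
    return psnr.mean().item()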
Example 14
    def __init__(
        self,
        proc_id=0,
        data_dir='tmp/',
        train_fname='train.csv',
        preprocessed=True,
        lower=True,
        vocab_max_size=100000,
        emb_dim=100,
        save_vocab_fname='vocab.json',
        verbose=True,
    ):
        self.verbose = verbose and (proc_id == 0)
        tokenize = (lambda x: x.split()) if preprocessed else 'spacy'

        INPUT = Field(
            sequential=True,
            batch_first=True,
            tokenize=tokenize,
            lower=lower,
            # include_lengths=True,
        )
        # TGT = Field(sequential=False, dtype=torch.long, batch_first=True,
        #             use_vocab=False)
        TGT = Field(sequential=True, batch_first=True)
        SHOW_INP = RawField()
        fields = [
            ('tgt', TGT),
            ('input', INPUT),
            ('show_inp', SHOW_INP),
        ]

        if self.verbose:
            show_time("[Info] Start building TabularDataset from: {}{}".format(
                data_dir, 'train.csv'))
        datasets = TabularDataset.splits(
            fields=fields,
            path=data_dir,
            format=train_fname.rsplit('.')[-1],
            train=train_fname,
            validation=train_fname.replace('train', 'valid'),
            test=train_fname.replace('train', 'test'),
            skip_header=True,
        )
        INPUT.build_vocab(
            *datasets,
            max_size=vocab_max_size,
            vectors=GloVe(name='6B', dim=emb_dim),
            unk_init=torch.Tensor.normal_,
        )
        # load_vocab(hard_disk) like opennmt
        # emb_dim = {50, 100}
        # Elmo
        TGT.build_vocab(*datasets)

        self.INPUT = INPUT
        self.TGT = TGT
        self.train_ds, self.valid_ds, self.test_ds = datasets

        if save_vocab_fname and self.verbose:
            writeout = {
                'tgt_vocab': {
                    'itos': TGT.vocab.itos,
                    'stoi': TGT.vocab.stoi,
                },
                'input_vocab': {
                    'itos': INPUT.vocab.itos,
                    'stoi': INPUT.vocab.stoi,
                },
            }
            fwrite(json.dumps(writeout, indent=4), save_vocab_fname)

        if self.verbose:
            msg = "[Info] Finished building vocab: {} INPUT, {} TGT" \
                .format(len(INPUT.vocab), len(TGT.vocab))
            show_time(msg)
Example 15
class BatchWrapper:
    def __init__(self, iterator):
        self.iterator = iterator

    def __len__(self):
        return len(self.iterator)

    def __iter__(self):
        for batch in self.iterator:
            yield batch


class Struct:
    def __init__(self, **entries):
        self.__dict__.update(entries)


if __name__ == '__main__':
    from tqdm import tqdm

    file_dir = "~/proj/1908_prac_toxic/data/yelp/"
    dataset = Dataset(data_dir=file_dir)
    train_dl, valid_dl, test_dl = dataset.get_dataloader()
    show_time('[Info] Begin iterating 10 epochs')
    for epoch in range(10):
        for batch in tqdm(train_dl):
            pass
            # inspect padding num distribution
            # use `pack_padded_sequence`
    show_time('[Info] Finished loading')
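get_dataloader is not included in this excerpt; roughly what it might look like with the legacy torchtext API used above, wrapping each BucketIterator in the BatchWrapper defined here (a sketch, not the project's actual method):

from torchtext.data import BucketIterator

def get_dataloader(self, batch_size=32, device='cpu'):
    # would live on the Dataset class shown in Example 14
    train_it, valid_it, test_it = BucketIterator.splits(
        (self.train_ds, self.valid_ds, self.test_ds),
        batch_size=batch_size,
        sort_key=lambda ex: len(ex.input),
        sort_within_batch=True,
        device=device,
    )
    return BatchWrapper(train_it), BatchWrapper(valid_it), BatchWrapper(test_it)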
Example 16
def main():
    """
  # Examples to run this script
  ## Example 1: feed arguments from a yaml/yml file
    * Step 1. Edit your yaml file, 
      ```
      $ vim my_settings.yaml
      ```
      The following is the yaml/yml file example
      ```
      ---
      data:
      - input_dir: input_videos_1
        output_dir: output_videos_1
        combnations:
        - input_name: 1-final-a.mp4
          intervals:
          - start_time: 0.50
            end_time: 1.56
          - start_time: 1.00
            end_time: 2.00
        - input_name: 2-final-a.mp4
          intervals:
          - start_time: 0.50
            end_time: 1.56
          - start_time: 1.00
            end_time: 2.00
      - input_dir: input_videos_2
        output_dir: output_videos_2
        combnations:
        - input_name: 1-final-b.mp4
          intervals:
          - start_time: 0.50
            end_time: 1.56
          - start_time: 1.00
            end_time: 2.00
        - input_name: 2-final-b.mp4
          intervals:
          - start_time: 0.50
            end_time: 1.56
          - start_time: 1.00
            end_time: 2.00
      ...
      ```
    * Step 2. Run the command
      ```
      $ pipenv run python cut_batch.py my_settings.yaml
      ```
    If you don't provide a yaml file and run the command like this:
    ```
    $ pipenv run python cut_batch.py
    ```
    "cut_batch.py" would automatically use "cut_batch.yaml" as default;
    if "cut_batch.yaml" does not exist, the program would raise error.
  """
    yaml_path = get_yaml_path('cut_batch.yaml')
    config = read_yaml(yaml_path)
    global_start = time.time()
    for settings in config['data']:
        val_to_utf8_str(settings, ['input_dir', 'output_dir'])
        check_dirs(settings)
        show_info('Input', settings['input_dir'])
        build_subclips(settings)
        show_info('Input', settings['input_dir'], is_end=True)
    show_time(global_start, prefix='Total')
Example 17
def main(args):
    # Load dataset
    print('> Loading dataset ...')
    Dataset = MemoryFriendlyLoader(origin_img_dir=args.gt_dir, edited_img_dir=args.train_dir,
                                   pathlistfile=args.filelist)
    loader_train = torch.utils.data.DataLoader(dataset=Dataset, batch_size=args.batch_size, shuffle=True,
                                               num_workers=8, pin_memory=True)
    print('\t# of training samples: %d\n' % int(len(Dataset)))

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    model = MAPVDNet(cuda_flag=True, alignment_model=args.pretrained_model, T=args.stages).cuda()
    model = torch.nn.DataParallel(model)

    criterion = nn.L1Loss().cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    plotx = []
    ploty = []

    checkpoint_path = args.ckp_dir + 'checkpoint_%d_%depoch.ckpt' % (args.sigma, args.start_epoch)
    if args.use_checkpoint:
        model, optimizer, start_epoch, ploty = load_checkpoint(model, optimizer, checkpoint_path)
        model = torch.nn.DataParallel(model)
        print('cps loaded!')
        plotx = list(range(len(ploty)))


    # Training
    for epoch in range(args.start_epoch, args.epochs):
        losses = 0

        # train over all data in the epoch
        for step, (x, y, path_code) in enumerate(loader_train):

            # Pre-training step
            model.train()
            model.zero_grad()
            optimizer.zero_grad()

            frames_input = x
            frame_clean = y

            frame_clean = Variable(frame_clean.cuda())
            frames_input = Variable(frames_input.cuda())

            # Evaluate model and optimize it
            x_list = model(frames_input)

            loss = criterion(x_list[-1], frame_clean)
            for i in range(1, len(x_list)-1): # 1, 2, 3, 4
                loss += 0.0001 * criterion(x_list[i], frame_clean)

            losses += loss.item()
            loss.backward()
            optimizer.step()

            if step % 100 == 0:
                # Results
                model.eval()
                psnr_train = batch_PSNR(x_list[-1], frame_clean, 1.)
                print('%s  [epoch %d][%d/%d]  loss: %f  PSNR_train: %.4fdB' % \
                    (show_time(datetime.datetime.now()), epoch + 1, step + 1, len(loader_train), losses / (step+1), psnr_train))

        # save loss pic
        plotx.append(epoch + 1)
        ploty.append(losses / (step + 1))
        if epoch // 1 == epoch / 1:
            plt.plot(plotx, ploty)
            plt.savefig(args.loss_pic)
        # save loss.txt
        with open(args.savetxt, 'a') as f:
            f.write('epoch %d loss: %f, val_psnr: %f\n' % ((epoch + 1), losses / (step+1), psnr_train))
        # save checkpoint
        if not os.path.exists(args.ckp_dir):
            os.mkdir(args.ckp_dir)
        save_checkpoint(model, optimizer, epoch + 1, ploty, args.ckp_dir + 'checkpoint_%d_%depoch.ckpt' %
                        (args.sigma, epoch + 1))
        # save denoise.pkl
        torch.save(model, os.path.join(args.save_dir, 'denoising_%d_%d.pkl' % (args.sigma, epoch + 1)))
Example 18
def get_args():
    cur_time = show_time(printout=False)
    parser = configargparse.ArgumentParser(
        description='Args for Text Classification')
    group = parser.add_argument_group('Model Hyperparameters')
    group.add_argument(
        '-init_xavier',
        default=False,
        action='store_true',
        help='whether to use xavier normal as initiator for model weights')
    group.add_argument('-emb_dropout',
                       default=0.3,
                       type=float,
                       help='dropout of the embedding layer')
    group.add_argument('-emb_dim',
                       default=100,
                       type=int,
                       help='dimension of embedding vectors')
    group.add_argument('-vocab_max_size',
                       default=100000,
                       type=int,
                       help='max number of words in vocab')
    group.add_argument('-lstm_n_layer',
                       default=1,
                       type=int,
                       help='num of layers in LSTM')
    group.add_argument('-lstm_dropout',
                       default=0.3,
                       type=float,
                       help='dropout in >=1th LSTM layer')
    group.add_argument('-lstm_dim',
                       default=100,
                       type=int,
                       help='dimension of the lstm hidden states')
    group.add_argument('-lstm_combine',
                       default='add',
                       choices=['add', 'concat'],
                       type=str,
                       help='the way to combine bidirectional lstm outputs')
    group.add_argument('-n_linear',
                       default=1,
                       type=int,
                       help='number of linear layers after lstm')
    group.add_argument('-linear_dropout',
                       default=0.5,
                       type=float,
                       help='dropout of the penultimate layer')
    group.add_argument('-n_classes',
                       default=2,
                       type=int,
                       help='number of classes to predict')

    group = parser.add_argument_group('Training Specs')
    group.add_argument('-seed', default=0, type=int, help='random seed')
    group.add_argument('-batch_size', default=10, type=int, help='batch size')
    group.add_argument('-epochs',
                       default=100,
                       type=int,
                       help='number of epochs to train the model')
    group.add_argument('-lr', default=0.001, type=float, help='learning rate')
    group.add_argument('-weight_decay',
                       default=1e-5,
                       type=float,
                       help='weight decay')

    group = parser.add_argument_group('Files')
    group.add_argument('-data_dir',
                       default='data/re_semeval/',
                       type=str,
                       help='the directory for data files')
    group.add_argument('-train_fname',
                       default='train.csv',
                       type=str,
                       help='training file name')
    group.add_argument('-data_sizes',
                       nargs=3,
                       default=[None, None, None],
                       type=int,
                       help='# samples to use in train/dev/test files')
    group.add_argument('-preprocessed',
                       action='store_false',
                       default=True,
                       help='whether input data is preprocessed by spacy')
    group.add_argument('-lower',
                       action='store_true',
                       default=False,
                       help='whether to lowercase the input data')

    group.add_argument('-uid',
                       default=cur_time,
                       type=str,
                       help='the id of this run')
    group.add_argument('-save_dir',
                       default='tmp/',
                       type=str,
                       help='directory to save output files')
    group.add_argument('-save_dir_cp',
                       default='tmp_cp/',
                       type=str,
                       help='directory to backup output files')
    group.add_argument('-save_meta_fname',
                       default='run_meta.txt',
                       type=str,
                       help='file name to save arguments and model structure')
    group.add_argument('-save_log_fname',
                       default='run_log.txt',
                       type=str,
                       help='file name to save training logs')
    group.add_argument('-save_valid_fname',
                       default='valid_e00.txt',
                       type=str,
                       help='file name to save valid outputs')
    group.add_argument('-save_vis_fname',
                       default='example.txt',
                       type=str,
                       help='file name to save visualization outputs')
    group.add_argument('-save_model_fname',
                       default='model',
                       type=str,
                       help='file to torch.save(model)')
    group.add_argument('-save_vocab_fname',
                       default='vocab.json',
                       type=str,
                       help='file name to save vocab')

    group = parser.add_argument_group('Run specs')
    group.add_argument('-n_gpus', default=1, type=int, help='# gpus to run on')
    group.add_argument('-load_model',
                       default='',
                       type=str,
                       help='path to pretrained model')
    group.add_argument('-verbose',
                       action='store_true',
                       default=False,
                       help='whether to show pdb.set_trace() or not')

    args = parser.parse_args()
    return args
Example 19
import sys
import time
import ctypes

sys.path.append('.')

from utils import sdbm_hash, show_time
from build_socket import build_tcp_client  # forward_link # retain_forward_link # build_tcp_server,
from interaction import query, parse_name, parse_rmsg, handle_join_rmsg, \
    parse_memberships, parse_members, parse_send_message

from time import sleep

#
# Global variables
#
show_time("[P2P] start program")
roomchat_ip = sys.argv[1]
roomchat_ip = '127.0.0.1' if roomchat_ip == 'localhost' else roomchat_ip

roomchat_port = int(sys.argv[2])
roomchat_sock = build_tcp_client(roomchat_ip, roomchat_port)

myip = '127.0.0.1'
myport = int(sys.argv[3])
mysock = None
username = ""
roomname = ""

msgID = 0
# this will include myself, so that when my message is sent back to me, I know not to resend it
# HID as str, msgID as int
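The module reads the chat-room server address and the local peer port from sys.argv; a hypothetical invocation (the script name and ports are assumed):

    $ python p2p_peer.py localhost 32340 32341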
Example 20
net.cuda()

# MultiCVM_params = list(map(id, net.BiCVM.parameters()))
# SAnet_params = filter(lambda p: id(p) not in MultiCVM_params, net.parameters())
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    net.parameters()),
                             lr=LR,
                             weight_decay=WEIGHT_DECAY)
# optimizer = torch.optim.Adam([{'params': SAnet_params}, {'params': net.BiCVM.parameters(), 'lr': LR / 10}],
#                              lr=LR,
#                              weight_decay=WEIGHT_DECAY)
loss_func = torch.nn.MSELoss()

# Training
prev_time = datetime.datetime.now()  # current time
print('%s  Start training...' % show_time(prev_time))
plotx = []
ploty = []
check_loss = 1

for epoch in range(EPOCH):
    losses = 0
    count = 0
    for step, (s1, ground_truth) in enumerate(train_loader):
        s1 = s1.cuda()
        ground_truth = ground_truth.cuda()

        label = net(s1)

        loss = loss_func(label, ground_truth)
        losses += loss.item()