Esempio n. 1
0
 def __init__(self, input_channels, output_channels, kernel,
              dropout=0.0, activation='identity', dilation=1, groups=1, batch_norm=True):
     """Initialize the parent block with a doubled output width and add a sigmoid gate.

     The parent initializer receives ``2*output_channels`` as its output
     width; every other argument is forwarded unchanged.
     NOTE(review): presumably the doubled output is later split into a
     transform half and a gate half -- confirm against the rest of the class.
     """
     doubled_channels = 2 * output_channels
     parent_args = (input_channels, doubled_channels, kernel, dropout,
                    activation, dilation, groups, batch_norm)
     super(HighwayConvBlock, self).__init__(*parent_args)
     self._gate = Sigmoid()
Esempio n. 2
0

class StructuredFunction:
    """
    Pair of activation functions for structured outputs.

    ``sum`` is the activation for Sum Type restrictions (results should sum to 1);
    ``prod`` is the activation for Product Type restrictions (results are in [0, 1]
    and independent of each other).
    """
    def __init__(self, sum, prod):
        # The parameter names are part of the public interface, so ``sum``
        # keeps shadowing the builtin inside this initializer.
        self.sum, self.prod = sum, prod


class Linear(Function):
    """
    Identity ("linear") activation function: data passes through without any transformation
    """
    @staticmethod
    def forward(ctx, input):
        """Hand the input back unchanged."""
        passthrough = input
        return passthrough

    @staticmethod
    def backward(ctx, grad_outputs):
        """Hand the incoming gradient back unchanged."""
        upstream = grad_outputs
        return upstream


# Callable form of the identity activation
linear = Linear.apply

# Identity in both the sum-type and the product-type slot
structuredLinear = StructuredFunction(sum=linear, prod=linear)

# Softmax over dim 1 in the sum-type slot, sigmoid in the product-type slot
structuredSigmoid = StructuredFunction(sum=Softmax(dim=1), prod=Sigmoid())
Esempio n. 3
0
    def __init__(self, input_shape, n_convfilter,
                 n_fc_filters, h_shape, conv3d_filter_shape):
        """Create the 2D convolutional stack, the fully connected layer and the 3D-conv layers.

        Args:
            input_shape: indexable; ``input_shape[1]`` is used as the number of
                input channels of the first convolution. The original comment
                gives ``(batch_size, 3, img_w, img_h)`` as an example.
            n_convfilter: indexable with at least six entries giving the channel
                count of each conv stage (example from the original comment:
                ``[96, 128, 256, 256, 256, 256]``).
            n_fc_filters: indexable; ``n_fc_filters[0]`` is the output size of
                ``fc7`` and the first argument of every ``BN_FCConv3DLayer_torch``.
            h_shape: forwarded to ``BN_FCConv3DLayer_torch`` (example from the
                original comment: ``(batch_size, 128, 4, 4, 4)``).
            conv3d_filter_shape: forwarded to ``BN_FCConv3DLayer_torch`` (example
                from the original comment: ``[128, 128, 3, 3, 3]``).
        """
        print("\ninitializing \"encoder\"")
        super(encoder, self).__init__()
        # conv1: 7x7 convolution followed by a 3x3 convolution, each with batch norm
        self.conv1a = Conv2d(input_shape[1], n_convfilter[0], 7, padding=3)
        self.bn1a = BatchNorm2d(n_convfilter[0])
        self.conv1b = Conv2d(n_convfilter[0], n_convfilter[0], 3, padding=1)
        self.bn1b = BatchNorm2d(n_convfilter[0])

        # conv2: two 3x3 convolutions plus a 1x1 convolution (conv2c) that maps the
        # stage input channels directly to the stage output channels
        # NOTE(review): conv2c looks like a residual shortcut projection -- confirm in forward()
        self.conv2a = Conv2d(n_convfilter[0], n_convfilter[1], 3, padding=1)
        self.bn2a = BatchNorm2d(n_convfilter[1])
        self.conv2b = Conv2d(n_convfilter[1], n_convfilter[1], 3, padding=1)
        self.bn2b = BatchNorm2d(n_convfilter[1])
        self.conv2c = Conv2d(n_convfilter[0], n_convfilter[1], 1)
        self.bn2c = BatchNorm2d(n_convfilter[1])

        # conv3: same layout as conv2 (two 3x3 convolutions plus a 1x1 convolution)
        self.conv3a = Conv2d(n_convfilter[1], n_convfilter[2], 3, padding=1)
        self.bn3a = BatchNorm2d(n_convfilter[2])
        self.conv3b = Conv2d(n_convfilter[2], n_convfilter[2], 3, padding=1)
        self.bn3b = BatchNorm2d(n_convfilter[2])
        self.conv3c = Conv2d(n_convfilter[1], n_convfilter[2], 1)
        self.bn3c = BatchNorm2d(n_convfilter[2])

        # conv4: two 3x3 convolutions, no 1x1 branch
        self.conv4a = Conv2d(n_convfilter[2], n_convfilter[3], 3, padding=1)
        self.bn4a = BatchNorm2d(n_convfilter[3])
        self.conv4b = Conv2d(n_convfilter[3], n_convfilter[3], 3, padding=1)
        self.bn4b = BatchNorm2d(n_convfilter[3])

        # conv5: two 3x3 convolutions, no 1x1 branch
        self.conv5a = Conv2d(n_convfilter[3], n_convfilter[4], 3, padding=1)
        self.bn5a = BatchNorm2d(n_convfilter[4])
        self.conv5b = Conv2d(n_convfilter[4], n_convfilter[4], 3, padding=1)
        self.bn5b = BatchNorm2d(n_convfilter[4])

        # conv6: two 3x3 convolutions, no 1x1 branch
        self.conv6a = Conv2d(n_convfilter[4], n_convfilter[5], 3, padding=1)
        self.bn6a = BatchNorm2d(n_convfilter[5])
        self.conv6b = Conv2d(n_convfilter[5], n_convfilter[5], 3, padding=1)
        self.bn6b = BatchNorm2d(n_convfilter[5])

        # single 2x2 max-pooling layer with padding 1
        # (the original comment quotes an output of batch_size, 256, 64, 64 -- TODO confirm)
        self.pool = MaxPool2d(kernel_size=2, padding=1)

        # nonlinearities of the network
        self.leaky_relu = LeakyReLU(negative_slope=0.01)
        self.sigmoid = Sigmoid()
        self.tanh = Tanh()

        # find the input feature map size of the fully connected layer,
        # asking the helper to account for six pooling steps
        fc7_feat_w, fc7_feat_h = self.fc_in_featmap_size(
            input_shape, num_pooling=6)
        # define the fully connected layer: flattened conv6 feature map -> n_fc_filters[0]
        # (the original comment quotes 1024 output features)
        self.fc7 = Linear(
            int(n_convfilter[5] * fc7_feat_w * fc7_feat_h), n_fc_filters[0])

        # define the three FCConv3D layers, all built with identical arguments
        # NOTE(review): the attribute names suggest the update gate, the reset gate and
        # the candidate state of a 3D convolutional GRU unit -- confirm where they are used
        self.t_x_s_update = BN_FCConv3DLayer_torch(
            n_fc_filters[0], conv3d_filter_shape, h_shape)
        self.t_x_s_reset = BN_FCConv3DLayer_torch(
            n_fc_filters[0], conv3d_filter_shape, h_shape)
        self.t_x_rs = BN_FCConv3DLayer_torch(
            n_fc_filters[0], conv3d_filter_shape, h_shape)
Esempio n. 4
0
 def __init__(self, dimension):
     """Create two ``dimension`` -> ``dimension`` branches: Linear+ReLU and Linear+Sigmoid."""
     super(HighwayLayer, self).__init__()
     # The ReLU branch is built first so layer creation order is unchanged
     transform_branch = Sequential(Linear(dimension, dimension), ReLU())
     self._linear = transform_branch
     gate_branch = Sequential(Linear(dimension, dimension), Sigmoid())
     self._gate = gate_branch
Esempio n. 5
0
 def __init__(self, in_node_feats, in_global_feats):
     """Create sigmoid-activated linear heads for node features (2 outputs) and global features (5 outputs)."""
     super().__init__()
     node_layers = (Linear(in_node_feats, 2), Sigmoid())
     self.node_fn = Sequential(*node_layers)
     global_layers = (Linear(in_global_feats, 5), Sigmoid())
     self.global_fn = Sequential(*global_layers)
    def __init__(self, n_features, n_embeddings, n_units):
        """Build one encoder and five decoders, each a 3-layer bidirectional GRU plus a linear projection.

        Args:
            n_features: input size of the encoder GRU and output size of ``decoder``.
            n_embeddings: output size of the encoder projection and input size
                of the ``decoder``, ``decoder3`` and ``decoder4`` GRUs.
            n_units: hidden size of every GRU; each projection takes
                ``2 * n_units`` inputs because the GRUs are bidirectional.
        """
        super(Net, self).__init__()
        self.n_features = n_features
        self.n_embeddings = n_embeddings
        self.n_units = n_units

        def gru_with_projection(in_size, out_size):
            # The GRU is created before the projection, which keeps the layer
            # creation order the same for every sub-network.
            recurrent = GRU(in_size,
                            self.n_units,
                            3,
                            dropout=0.1,
                            bidirectional=True,
                            batch_first=True)
            projection = Linear(2 * self.n_units, out_size)
            return ModuleDict({'gru': recurrent, 'linear': projection})

        self.encoder = gru_with_projection(self.n_features, self.n_embeddings)
        self.decoder = gru_with_projection(self.n_embeddings, self.n_features)
        # decoder1 and decoder2 use fixed widths of 16 and 10
        self.decoder1 = gru_with_projection(16, 16)
        self.decoder2 = gru_with_projection(10, 10)
        # decoder3 and decoder4 both project to a single output value
        self.decoder3 = gru_with_projection(self.n_embeddings, 1)
        self.decoder4 = gru_with_projection(self.n_embeddings, 1)

        self.relu = ReLU()
        self.sigmoid = Sigmoid()
Esempio n. 7
0
def main(in_path, outpath):
    """Predict an answer string for every question in a JSON dataset and write the predictions.

    The input file must hold a top-level ``'data'`` list of articles. Each
    article has ``'paragraphs'``, each paragraph a ``'context'`` string and a
    ``'qas'`` list, and each entry of ``'qas'`` a ``'question'`` and an ``'id'``.

    Every paragraph is sentence-split, tokenized and parsed once; one
    single-example batch is then built per question. The span extractor
    proposes a node, the answer verifier scores it, and the three confidence
    signals are blended into a has-answer probability. Questions below
    ``HAS_ANSWER_THRESHOLD`` get an empty string as prediction.

    Args:
        in_path: path of the input JSON file.
        outpath: path the ``{question id: predicted text}`` JSON is written to.
    """
    nltk.download()

    span_extractor = torch.load(os.path.join(EXPERIMENT,
                                             'best_span_extractor.tar'),
                                map_location='cpu')
    answer_verifier = torch.load(os.path.join(EXPERIMENT,
                                              'best_answer_verifier.tar'),
                                 map_location='cpu')
    span_extractor.use_cuda = False
    answer_verifier.use_cuda = False

    tokenizer = StanfordTokenizer(
        options={'ptb3Escaping':
                 True})  # same tokenizer used by lexical parser
    parser = StanfordParser(java_options='-mx5g')

    # Close the input file deterministically instead of leaking the handle
    with open(in_path, 'r') as in_file:
        data = json.load(in_file)['data']
    batches = []
    official_eval = {}
    official_eval_tokens = {}  # token-level predictions; collected but not written out here
    qaid_map = {}

    num_articles = len(data)
    for aidx, article in enumerate(data):
        print('\t- Article Count=%d/%d' % (aidx + 1, num_articles))
        for paragraph in article['paragraphs']:
            passage, qas = paragraph['context'], paragraph['qas']
            passage = passage.replace(u'\xa0', ' ')
            sentences = sent_tokenize(passage)

            sentence_tokens = [
                tokenizer.tokenize(sentence) for sentence in sentences
            ]
            # Keep the first parse returned for each sentence
            raw_trees = [
                list(s)[0]
                for s in parser.parse_sents(sentence_tokens, verbose=True)
            ]
            squad_tree = TreePassage(raw_trees)

            for qa in qas:
                question_tokens = []
                for s in sent_tokenize(qa['question']):
                    question_tokens += tokenizer.tokenize(s)

                batches.append(
                    Batch([{
                        'apid': 'apid',
                        'qa_id': qa['id'],
                        'context_squad_tree': squad_tree,
                        'question_tokens': question_tokens,
                        'answers': [],
                        'is_impossible': 0
                    }], False))

                # The unmodified context is kept so the predicted tokens can
                # be mapped back onto the original text.
                qaid_map[qa['id']] = paragraph['context']

    span_extractor.eval()
    answer_verifier.eval()
    sigmoid = Sigmoid()  # stateless, so one instance serves every batch
    for batch in batches:
        qa_id = batch.qa_id[0]

        node_scores, expected_f1s, global_answer_score = span_extractor(
            batch, eval_system=True)
        score_confidence, predicted_node_idxs = node_scores.max(dim=1)
        score_confidence, predicted_node_idxs = (variable_to_numpy(
            score_confidence,
            False), variable_to_numpy(predicted_node_idxs, False))

        # Answer score = predicted has answer probability
        answer_score = answer_verifier(batch,
                                       predicted_node_idxs=predicted_node_idxs,
                                       eval_system=True)
        answer_proba = variable_to_numpy(
            sigmoid(answer_score),
            False)  # convert from tensor to numpy array
        global_answer_proba = variable_to_numpy(sigmoid(global_answer_score),
                                                False)

        # Fixed-weight blend of the three confidence signals
        has_answer_proba = (0.3 * score_confidence +
                            0.4 * global_answer_proba + 0.3 * answer_proba)[0]

        predicted_span = batch.trees[0].span(predicted_node_idxs[0])
        predicted_has_answer = has_answer_proba >= HAS_ANSWER_THRESHOLD

        predicted_text = tokens_to_text(predicted_span, qaid_map[qa_id])
        official_eval[qa_id] = predicted_text if predicted_has_answer else ''
        official_eval_tokens[qa_id] = ' '.join(
            predicted_span) if predicted_has_answer else ''

    # The context manager guarantees the predictions are flushed and the file closed
    with open(outpath, 'w') as out_file:
        json.dump(official_eval, out_file)
Esempio n. 8
0
 def __init__(self):
     """Create a 345 -> 600 -> 30 stack of two linear layers with a sigmoid after the first."""
     super(Net, self).__init__()
     hidden_width = 600
     self.hidden_feature = hidden_width
     self.linear1 = Linear(345, hidden_width)
     self.sigmoid1 = Sigmoid()
     self.linear2 = Linear(hidden_width, 30)
Esempio n. 9
0
def torch_fn():
    """Build and return a new torch sigmoid layer."""
    sigmoid_layer = Sigmoid()
    return sigmoid_layer
def main():
    """Command-line entry point: fine-tune a BERT-based classifier with early stopping.

    Steps performed by the visible code:
      1. Parse the command-line arguments and keep a pristine copy for logging.
      2. Select the device, optionally initialise distributed training, and
         seed the random number generators.
      3. Build the task processor, tokenizer and model, optionally loading
         pretrained weights from ``--bert_model_path``.
      4. When ``--do_train`` is set: load the training data, obtain a
         validation set, train for at most ``--max_epochs`` epochs and stop
         once the validation loss has not improved for ``--patience`` epochs.
      5. Save the model config, vocabulary and training arguments to
         ``--output_dir``.

    Raises:
        ValueError: on an invalid ``gradient_accumulation_steps``, when neither
            ``--do_train`` nor ``--do_eval`` is given, when the output directory
            is non-empty without ``--overwrite_output_dir``, or for an unknown task.
        ImportError: when ``--fp16`` is requested but apex is not installed.
    """
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--data_dir",
                        default=None,
                        type=str,
                        required=True,
                        help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
    parser.add_argument("--bert_model", default="bert-base-uncased", type=str, required=False,
                        help="Bert pre-trained model selected in the list: bert-base-uncased, "
                        "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
                        "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--bert_model_path", default="", type=str, required=False,
                        help="Bert pretrained saved pytorch model path.")
    parser.add_argument("--reformer_model_path", default=None, type=str, required=False,
                        help="Reformer pretrained saved pytorch model path.")
    parser.add_argument("--experiment", default="attention", type=str, required=False,
                        help="4 types: attention, base, long, ablation. "
                        "base: original bert. "
                        "long: uses an lstm to keep track of all bert hidden representations, but backprop over the first. "
                        "attention: uses an lstm + attention mechanism to backprop over more than the first representation. "
                        "ablation: concat all the hidden representations."
                        )
    parser.add_argument("--model_name_or_path", default="bert-base-uncased", type=str, required=True)
    parser.add_argument("--task_name",
                        default=None,
                        type=str,
                        required=True,
                        help="The name of the task to train.")
    parser.add_argument("--output_dir",
                        default=None,
                        type=str,
                        required=True,
                        help="The output directory where the model predictions and checkpoints will be written.")
    parser.add_argument("--reformer_hashes", default=4, type=int, help="Reformer hash buckets")

    ## Other parameters
    parser.add_argument("--cache_dir",
                        default="",
                        type=str,
                        help="Where do you want to store the pre-trained models downloaded from s3")
    parser.add_argument("--max_seq_length",
                        default=128,
                        type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. \n"
                             "Sequences longer than this will be truncated, and sequences shorter \n"
                             "than this will be padded.")
    parser.add_argument("--max_tokens",
                        default=16384,
                        type=int,
                        help="The total tokens for ease of processing")
    parser.add_argument("--token_shift",
                        default=200,
                        type=int,
                        help="")
    parser.add_argument("--do_train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_pos_encoding",
                        action='store_true',
                        help="train a model with positional coding.")
    parser.add_argument("--do_min_att",
                        action='store_true',
                        help="ensure attention has a minimal alpha.")
    parser.add_argument("--do_truncate",
                        action='store_true',
                        help="Whether to apply truncation.")
    parser.add_argument("--do_eval",
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--do_lower_case",
                        action='store_true',
                        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--train_batch_size",
                        default=32,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=16,
                        type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--learning_rate",
                        default=2e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--warmup_proportion",
                        default=0.1,
                        type=float,
                        help="Proportion of training to perform linear learning rate warmup for. "
                             "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--overwrite_output_dir',
                        action='store_true',
                        help="Overwrite the content of the output directory")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument("--max_epochs",
                        default=20,
                        type=float,
                        help="Maximum number of training epochs to perform.")
    parser.add_argument("--warmup_epochs",
                        default=1.0,
                        type=float,
                        help="Number of epochs to perform linear learning rate warmup for.")
    parser.add_argument("--patience",
                        default=3.0,
                        type=float,
                        help="Number of epochs without validation loss improvement before training stops early.")
    parser.add_argument("--val_split",
                        default=0.05,
                        type=float,
                        help="Proportion of the training set held out for validation. "
                             "E.g., 0.05 = 5%% of the training set.")
    parser.add_argument('--gradient_accumulation_steps',
                        type=int,
                        default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument('--fp16',
                        action='store_true',
                        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument('--loss_scale',
                        type=float, default=0,
                        help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
                             "0 (default value): dynamic loss scaling.\n"
                             "Positive power of 2: static loss scaling value.\n")
    parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.")
    parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.")
    args = parser.parse_args()
    # Pristine snapshot of the parsed arguments: `args` is mutated below
    # (device, train_batch_size) and the snapshot is what gets dumped as JSON.
    save_args = argparse.Namespace(**vars(args))

    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
        ptvsd.wait_for_attach()

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
    args.device = device

    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)

    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
        device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
                            args.gradient_accumulation_steps))

    # The per-step batch shrinks so that the accumulated batch matches the requested size
    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if not args.do_train and not args.do_eval:
        raise ValueError("At least one of `do_train` or `do_eval` must be True.")

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir:
        raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))
    if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)

    task_name = args.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    output_mode = output_modes[task_name]

    label_list = processor.get_labels()
    num_labels = len(label_list)

    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier()  # Make sure only the first process in distributed training will download model & vocab

    tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)

    cls_token = tokenizer.convert_tokens_to_ids(["[CLS]"])
    sep_token = tokenizer.convert_tokens_to_ids(["[SEP]"])

    model = get_model(args, num_labels, len(tokenizer.vocab), cls_token, sep_token, args.token_shift)

    if args.bert_model_path != "":
        print("Loading model from: " + args.bert_model_path)
        if args.do_train:
            pretrained_dict = torch.load(os.path.join(args.bert_model_path, "pytorch_model.bin"))
            model_dict = model.state_dict()
            # 1. filter out unnecessary keys
            pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
            # The classifier1 head is always dropped, whatever its shape;
            # 'classifier2.*' entries are not filtered.
            if 'classifier1.weight' in pretrained_dict:
                del pretrained_dict['classifier1.weight']
                del pretrained_dict['classifier1.bias']
            # 2. overwrite entries in the existing state dict
            model_dict.update(pretrained_dict)
            # 3. load the new state dict
            model.load_state_dict(model_dict)
        else:
            model.load_state_dict(torch.load(args.bert_model_path))

    sig = Sigmoid()
    if args.local_rank == 0:
        torch.distributed.barrier()

    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(model,
                                                          device_ids=[args.local_rank],
                                                          output_device=args.local_rank,
                                                          find_unused_parameters=True)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    global_step = 0
    nb_tr_steps = 0
    tr_loss = 0

    loss_fct = CrossEntropyLoss()
    if args.do_train:
        if args.local_rank in [-1, 0]:
            tb_writer = SummaryWriter()

        # Cache file name: last path component of the model name, task, token budget, casing flag
        UC = "" if args.do_lower_case else "UC"
        cached_train_features_file = os.path.join(args.data_dir, 'train_{0}_{1}_{2}{3}'.format(
        list(filter(None, args.bert_model.split('/'))).pop(),
                    str(task_name),
                    str(args.max_tokens),
                    UC))

        # Prepare data loader
        logger.info("Loading training dataset")
        train_data = load_dataset(cached_train_features_file, args, processor, tokenizer, output_mode, data_type="train")

        # Compare the lower-cased task name, consistent with the processor lookup above
        if task_name == "arxiv":
            logger.info("Loading validation dataset")
            # NOTE(review): this path has the same 'train_' prefix as the training
            # cache above -- confirm load_dataset separates the two via data_type.
            cached_val_features_file = os.path.join(args.data_dir, 'train_{0}_{1}_{2}{3}'.format(
            list(filter(None, args.bert_model.split('/'))).pop(),
                        str(task_name),
                        str(args.max_tokens),
                        UC))
            val_data = load_dataset(cached_val_features_file, args, processor, tokenizer, output_mode, data_type="val")
        else:
            logger.info("Spliting train dataset into validation dataset")
            train_data1, train_data2, train_data3 = train_data.tensors
            # Apply one shared permutation to all three tensors so rows stay aligned
            rand = torch.randperm(train_data1.shape[0])
            train_data1 = train_data1[rand]
            train_data2 = train_data2[rand]
            train_data3 = train_data3[rand]

            # The first val_size shuffled rows become the validation set
            val_size = int(train_data1.shape[0] * args.val_split)
            val_data1 = train_data1[:val_size]
            val_data2 = train_data2[:val_size]
            val_data3 = train_data3[:val_size]

            train_data1 = train_data1[val_size:]
            train_data2 = train_data2[val_size:]
            train_data3 = train_data3[val_size:]

            train_data = TensorDataset(train_data1, train_data2, train_data3)
            val_data = TensorDataset(val_data1, val_data2, val_data3)

        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
            val_sampler = RandomSampler(val_data)
        else:
            train_sampler = DistributedSampler(train_data)
            val_sampler = DistributedSampler(val_data)
        train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size)
        val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=args.eval_batch_size)

        num_train_optimization_steps = (len(train_dataloader)) // args.gradient_accumulation_steps * args.max_epochs

        # Prepare optimizer: no weight decay for biases and LayerNorm parameters
        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
            ]
        if args.fp16:
            try:
                from apex.optimizers import FP16_Optimizer
                from apex.optimizers import FusedAdam
            except ImportError:
                raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

            optimizer = FusedAdam(optimizer_grouped_parameters,
                                  lr=args.learning_rate,
                                  bias_correction=False,
                                  max_grad_norm=1.0)
            if args.loss_scale == 0:
                optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
            else:
                optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
            warmup_linear = WarmupLinearSchedule(warmup=(args.warmup_epochs / args.max_epochs),
                                                 t_total=num_train_optimization_steps)

        else:
            optimizer = BertAdam(optimizer_grouped_parameters,
                                 lr=args.learning_rate,
                                 warmup=(args.warmup_epochs / args.max_epochs),
                                 t_total=num_train_optimization_steps)

        logger.info("***** Running training *****")
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_optimization_steps)

        best_val_loss = 90999990.0  # sentinel larger than any expected summed validation loss
        patience = 0
        val_losses = []
        output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
        e_iter = trange(int(args.max_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
        for i, _ in enumerate(e_iter):
            # ---- training pass ----
            torch.cuda.empty_cache()
            model.train()
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            t_iter = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
            for step, t_batch in enumerate(t_iter):
                input_ids, input_mask, label_ids = get_batch(args, t_batch, device, cls_token)
                outputs = model(input_ids, input_mask, labels=label_ids)
                loss = outputs[0]   # model outputs are always tuple in transformers (see doc)

                if n_gpu > 1:
                    loss = loss.mean()

                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    optimizer.backward(loss)
                else:
                    loss.backward()

                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16:
                        # modify learning rate with special warm up BERT uses
                        # if args.fp16 is False, BertAdam is used that handles this automatically
                        lr_this_step = args.learning_rate * warmup_linear.get_lr(global_step, args.warmup_proportion)
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr_this_step

                    optimizer.step()
                    optimizer.zero_grad()

                    global_step += 1
                    if args.local_rank in [-1, 0]:
                        # loading gpus takes a while the first iteration, get a better estimate this way
                        if i == 0 and step == 0:
                            t_iter.start_t = time.time()
                            e_iter.start_t = time.time()

                        acc = np.sum(np.argmax(outputs[1].cpu().detach().numpy(), axis=1) == label_ids.cpu().numpy()) / label_ids.shape[0]
                        t_iter.set_description("loss{0:.3f},acc{1:.3f}".format(loss, acc))
                        tb_writer.add_scalar('lr', optimizer.get_lr()[0], global_step)
                        tb_writer.add_scalar('loss', loss.item(), global_step)
                        tb_writer.add_scalar('acc', acc, global_step)

            # ---- validation pass: sum the loss over all validation batches ----
            torch.cuda.empty_cache()
            model.eval()
            val_loss = 0
            out_label_ids = None
            with torch.no_grad():
                for v_batch in tqdm(val_dataloader, desc="valuating"):
                    input_ids, input_mask, label_ids = get_batch(args, v_batch, device, cls_token)
                    outputs = model(input_ids, input_mask, labels=label_ids)
                    loss = outputs[0]   # model outputs are always tuple in transformers (see doc)

                    if n_gpu > 1:
                        loss = loss.mean()
                    val_loss += loss.item()
            val_losses.append(val_loss)

            # ---- early stopping on the summed validation loss ----
            # NOTE(review): because the rank check is part of the condition, non-zero
            # ranks always take the else branch and may stop at a different epoch than
            # rank 0 -- confirm this is intended for distributed runs.
            if val_loss < best_val_loss and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
                best_val_loss = val_loss
                patience = 0
                # NOTE(review): the best weights are not written to disk here.
                model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self

                print("best epoch {} loss {}".format(i, best_val_loss))
            else:
                patience += 1
                if patience >= args.patience:
                    break

    ### Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
    output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)

    if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        # Save a trained model, configuration and tokenizer
        model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self

        # If we save using the predefined names, we can load using `from_pretrained`
        output_config_file = os.path.join(args.output_dir, CONFIG_NAME)

        # NOTE(review): the state dict itself is not saved to output_model_file in
        # this block; only config, vocabulary and arguments are written.
        if "reformer" not in args.experiment:
            model_to_save.config.to_json_file(output_config_file)
            tokenizer.save_vocabulary(args.output_dir)

            # Reload the vocabulary that was just written
            tokenizer = BertTokenizer.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)

            # Good practice: save your training arguments together with the trained model
            output_args_file = os.path.join(args.output_dir, 'training_args.bin')
            torch.save(args, output_args_file)
            with open(os.path.join(args.output_dir, 'commandline_args.txt'), 'w') as f:
                json.dump(save_args.__dict__, f, indent=2)
    else:
        # Not training (or not the main process): rebuild the model and load the
        # weights expected at output_model_file
        model = get_model(args, num_labels, len(tokenizer.vocab), cls_token, sep_token, args.token_shift)
        model.load_state_dict(torch.load(output_model_file))
        model.to(device)
        if args.local_rank != -1:
            model = torch.nn.parallel.DistributedDataParallel(model,
                                                              device_ids=[args.local_rank],
                                                              output_device=args.local_rank,
                                                              find_unused_parameters=True)
        elif n_gpu > 1:
            model = torch.nn.DataParallel(model)



    ### Evaluation
    if args.do_eval and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        UC = "" if args.do_lower_case else "UC"
        cached_eval_features_file = os.path.join(args.data_dir, 'dev_{0}_{1}_{2}{3}'.format(
            list(filter(None, args.bert_model.split('/'))).pop(),
                        str(task_name),
                        str(args.max_tokens),
                        UC))

        logger.info("Loading test dataset")
        eval_data =  load_dataset(cached_eval_features_file, args, processor, tokenizer, output_mode, data_type = "test")
        eval_data_long = []
        eval_data_short = []
        #import pdb; pdb.set_trace()
        '''for item in eval_data:
            if item[1].sum().item() <= args.max_seq_length -2:
                eval_data_short.append(item)
            else:
                eval_data_long.append(item)
            
        eval_data = eval_data_long'''

        logger.info("***** Running evaluation *****")
        #logger.info("  Num examples = %d", len(eval_examples))
        logger.info("  Batch size = %d", args.eval_batch_size)
        # Run prediction for full data
        if args.local_rank == -1:
            eval_sampler = SequentialSampler(eval_data)
        else:
            eval_sampler = DistributedSampler(eval_data)  # Note that this sampler samples randomly
        eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

        model.eval()
        eval_loss = 0
        nb_eval_steps = 0
        preds = []
        out_label_ids = None

        model.token_shift = args.token_shift
        torch.cuda.empty_cache()
        for t_batch in tqdm(eval_dataloader, desc="Evaluating"):

            input_ids, input_mask,  label_ids = get_batch(args, t_batch, device, cls_token)
            with torch.no_grad():
                outputs = model(input_ids, input_mask, labels = label_ids)
            
            tmp_eval_loss, logits = outputs[:2]
            
            eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if len(preds) == 0:
                preds.append(logits.detach().cpu().numpy())
                out_label_ids = label_ids.detach().cpu().numpy()
            else:
                preds[0] = np.append(
                    preds[0], logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids, label_ids.detach().cpu().numpy(), axis=0)

        eval_loss = eval_loss / nb_eval_steps
        preds = preds[0]

        if output_mode == "classification":
            preds = np.argmax(preds, axis=1)
        elif output_mode == "regression":
            preds = np.squeeze(preds)
        elif output_mode == "multi_classification":
            preds = preds > .5
        result = compute_metrics(task_name, preds, out_label_ids)

        loss = tr_loss/global_step if args.do_train else None

        result['eval_loss'] = eval_loss
        result['global_step'] = global_step
        result['loss'] = loss

        with open(os.path.join(args.output_dir, "eval_results.txt"), "w") as writer:
            logger.info("***** Eval results *****")
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

        with open(os.path.join(args.output_dir, 'val_loss.txt'), 'w') as f:
            for item in val_losses:
                f.write("%s\n" % item)

        acc = result['acc']
        with open(os.path.join(args.output_dir, "results.csv"), "w") as writer:
            writer.write(f"{args.task_name}, {args.experiment}, {args.model_name_or_path[13:]},{args.learning_rate},{args.reformer_hashes},{acc}\n")
    with open(testDataName, 'rb') as f:
        A = pickle.load(f)

    loader = Data.DataLoader(dataset=A,
                             batch_size=BATCHSIZE,
                             collate_fn=A.collate_fn)
    tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
    model = BertLinear.from_pretrained('bert-base-chinese')

    #modelName = config["checkpoint"] + 'BertLinear1000.pt'

    model.load_state_dict(torch.load(modelName))
    model = model.to(device)
    model.eval()
    f = Sigmoid()

    to_Write = {}
    with torch.no_grad():
        with open(predictName, 'w') as f_predict:
            for batch in tqdm(loader):
                questionId = batch['id']
                #print(questionId)
                X = batch['input_ids']
                X = torch.tensor(X).to(device)
                print('X', X)

                token_type_ids = batch['token_type_ids']
                token_type_ids = torch.tensor(token_type_ids).to(device)
                attention_mask = batch['attention_mask']
                attention_mask = torch.tensor(attention_mask).to(device)
Esempio n. 12
0
    def train(self, verbose=True):
        """Train ``self.model`` with Adam and a BCE-with-logits objective.

        Hyper-parameters are read from ``self.training_params`` (keys
        ``BATCH_SIZE``, ``TRAINING_ITERATIONS``, ``CHECKPOINT_AFTER``,
        ``SAVEPOINT_AFTER`` and ``TEST_BATCH_SIZE``).  The first
        ``self.num_train`` rows of ``self.features`` / ``self.labels`` are used
        as training data.  The model state dict is written to ``self.model_fn``
        every ``SAVEPOINT_AFTER`` iterations and once more when training ends.

        Args:
            verbose: when true, print checkpoint statistics, save notices and
                per-epoch timings.
        """
        # grab training params
        params = self.training_params
        BATCH_SIZE = params['BATCH_SIZE']
        TRAINING_ITERATIONS = params['TRAINING_ITERATIONS']
        CHECKPOINT_AFTER = params['CHECKPOINT_AFTER']
        SAVEPOINT_AFTER = params['SAVEPOINT_AFTER']
        TEST_BATCH_SIZE = params['TEST_BATCH_SIZE']

        model = self.model

        X = self.features[:self.num_train]
        Y = self.labels[:self.num_train]
        n_samples = X.shape[0]

        # See: https://discuss.pytorch.org/t/multi-label-classification-in-pytorch/905/45
        training_loss = torch.nn.BCEWithLogitsLoss()
        opt = optim.Adam(model.parameters(), lr=3e-4, weight_decay=0.001)

        itr = 1
        for epoch in range(TRAINING_ITERATIONS):  # loop over the dataset multiple times
            t0 = time.time()
            running_loss = 0.0

            # Shuffle *all* sample indices; the previous upper bound of
            # ``n_samples - 1`` silently excluded the last sample.
            rand_idx = list(range(n_samples))
            random.shuffle(rand_idx)

            # Sample all data points (the last batch may be shorter)
            indices = [
                rand_idx[start:start + BATCH_SIZE]
                for start in range(0, n_samples, BATCH_SIZE)
            ]

            for idx in indices:
                # zero the parameter gradients
                opt.zero_grad()

                inputs = torch.from_numpy(X[idx, :]).float().to(device=self.device)
                y_true = torch.from_numpy(Y[idx, :]).float().to(device=self.device)

                # forward + backward + optimize
                outputs = model(inputs)
                loss = training_loss(outputs, y_true)
                loss.backward()
                opt.step()

                running_loss += loss.item()

                if itr % CHECKPOINT_AFTER == 0:
                    # Report loss / exact-match accuracy on a random subset of
                    # the training data.
                    eval_pool = list(range(n_samples))
                    random.shuffle(eval_pool)
                    test_inds = eval_pool[:TEST_BATCH_SIZE]

                    # No gradients are needed for reporting.
                    with torch.no_grad():
                        eval_inputs = torch.from_numpy(
                            X[test_inds, :]).float().to(device=self.device)
                        y_out = torch.from_numpy(
                            Y[test_inds]).float().to(device=self.device)
                        eval_outputs = model(eval_inputs)
                        eval_loss = training_loss(eval_outputs, y_out)
                        predictions = Sigmoid()(eval_outputs).round()
                        # A sample counts as correct only if every label
                        # matches; averaging over the rows actually drawn
                        # avoids indexing past a short subset.
                        accuracy = torch.eq(predictions, y_out).all(
                            dim=1).float().mean().item()

                    if verbose:
                        print("loss:   " + str(eval_loss.item()) +
                              " , acc: " + str(accuracy))

                if itr % SAVEPOINT_AFTER == 0:
                    torch.save(model.state_dict(), self.model_fn)
                    if verbose:
                        print('Saved model at {}'.format(self.model_fn))
                    # writer.add_scalar('Loss/train', running_loss, epoch)

                itr += 1

            if verbose:
                print('Done with epoch {} in {}s'.format(
                    epoch,
                    time.time() - t0))

        torch.save(model.state_dict(), self.model_fn)
        print('Saved model at {}'.format(self.model_fn))

        print('Done training')
Esempio n. 13
0
    def __init__(self,
                 num_channels=32,
                 feat_channels=(64, 128, 256, 512, 1024),
                 residual='conv'):
        """Build the layers of the 3D U-Net.

        Args:
            num_channels: channel count of the input to the first encoder
                block; the final 1*1 convolution also outputs this many
                channels.
            feat_channels: five channel widths, indexed 0-4, one per encoder
                level.  Any indexable sequence works; the default is a tuple so
                that no mutable object is shared between instances.
            residual: forwarded unchanged to every ``Conv3D_Block``.  'conv'
                sends the residual input through a 1*1 conv in every layer,
                None removes the residuals.
        """
        super(UNet3D, self).__init__()

        # Encoder downsamplers
        self.pool1 = MaxPool3d((1, 2, 2))
        self.pool2 = MaxPool3d((1, 2, 2))
        self.pool3 = MaxPool3d((1, 2, 2))
        self.pool4 = MaxPool3d((1, 2, 2))

        # Encoder convolutions
        self.conv_blk1 = Conv3D_Block(num_channels,
                                      feat_channels[0],
                                      residual=residual)
        self.conv_blk2 = Conv3D_Block(feat_channels[0],
                                      feat_channels[1],
                                      residual=residual)
        self.conv_blk3 = Conv3D_Block(feat_channels[1],
                                      feat_channels[2],
                                      residual=residual)
        self.conv_blk4 = Conv3D_Block(feat_channels[2],
                                      feat_channels[3],
                                      residual=residual)
        self.conv_blk5 = Conv3D_Block(feat_channels[3],
                                      feat_channels[4],
                                      residual=residual)

        # Decoder convolutions: each takes twice the level's width as input
        # (presumably the upsampled features concatenated with the skip
        # connection -- confirm in forward()).
        self.dec_conv_blk4 = Conv3D_Block(2 * feat_channels[3],
                                          feat_channels[3],
                                          residual=residual)
        self.dec_conv_blk3 = Conv3D_Block(2 * feat_channels[2],
                                          feat_channels[2],
                                          residual=residual)
        self.dec_conv_blk2 = Conv3D_Block(2 * feat_channels[1],
                                          feat_channels[1],
                                          residual=residual)
        self.dec_conv_blk1 = Conv3D_Block(2 * feat_channels[0],
                                          feat_channels[0],
                                          residual=residual)

        # Decoder upsamplers
        self.deconv_blk4 = Deconv3D_Block(feat_channels[4], feat_channels[3])
        self.deconv_blk3 = Deconv3D_Block(feat_channels[3], feat_channels[2])
        self.deconv_blk2 = Deconv3D_Block(feat_channels[2], feat_channels[1])
        self.deconv_blk1 = Deconv3D_Block(feat_channels[1], feat_channels[0])

        # Final 1*1 Conv Segmentation map
        self.one_conv = Conv3d(feat_channels[0],
                               num_channels,
                               kernel_size=1,
                               stride=1,
                               padding=0,
                               bias=True)

        # Activation function
        self.sigmoid = Sigmoid()
Esempio n. 14
0
 def __init__(self, embedding_dim, bottleneck_dim, input_channels, output_channels, kernel, 
              dropout=0.0, activation='identity', dilation=1, groups=1, batch_norm=True):
     """Initialise the parent block with doubled output channels and add a sigmoid gate.

     All arguments are forwarded positionally to the parent constructor; only
     ``output_channels`` is altered (multiplied by two) on the way.
     """
     # Twice the requested width is handed to the parent -- presumably one half
     # is later used as the gate input (confirm in forward()).
     doubled_channels = 2 * output_channels
     super(HighwayConvBlockGenerated, self).__init__(
         embedding_dim, bottleneck_dim, input_channels, doubled_channels, kernel,
         dropout, activation, dilation, groups, batch_norm)
     self._gate = Sigmoid()
Esempio n. 15
0
 def __init__(self):
     """Create the sigmoid activation and the summed binary cross-entropy loss."""
     super().__init__()
     self.sig = Sigmoid()
     # reduction='sum' adds the per-element losses instead of averaging them.
     self.loss = BCELoss(reduction='sum')
 def __init__(self):
     """Create a 2-input, 1-output linear layer followed by a sigmoid module."""
     super(LinearClassifier, self).__init__()
     # Two input features are mapped to a single output value.
     self.fully_connected = Linear(2, 1)
     self.sigmoid = Sigmoid()
Esempio n. 17
0
#! /usr/bin/env python3

from collections import OrderedDict

from context import archetypes
from context import utensils

from utensils.datasets import MnistDataset
from archetypes.autoencoder import Autoencoder

import torch
from torch.nn import Linear, Sigmoid, LeakyReLU

# Shared activation modules and a squared-error loss summed over all elements.
sigmoid = Sigmoid()
leaky_relu = LeakyReLU()
mse = torch.nn.MSELoss(reduction='sum')

# Loader settings: mini-batch size and whether to shuffle; nepochs is
# presumably the number of training epochs (its use is outside this excerpt).
bs = 256
shuf = True
nepochs = 1

# NOTE(review): hard-coded, machine-specific location of the gzipped MNIST files.
data_home = "/home/jamc/Data/MNIST_data"
image_fn = f"{data_home}/train-images-idx3-ubyte.gz"
label_fn = f"{data_home}/train-labels-idx1-ubyte.gz"

# shape=(-1, ) presumably flattens each image to a vector -- TODO confirm
# against MnistDataset.
dataset = MnistDataset(image_fn, label_fn, shape=(-1, ))

training = torch.utils.data.DataLoader(dataset, batch_size=bs, shuffle=shuf)

encoder = OrderedDict(
    (('Hidden_Layer_1', Linear(dataset.images.shape[-1],
 def beta(self, state_index):
     """Return beta(state) for the state at ``state_index``.

     Input  : index in [0, n_states - 1]
     Return : sigmoid of the product of the state variable with
              ``self.upsilon``, a variable of shape (1)
     """
     state_features = self.varFromStateIndex(state_index)
     logit = torch.matmul(state_features, self.upsilon)
     squash = Sigmoid()
     return squash(logit)
def evaluate(model: PreTrainedModel, dataloader: DataLoader,
             device: str) -> (int, List[int], List[int]):
    """
    Evaluates a Bert Model on a labelled data set.

    Args:
        model: the BertModel to be evaluated; must be a
            BertForSequenceClassification, a
            BertForMultiLabelSequenceClassification or a
            DistilBertForSequenceClassification
        dataloader: the DataLoader with the test data; every batch must hold
            (input_ids, input_mask, segment_ids, label_ids)
        device: the device where evaluation will take place ("cpu" or "cuda")

    Returns: a tuple with (the evaluation loss averaged over the batches,
            a numpy array with the correct labels, and a numpy array with the
            predicted labels)

    Raises:
        TypeError: if the model is not one of the supported classes
            (previously this surfaced later as an UnboundLocalError).
    """
    # The multi-label check comes first so that it wins should that class
    # ever derive from one of the single-label classes.
    is_multi_label = isinstance(model, BertForMultiLabelSequenceClassification)
    is_distil = (not is_multi_label
                 and isinstance(model, DistilBertForSequenceClassification))
    is_bert = (not is_multi_label
               and isinstance(model, BertForSequenceClassification))
    if not (is_multi_label or is_distil or is_bert):
        raise TypeError(
            "Unsupported model type for evaluation: %s" % type(model).__name__)

    model.eval()
    sigmoid = Sigmoid()

    eval_loss = 0
    nb_eval_steps = 0
    predicted_labels, correct_labels = [], []

    for batch in tqdm(dataloader, desc="Evaluation iteration"):
        batch = tuple(t.to(device) for t in batch)
        input_ids, input_mask, segment_ids, label_ids = batch

        with torch.no_grad():
            if is_distil:
                # DistilBERT is called without token type ids.
                outputs = model(input_ids,
                                attention_mask=input_mask,
                                labels=label_ids)
            else:
                outputs = model(input_ids,
                                attention_mask=input_mask,
                                token_type_ids=segment_ids,
                                labels=label_ids)

        # Integer indexing works for plain tuples as well as for the
        # dict-like output objects of newer transformers releases, where
        # tuple-unpacking would yield the keys instead of the tensors.
        tmp_eval_loss, logits = outputs[0], outputs[1]

        if is_multi_label:
            # Independent per-label probabilities, thresholded at 0.5.
            probabilities = sigmoid(logits).detach().cpu().numpy()
            predicted_labels += list(probabilities >= 0.5)
        else:
            # Convert to numpy explicitly instead of relying on numpy's
            # implicit handling of torch tensors.
            predicted_labels += list(
                np.argmax(logits.detach().cpu().numpy(), axis=1))

        correct_labels += list(label_ids.to('cpu').numpy())

        eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

    eval_loss = eval_loss / nb_eval_steps

    correct_labels = np.array(correct_labels)
    predicted_labels = np.array(predicted_labels)

    return eval_loss, correct_labels, predicted_labels
def define_model(A, X):
    """Assemble the sequential model: two tanh ``Layer``s, a linear read-out and a sigmoid.

    ``A`` is handed to both ``Layer`` instances; ``X.shape[1]`` is the input
    width of the first layer.
    """
    first_layer = Layer(A, X.shape[1], 4, 'tanh')
    second_layer = Layer(A, 4, 2, 'tanh')
    readout = Linear(2, 1, ' ')
    return Sequential(first_layer, second_layer, readout, Sigmoid())
 def __init__(self, inplace=True):
     """Create the sigmoid module used by this activation.

     Args:
         inplace: accepted by the signature but not used in this constructor.
     """
     super(h_swish, self).__init__()
     # NOTE(review): `inplace` is ignored here -- confirm whether it was meant
     # to be forwarded to the activation.
     self.sigmoid = Sigmoid()
Esempio n. 22
0
 def __init__(self):
     """Create the sigmoid activation and the mean-absolute-error criterion.

     The class name and the ``__l1_loss__`` attribute both announce an
     L1 / MAE loss, but ``MSELoss`` was instantiated; ``L1Loss`` is used so
     the criterion matches its name.
     """
     super(SigmoidMAELoss, self).__init__()
     # Local import, as before, so the block does not depend on file-level imports.
     from torch.nn import L1Loss, Sigmoid
     self.__sigmoid__ = Sigmoid()
     self.__l1_loss__ = L1Loss()
Esempio n. 23
0
 def forward(self, x, m):
     """Run the main network, scale its output by ``m`` and return it as a column.

     Args:
         x: input passed to ``self.main_net``.
         m: factor multiplied element-wise with the network output.

     Returns:
         A tuple ``(None, out)`` where ``out`` has shape (-1, 1).  For
         ``BaseClassifier`` instances ``out`` is passed through a sigmoid;
         otherwise the scaled output is returned as is.
     """
     x = m * self.main_net(x)
     # Previously `out` was only bound for classifiers, so any other subclass
     # crashed with UnboundLocalError on the next line.
     out = Sigmoid()(x) if isinstance(self, BaseClassifier) else x
     out = out.view(-1, 1)
     return None, out
Esempio n. 24
0
def eval_on_model(args):
    """Tag a single audio file with a pre-trained model and print the tag frequencies.

    The file is looked up in the annotation CSV, split into segments, and all
    segments are pushed through the model in one batch.  For every tag, the
    number of segments whose sigmoid score exceeds ``args.eval_threshold`` is
    counted and the non-zero counts are printed in descending order.

    Args:
        args: parsed options; this function reads ``device``, ``p_out``,
            ``tensorboard_exp_name``, ``annotation_file``, ``audio_file``,
            ``eval_threshold``, ``loss``, ``checkpoint`` and the ``ckpt_*``
            options.

    Raises:
        NotImplementedError: if ``args.device`` is 'cpu'.
        AssertionError: if the audio file is not in the annotation file or no
            checkpoint is given.
    """
    device = args.device
    if device == 'cpu':
        raise NotImplementedError("CPU training is not implemented.")
    device = torch.device(args.device)
    torch.cuda.set_device(device)

    # build model
    model = build_model(args)
    model.to(device)

    # output dir
    p_out = Path(
        args.p_out).joinpath(f"{model.name}-{args.tensorboard_exp_name}")
    if not p_out.exists():
        p_out.mkdir(exist_ok=True, parents=True)

    # dataset & loader: match on the last two path components of the file
    annotation = pd.read_csv(args.annotation_file)
    query = annotation[annotation.mp3_path.str.match('/'.join(
        args.audio_file.split('/')[-2:]))]
    assert query.shape[0] != 0, f"Cannot find the audio file: {args.audio_file}"
    # split audio info and segment audio: the first 50 columns hold the tags,
    # the remaining ones the song information
    threshold = args.eval_threshold
    song_info = query[query.columns.values[50:]]
    tags = query.columns.values[:50]
    labels = query[tags].values[0]
    label_names = tags[labels.astype(bool)]
    segments = _segment_audio(_load_audio(args.audio_file, sample_rate=22050),
                              n_samples=59049)
    LOG.info(f"Song info: {song_info}")
    LOG.info(f"Number of segments: {len(segments)}")
    LOG.info(f"Ground truth tags: {label_names}")
    LOG.info(f"Positive tag threshold: {threshold}")

    # create loss
    loss_fn = get_loss(args.loss)

    # load checkpoint OR init state_dict
    if args.checkpoint is not None:
        state_dict = load_ckpt(args.checkpoint,
                               reset_epoch=args.ckpt_epoch,
                               no_scheduler=args.ckpt_no_scheduler,
                               no_optimizer=args.ckpt_no_optimizer,
                               no_loss_fn=args.ckpt_no_loss_fn,
                               map_values=args.ckpt_map_values)
        model_dict = {'model': model} if 'model' in state_dict else None
        apply_state_dict(state_dict, model=model_dict)
        best_val_loss = state_dict['val_loss']
        epoch = state_dict['epoch']
        global_i = state_dict['global_i']
        LOG.info(
            f"Checkpoint loaded. Epoch trained {epoch}, global_i {global_i}, best val {best_val_loss:.6f}"
        )
    else:
        raise AssertionError("Pre-trained checkpoint must be provided.")

    # start testing
    model.eval()
    sigmoid = Sigmoid().to(device)
    t_start = time.time()

    # concatenate segments into one (n_segments, 1, n_samples) batch
    segments = torch.from_numpy(
        np.concatenate([seg.reshape(1, 1, -1) for seg in segments
                        ])).to(torch.float32).cuda(device=device)
    # One copy of the ground-truth labels per segment.  The copy count used to
    # be a hard-coded 10, which only matched files yielding exactly 10 segments.
    n_segments = segments.shape[0]
    targets = torch.from_numpy(
        np.concatenate([labels.reshape(1, -1)] * n_segments)).to(
            torch.float32).cuda(device=device)

    # forward pass
    with torch.no_grad():
        logits = model(segments)
        out = sigmoid(logits)
        loss = loss_fn(logits, targets)

    # binarise the scores and count, per tag, the segments that fired
    out = out.cpu().numpy()
    out[out > threshold] = 1
    out[out <= threshold] = 0
    out = np.sum(out, axis=0)
    res = pd.DataFrame(data={'tags': tags, 'freq': out})
    res = res[res.freq != 0].sort_values(by='freq', ascending=False)
    CONSOLE.print(res)

    LOG.info(f"Testing speed: {time.time() - t_start:.4f}s, "
             f"loss: {loss.item()}, ")
    return
Esempio n. 25
0
    def __init__(self, name, in_size, device):
        """Build the four-stage convolutional network, move it to ``device`` and create its SGD optimizer.

        Args:
            name: stored on the instance as ``self.name``.
            in_size: input size; must be divisible by 16.  It determines the
                kernel sizes of the four transposed convolutions.
            device: device the module is moved to; also stored on the instance.
        """
        super(FCN, self).__init__()

        assert (in_size % 16 == 0)

        self.name = name
        self.in_size = in_size
        self.device = device

        def conv_bn_relu(c_in, c_out, kernel, stride, padding):
            # One Conv2d -> BatchNorm2d -> ReLU triple, in that order.
            return [
                Conv2d(in_channels=c_in, kernel_size=kernel, out_channels=c_out,
                       stride=stride, padding=padding),
                BatchNorm2d(num_features=c_out, momentum=0.1),
                ReLU(inplace=True),
            ]

        def double_conv(c_in, c_out, kernel=3, stride=1, padding=1):
            # Two triples; only the first may deviate from the 3x3/stride-1 layout.
            first = conv_bn_relu(c_in, c_out, kernel, stride, padding)
            second = conv_bn_relu(c_out, c_out, 3, 1, 1)
            return Sequential(*(first + second))

        def to_single_channel(c_in, kernel):
            # Transposed convolution producing a one-channel map.
            return ConvTranspose2d(in_channels=c_in, kernel_size=kernel,
                                   out_channels=1, stride=1, padding=0)

        def halve():
            return MaxPool2d(kernel_size=2, stride=2)

        # Stage 1 (the only stage whose first conv is 5x5 with stride 2)
        self.convBlock1 = double_conv(3, 32, kernel=5, stride=2, padding=2)
        self.upsampling1 = to_single_channel(32, int(self.in_size / 2) + 1)
        self.pool1 = halve()

        # Stage 2
        self.convBlock2 = double_conv(32, 64)
        self.upsampling2 = to_single_channel(64, 3 * int(self.in_size / 4) + 1)
        self.pool2 = halve()

        # Stage 3
        self.convBlock3 = double_conv(64, 96)
        self.upsampling3 = to_single_channel(96, 7 * int(self.in_size / 8) + 1)
        self.pool3 = halve()

        # Stage 4
        self.convBlock4 = double_conv(96, 128)
        self.upsampling4 = to_single_channel(128, 15 * int(self.in_size / 16) + 1)

        # 1x1 convolution over four input channels followed by a sigmoid
        self.convScore = Sequential(
            Conv2d(in_channels=4, kernel_size=1, out_channels=1, stride=1, padding=0),
            Sigmoid()
        )

        # Module.to() moves the parameters in place and returns the module itself.
        self.to(device)

        self.optimizer = SGD(self.parameters(), lr=LR_SGD, momentum=MOMENTUM_SGD,
                             nesterov=True, weight_decay=WD_SGD)
Esempio n. 26
0
def test_on_model(args):
    """Evaluate a pre-trained model on the 'test' split and log AUC / AP scores.

    The running mean of the test loss is written to a summary writer after
    every batch; the tag-wise and sample-wise AUC and AP values are logged once
    all batches have been processed.

    Args:
        args: parsed options; this function reads ``device``, ``p_out``,
            ``tensorboard_exp_name``, ``p_data``, ``batch_size``,
            ``n_workers``, ``loss``, ``checkpoint``, the ``ckpt_*`` options and
            ``data_normalization``.

    Raises:
        NotImplementedError: if ``args.device`` is 'cpu'.
        AssertionError: if no checkpoint is given.
    """
    if args.device == 'cpu':
        raise NotImplementedError("CPU training is not implemented.")
    device = torch.device(args.device)
    torch.cuda.set_device(device)

    # build model
    model = build_model(args)
    model.to(device)

    # output dir
    out_dir = Path(args.p_out).joinpath(
        f"{model.name}-{args.tensorboard_exp_name}")
    if not out_dir.exists():
        out_dir.mkdir(exist_ok=True, parents=True)

    # dataset & loader (the last, possibly smaller, batch is kept)
    test_dataset = MTTDataset(path=args.p_data, split='test')
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=args.n_workers,
                             pin_memory=True,
                             drop_last=False)
    test_steps = test_dataset.calc_steps(args.batch_size, drop_last=False)
    LOG.info(f"Total testing steps: {test_steps}")
    LOG.info(f"Testing data size: {len(test_dataset)}")

    # loss and metric
    loss_fn = get_loss(args.loss)
    metric = AUCMetric()

    # a checkpoint is mandatory for testing
    if args.checkpoint is None:
        raise AssertionError("Pre-trained checkpoint must be provided.")
    state_dict = load_ckpt(args.checkpoint,
                           reset_epoch=args.ckpt_epoch,
                           no_scheduler=args.ckpt_no_scheduler,
                           no_optimizer=args.ckpt_no_optimizer,
                           no_loss_fn=args.ckpt_no_loss_fn,
                           map_values=args.ckpt_map_values)
    model_dict = {'model': model} if 'model' in state_dict else None
    apply_state_dict(state_dict, model=model_dict)
    best_val_loss = state_dict['val_loss']
    epoch = state_dict['epoch']
    global_i = state_dict['global_i']
    LOG.info(
        f"Checkpoint loaded. Epoch trained {epoch}, global_i {global_i}, best val {best_val_loss:.6f}"
    )

    # summary writer
    writer = SummaryWriter(log_dir=out_dir.as_posix(), filename_suffix='-test')

    # start testing
    model.eval()
    sigmoid = Sigmoid().to(device)
    status_col = TextColumn("")
    running_loss = 0

    # Normalisation statistics are only handed to the prefetcher on request.
    if args.data_normalization:
        norm_mean, norm_std = MTT_MEAN, MTT_STD
    else:
        norm_mean, norm_std = None, None
    fetcher = DataPrefetcher(test_loader,
                             mean=norm_mean,
                             std=norm_std,
                             device=device)
    samples, targets = fetcher.next()

    with Progress("[progress.description]{task.description}",
                  "[{task.completed}/{task.total}]",
                  BarColumn(),
                  "[progress.percentage]{task.percentage:>3.0f}%",
                  TimeRemainingColumn(),
                  TextColumn("/"),
                  TimeElapsedColumn(),
                  status_col,
                  expand=False,
                  console=CONSOLE,
                  refresh_per_second=5) as progress:
        task = progress.add_task(description='[Test]', total=test_steps)
        n_batches = 0  # counter
        t_start = time.time()

        with torch.no_grad():
            # the loop ends once the prefetcher hands back None
            while samples is not None:
                # forward model
                logits = model(samples)
                scores = sigmoid(logits)
                batch_loss = loss_fn(logits, targets)

                # collect running loss
                running_loss += batch_loss.item()
                n_batches += 1
                writer.add_scalar('Test/Loss', running_loss / n_batches,
                                  n_batches)

                # auc metric
                metric.step(targets.cpu().numpy(), scores.cpu().numpy())

                # pre-fetch next samples
                samples, targets = fetcher.next()

                if not progress.finished:
                    status_col.text_format = f"Test loss: {running_loss/n_batches:.06f}"
                    progress.update(task, advance=1)

    auc_tag, auc_sample, ap_tag, ap_sample = metric.auc_ap_score
    seconds_per_step = (time.time() - t_start) / n_batches
    LOG.info(f"Testing speed: {seconds_per_step:.4f}s/it, "
             f"auc_tag: {auc_tag:.04f}, "
             f"auc_sample: {auc_sample:.04f}, "
             f"ap_tag: {ap_tag:.04f}, "
             f"ap_sample: {ap_sample:.04f}")
    writer.close()
Esempio n. 27
0
    def forward(self, input, target):
        """Compute the mean multi-label loss over a shortlist head and tail clusters.

        The label range is partitioned by ``[0] + self.cutoffs``.  Labels in
        the first partition are scored directly by ``self.head`` with a
        BCE-with-logits loss.  Each later partition ``i`` is scored by
        ``self.tail[i - 1]``; its sigmoid outputs are multiplied by the sigmoid
        of the matching cluster-root output of the head and scored with BCE.
        Per sample, the summed losses are divided by the number of loss terms
        that contributed, and the batch mean is returned.

        Args:
            input: feature tensor whose first dimension is the batch.
            target: 2-D integer tensor of label indices, one row per sample
                (rows are reduced with ``sum(dim=1)``).

        Returns:
            A scalar tensor holding the mean loss.

        Raises:
            RuntimeError: if ``input`` and ``target`` differ in batch size.
        """
        if input.size(0) != target.size(0):
            raise RuntimeError('Input and target should have the same size '
                               'in the batch dimension.')

        batch_size = target.size(0)

        total_cluster_loss = input.new_zeros(batch_size)

        head_onehot = target.new_zeros(batch_size, self.cutoffs[0])
        cluster_onehot = target.new_zeros(batch_size, self.n_clusters)

        # Stateless modules: create them once instead of once per cluster.
        sig_func = Sigmoid()
        cluster_loss_fct = BCELoss(reduction='none')

        cutoff_values = [0] + self.cutoffs
        for i in range(len(cutoff_values) - 1):

            low_idx = cutoff_values[i]
            high_idx = cutoff_values[i + 1]
            num_idx = high_idx - low_idx

            # Rows that own at least one label inside [low_idx, high_idx).
            target_mask = (target >= low_idx) & (target < high_idx)
            target_mask_row = torch.sum(target_mask, dim=1)
            # view(-1) keeps the index 1-D even when a single row matches;
            # squeeze() would collapse that case to a 0-d tensor.
            row_indices = target_mask_row.nonzero().view(-1)

            if row_indices.numel() == 0:
                continue

            input_subset = input.index_select(0, row_indices)
            target_onehot = self.get_multi_hot_label(target, target_mask,
                                                     row_indices, low_idx,
                                                     num_idx).detach()

            if i == 0:
                # Shortlist labels are scored by the head after the loop.
                head_onehot.index_copy_(0, row_indices, target_onehot)

            else:
                head_output = self.head(input_subset)
                cluster_root_output = head_output[:,
                                                  self.shortlist_size + i - 1]

                # A diagonal matrix turns the per-row root probability into a
                # row-wise scaling of the cluster probabilities.
                cluster_root_output = torch.diag(sig_func(cluster_root_output))

                cluster_output = self.tail[i - 1](input_subset)
                cluster_output = torch.mm(cluster_root_output,
                                          sig_func(cluster_output))

                # Mark which samples have a label in this cluster.
                temp_onehot = target.new_zeros(batch_size).index_fill_(
                    0, row_indices, 1)
                cluster_onehot[:, i - 1] = temp_onehot

                loss = cluster_loss_fct(cluster_output.view(-1, num_idx),
                                        target_onehot.view(-1, num_idx).float())
                loss = torch.sum(loss, dim=1)
                # Scatter the per-row losses back to their batch positions.
                temp_loss = input.new_zeros(batch_size)
                total_cluster_loss += temp_loss.index_copy_(
                    0, row_indices, loss)

        head_output = self.head(input)
        head_onehot = torch.cat((head_onehot, cluster_onehot), dim=1)
        loss_fct = BCEWithLogitsLoss(reduction='none')
        head_loss = loss_fct(head_output.view(-1, self.head_size),
                             head_onehot.view(-1, self.head_size).float())

        # The cluster-root loss is kept only for clusters the sample does NOT
        # belong to; for the others the cluster loss above is used instead.
        cluster_root_loss = head_loss[:, self.shortlist_size:]
        multiplier = (cluster_onehot == 0).long()
        cluster_root_loss = cluster_root_loss * multiplier.float()

        head_loss[:, self.shortlist_size:] = cluster_root_loss
        head_loss = torch.sum(head_loss, dim=1)

        # Build the size tensor on the device of the tensors it is combined
        # with; the former hard-coded .cuda() failed on CPU and on any GPU
        # other than the current default.
        cluster_sizes = torch.tensor(self.cluster_size,
                                     device=cluster_onehot.device)
        multiplier += cluster_onehot * cluster_sizes
        num_loss = torch.sum(multiplier, dim=1) + self.shortlist_size

        loss = ((head_loss + total_cluster_loss) / num_loss.float()).mean()

        return loss
Esempio n. 28
0
    def __init__(self, input_shape, n_convfilter,
                 n_fc_filters, h_shape, conv3d_filter_shape):
        """Set up the convolutional encoder and the input layers of its 3D GRU.

        Args:
            input_shape: Shape of the input batch. Element 1 is used as the
                channel count of the first convolution, and the whole value
                is handed to ``fc_in_featmap_size``. Presumably
                ``(batch_size, 3, img_w, img_h)`` -- confirm against callers.
            n_convfilter: Output channel counts of conv1..conv6 (indices 0-5
                are read).
            n_fc_filters: Element 0 is the output width of ``fc7`` and the
                first argument of every ``FCConv3DLayer_torch``.
            h_shape: Forwarded unchanged to ``FCConv3DLayer_torch``.
            conv3d_filter_shape: Forwarded unchanged to
                ``FCConv3DLayer_torch``.
        """
        print("initializing \"encoder\"")
        super(encoder, self).__init__()

        def conv(in_channels, out_channels, kernel_size):
            # Kernel size and padding come from the same value so the two can
            # never drift apart; floor division keeps the padding an int.
            return Conv2d(in_channels=in_channels,
                          out_channels=out_channels,
                          kernel_size=kernel_size,
                          padding=(kernel_size - 1) // 2)

        # Convolution stack: one 7x7 layer followed by five 3x3 layers.
        # Layers are created in order conv1..conv6 so parameter registration
        # (and random initialisation order) is unchanged.
        self.conv1 = conv(input_shape[1], n_convfilter[0], 7)
        self.conv2 = conv(n_convfilter[0], n_convfilter[1], 3)
        self.conv3 = conv(n_convfilter[1], n_convfilter[2], 3)
        self.conv4 = conv(n_convfilter[2], n_convfilter[3], 3)
        self.conv5 = conv(n_convfilter[3], n_convfilter[4], 3)
        self.conv6 = conv(n_convfilter[4], n_convfilter[5], 3)

        # Pooling layer.
        self.pool = MaxPool2d(kernel_size=2, padding=1)

        # Nonlinearities of the network.
        self.leaky_relu = LeakyReLU(negative_slope=0.01)
        self.sigmoid = Sigmoid()
        self.tanh = Tanh()

        # Spatial size of the feature map fed to the fully connected layer,
        # as reported by fc_in_featmap_size for six pooling steps.
        fc7_feat_w, fc7_feat_h = self.fc_in_featmap_size(input_shape,
                                                         num_pooling=6)

        # Fully connected layer; int() guards against a non-integer product
        # in case the helper returns float sizes.
        self.fc7 = Linear(int(n_convfilter[5] * fc7_feat_w * fc7_feat_h),
                          n_fc_filters[0])

        # FCConv3DLayers used by the 3D convolutional GRU unit
        # (update gate, reset gate, and candidate state inputs, going by the
        # attribute names -- confirm against the forward pass).
        self.t_x_s_update = FCConv3DLayer_torch(n_fc_filters[0],
                                                conv3d_filter_shape, h_shape)
        self.t_x_s_reset = FCConv3DLayer_torch(n_fc_filters[0],
                                               conv3d_filter_shape, h_shape)
        self.t_x_rs = FCConv3DLayer_torch(n_fc_filters[0], conv3d_filter_shape,
                                          h_shape)
Esempio n. 29
0
# of the input parameters.
hidden_size = 2

# Since we are generating a binary class, we need to identify which blob
# (class) each point belongs to, so a single output unit is enough.
output_size = 1

# BCELoss expects targets with the same shape as the model output, i.e. one
# column per sample rather than a flat vector.
y = np.reshape(y, (len(y), 1))
inputs = torch.tensor(X, dtype=torch.float)
labels = torch.tensor(y, dtype=torch.float)

# We write a simple sequential two-layer neural network model.
# The second Linear consumes the output of the first, so its in_features must
# be hidden_size (not input_size); otherwise the model only works when the two
# sizes happen to coincide.
model = Sequential(Linear(in_features=input_size, out_features=hidden_size),
                   ReLU(),
                   Linear(in_features=hidden_size, out_features=output_size),
                   Sigmoid())

# Set up the loss function. We are currently using Binary Cross Entropy.
# You can also use torch.nn.BCEWithLogitsLoss and remove the Sigmoid
# layer from the model, as the sigmoid is already included in that loss.
criterion = torch.nn.BCELoss(reduction='mean')

# Set up the optimizer that updates the network parameters for binary
# classification. Do play around with other optimizers.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# How many epochs should be used for the model training?
num_epochs = 30

# At what frequency (in epochs) should we print the current loss?
print_freq = 10
Esempio n. 30
0
 def __init__(self):
     """Create three linear layers (8->6, 6->4, 4->1) and a sigmoid module."""
     super(Model, self).__init__()
     # Layer widths, named once so adjacent layers visibly share a dimension.
     in_dim, first_hidden, second_hidden, out_dim = 8, 6, 4, 1
     self.l1 = Linear(in_dim, first_hidden)
     self.l2 = Linear(first_hidden, second_hidden)
     self.l3 = Linear(second_hidden, out_dim)
     self.sigmoid = Sigmoid()