Example #1
    def __init__(self, users, problem):
        self.users = users
        self.problem = problem
        self.size = len(users)

        # The optimal objects under the min and avg aggregation functions
        self.x_star_min = self.get_x_star(False)
        self.x_star_avg = self.get_x_star(True)
        for u in self.users:
            u.utility_star_group_x_star_min = self.x_star_min.get_utility(
                u.preference_weights)
            u.utility_star_group_x_star_avg = self.x_star_avg.get_utility(
                u.preference_weights)

        # Only used for PERCEPTRON_AVERAGE
        self.weights = [
            np.zeros(problem.types),
            np.zeros(problem.areas),
            np.zeros((problem.roomtypes, problem.max_rooms)),
            np.zeros((problem.features, problem.options))
        ]

        self.avg_weights_star = self.get_avg_weights()
        self.x_star_average_perceptron = utils.model(self.avg_weights_star,
                                                     problem)
        self.utility_star_x_star = self.x_star_average_perceptron.get_utility(
            self.avg_weights_star)
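The zero-initialized weight blocks above are updated later from user feedback (see the run loops in Examples #20 and #22). A hedged sketch of the standard structured-perceptron update they presumably receive; update_weights itself is not shown in the source:

import numpy as np

# w <- w + (phi(x_improved) - phi(x_recommended)), applied blockwise to the
# four weight arrays (an assumption about update_weights, not its source):
def update_weights(weights, phi_bar, phi):
    return [w + (np.asarray(pb) - np.asarray(p))
            for w, pb, p in zip(weights, phi_bar, phi)]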
Example #2
def eval_epoch(model, data_loader, loss_fn, device, epoch_num, tensorboard_writer):
    model.eval()

    running_loss = 0.0
    processed_size = 0

    for batch, labels in data_loader:
        batch = batch.to(device)
        labels = labels.to(device)

        with torch.set_grad_enabled(False):
            prediction_x = model(batch)[0]
            loss_x = loss_fn(prediction_x, labels)

        running_loss += loss_x.item() * batch.size(0)
        processed_size += batch.size(0)

    loss = running_loss / processed_size
    message = f"epoch {epoch_num}: loss value={loss}"
    logging.info(message)
    print(message)

    if tensorboard_writer is not None:
        tensorboard_writer.add_scalar('Loss', loss, epoch_num)

    return loss
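A minimal sketch of driving eval_epoch with synthetic data, assuming the function above is in scope; the toy model and loader are stand-ins:

import torch
from torch.utils.data import DataLoader, TensorDataset

class ToyModel(torch.nn.Module):
    # Returns a tuple so model(batch)[0] is the prediction, as eval_epoch expects.
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(4, 2)

    def forward(self, x):
        return (self.fc(x),)

data = TensorDataset(torch.randn(32, 4), torch.randint(0, 2, (32,)))
loader = DataLoader(data, batch_size=8)
loss = eval_epoch(ToyModel(), loader, torch.nn.CrossEntropyLoss(),
                  device='cpu', epoch_num=0, tensorboard_writer=None)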
Example #3
def fit_epoch(model, data_loader, loss_fn, optimizer, device):
    model.train(True)

    running_loss = 0.0
    processed_data = 0

    for batch, labels in data_loader:
        batch = batch.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # A single forward pass returns the main output and two auxiliary heads.
        prediction_x, prediction_aux1, prediction_aux2 = model(batch)[:3]
        loss_x = loss_fn(prediction_x, labels)
        loss_aux1 = loss_fn(prediction_aux1, labels)
        loss_aux2 = loss_fn(prediction_aux2, labels)

        # Sum the losses and backpropagate once; separate backward() calls on
        # the same graph would require retain_graph=True.
        (loss_x + loss_aux1 + loss_aux2).backward()
        optimizer.step()

        running_loss += loss_x.item() * batch.size(0)
        processed_data += batch.size(0)

    train_loss = running_loss / processed_data

    return train_loss
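Auxiliary classifier losses are often down-weighted rather than added at full strength. A minimal sketch of that variant, assuming the GoogLeNet-style 0.3 factor (not taken from this example); scalar tensors stand in for the three losses:

import torch

loss_x = torch.tensor(1.0, requires_grad=True)
loss_aux1 = torch.tensor(0.8, requires_grad=True)
loss_aux2 = torch.tensor(0.9, requires_grad=True)
# 0.3 is the weight popularized by GoogLeNet; treat it as a tunable assumption.
loss = loss_x + 0.3 * (loss_aux1 + loss_aux2)
loss.backward()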
Example #4
def main():
    path_to_x = 'dataset/t10k-images.idx3-ubyte'
    path_to_y = 'dataset/t10k-labels.idx1-ubyte'
    path_to_model = 'result.npy'

    parser = argparse.ArgumentParser(description='predict.py')
    parser.add_argument('--x_test_dir',
                        dest='x_test_dir',
                        default=path_to_x,
                        type=str)
    parser.add_argument('--y_test_dir',
                        dest='y_test_dir',
                        default=path_to_y,
                        type=str)
    parser.add_argument('--model_input_dir',
                        dest='model_input_dir',
                        default=path_to_model,
                        type=str)
    args = parser.parse_args()

    weights = np.load(args.model_input_dir)

    X, y = prepare_data(args.x_test_dir, args.y_test_dir)
    y_pred = model(X, weights)
    y_pred_matrix = y_to_matrix(y_pred, 10)
    print(classification_report(y, y_pred_matrix))
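The flags accept the standard --flag value and --flag=value forms; a quick standalone check of the parser (the override path is made up):

import argparse

parser = argparse.ArgumentParser(description='predict.py')
parser.add_argument('--x_test_dir', dest='x_test_dir',
                    default='dataset/t10k-images.idx3-ubyte', type=str)
args = parser.parse_args(['--x_test_dir=some/other/path'])
assert args.x_test_dir == 'some/other/path'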
Example #5
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            if args.model == 'Transformer':
                output = model(data)
            else:
                output, hidden = model(data, hidden)
                hidden = repackage_hidden(hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
    return total_loss / (len(data_source) - 1)
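Both this evaluate and the train loop in Example #11 call repackage_hidden; in the upstream PyTorch word-language-model example, that helper detaches the hidden state so backpropagation does not reach back across batches:

import torch

def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)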
Example #6
 def add_phrase(self, phrase):
     Id = len(self.phrase_corpus)
     self.phrase_corpus.append(phrase)
     self.phrase_index = hnswlib.Index('cosine', 512)
     self.phrase_index.init_index(len(self.phrase_corpus), ef_construction=200, M=48, random_seed=36)
     if len(self.phrase_corpus) > 1:
         self.phrase_index.load_index("phrase_index", max_elements=len(self.phrase_corpus))
     self.phrase_index.add_items(utils.model([phrase]))
     self.phrase_index.save_index("phrase_index")
     return Id, phrase
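A self-contained sketch of the hnswlib calls chained above, with random vectors standing in for the utils.model embeddings (the 512-dim cosine space mirrors the example):

import hnswlib
import numpy as np

index = hnswlib.Index(space='cosine', dim=512)
index.init_index(max_elements=10, ef_construction=200, M=48, random_seed=36)
vectors = np.random.rand(3, 512).astype('float32')
index.add_items(vectors, ids=np.arange(3))
labels, distances = index.knn_query(vectors[:1], k=1)  # (labels, distances) pair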
Example #7
def test(model, metric_fc, test_loader):
    model.eval()
    accuracies = []
    for batch_id, (spec_mag, label) in enumerate(test_loader()):
        feature = model(spec_mag)
        output = metric_fc(feature, label)
        label = paddle.reshape(label, shape=(-1, 1))
        acc = accuracy(input=output, label=label)
        accuracies.append(acc.numpy()[0])
    model.train()
    return float(sum(accuracies) / len(accuracies))
Example #8
def calc_sentence_logprob(model, sentence):
    """
    Calculates the sentence log-prob
    """
    if len(sentence) < 1:
        return -float('inf')

    # log_softmax over the vocabulary dimension is numerically stabler
    # than log(softmax(...)).
    log_probs = F.log_softmax(model(sentence), dim=-1)
    ids = torch.Tensor(sentence[1:]).long()
    sentence_log_prob = torch.sum(log_probs.gather(1, ids.view(-1, 1)))

    return sentence_log_prob.item()
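The gather call selects each target token's log-probability from the row for its position; a tiny standalone illustration with made-up shapes:

import torch

log_probs = torch.log_softmax(torch.randn(4, 10), dim=-1)  # (positions, vocab)
ids = torch.tensor([3, 1, 7, 2])                           # next-token ids
token_log_probs = log_probs.gather(1, ids.view(-1, 1))     # (positions, 1)
sentence_log_prob = token_log_probs.sum().item()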
Example #9
    def __init__(self, problem, weight_range, nb_optimal_preferences):
        self.problem = problem
        self.preference_weights = self.generate_weights(nb_optimal_preferences, weight_range)

        self.x_star = utils.model(self.preference_weights, problem)
        self.utility_star_x_star = self.get_utility_star(self.x_star.phi)

        self.utility_star_group_x_star_min = None
        self.utility_star_group_x_star_avg = None
        self.current_weights = [np.zeros(problem.types), np.zeros(problem.areas),
                                np.zeros((problem.roomtypes, problem.max_rooms)),
                                np.zeros((problem.features, problem.options))]
Example #10
 def deduplicate(self, phrase):
     if len(self.phrase_corpus) == 0:
         return self.add_phrase(phrase)
     # knn_query returns a (labels, distances) pair
     closest_neighbor, closest_distance = self.phrase_index.knn_query(utils.model([phrase]))
     if len(closest_neighbor[0]) == 0:
         return self.add_phrase(phrase)
     if closest_distance[0][0] > distance_threshold:
         return self.add_phrase(phrase)
     return_phrase = self.phrase_corpus[closest_neighbor[0][0]]
     return self.phrase_corpus.index(return_phrase), return_phrase
Example #11
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        model.zero_grad()
        if args.model == 'Transformer':
            output = model(data)
        else:
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(p.grad.data, alpha=-lr)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(train_data) // args.bptt, lr,
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
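The inner update p.data.add_(p.grad.data, alpha=-lr) is plain SGD written by hand; the equivalent step through torch.optim looks like this (a sketch on a dummy model, with an arbitrary learning rate):

import torch

model = torch.nn.Linear(3, 3)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss = model(torch.randn(2, 3)).sum()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)  # clip before the step
optimizer.step()
optimizer.zero_grad()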
Example #12
def evaluate(args, model, tokenizer, prefix=""):
    # Loop to handle MNLI double evaluation (matched, mis-matched)
    eval_output_dir = args.output_dir

    eval_dataset = load_and_cache_examples(args, tokenizer, evaluate=True)

    if args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir, exist_ok=True)

    # Prepare dataloader
    eval_dataloader, args = get_dataloader(eval_dataset,
                                           tokenizer,
                                           args,
                                           split='eval')

    # multi-gpu evaluate
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = {}".format(len(eval_dataset)))
    logger.info("  Batch size = {}".format(args.eval_batch_size))
    eval_loss = 0.0
    nb_eval_steps = 0
    model.eval()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        inputs, labels = (batch, batch)
        inputs = inputs.to(args.device)
        labels = labels.to(args.device)

        with torch.no_grad():
            outputs = model(inputs, labels=labels)
            lm_loss = outputs[0]
            eval_loss += lm_loss.mean().item()
        nb_eval_steps += 1

    eval_loss = eval_loss / nb_eval_steps
    perplexity = torch.exp(torch.tensor(eval_loss))

    result = {"perplexity": perplexity}

    output_eval_file = os.path.join(eval_output_dir, prefix,
                                    "eval_results.txt")
    with open(output_eval_file, "w") as writer:
        logger.info("***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))

    return result
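The reported perplexity is just the exponential of the mean per-token cross-entropy; a one-line check with an arbitrary loss value:

import torch

eval_loss = torch.tensor(2.0)      # mean cross-entropy per token, in nats
perplexity = torch.exp(eval_loss)  # exp(2.0) ~ 7.39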
Example #13
 def add_node(self, phrase):
     self.phrase_corpus_length += 1
     self.phrase_corpus.append(phrase)
     if self.phrase_corpus_length > 2:
         self.node_index = None
     self.node_index = hnswlib.Index('cosine', 512)
     self.node_index.init_index(self.phrase_corpus_length,
                                ef_construction=200,
                                M=48,
                                random_seed=36)
     if self.phrase_corpus_length > 2:
         self.node_index.load_index(self.name,
                                    max_elements=self.phrase_corpus_length)
     self.node_index.add_items(utils.model([phrase]))
     self.node_index.save_index(self.name)
Example #14
 def return_node(self, phrase):
     non_stop_phrase = ' '.join([token.text for token in utils.sp(phrase)])
     if len(non_stop_phrase) > 1:
         phrase = non_stop_phrase
     if self.node_index is None:
         self.add_node(phrase)
     # knn_query returns a (labels, distances) pair
     closest_neighbor, closest_distance = self.node_index.knn_query(utils.model([phrase]))
     if len(closest_neighbor[0]) == 0:
         self.add_node(phrase)
         return phrase
     if closest_distance[0][0] > self.distance_threshold:
         self.add_node(phrase)
         return phrase
     return self.phrase_corpus[closest_neighbor[0][0]]
Example #15
def calc_sent_loss(sent, model, criterion, lang_ids=None):
    """
    Calculate the loss value for the entire sentence
    """
    if lang_ids is not None:
        lang_ids = torch.LongTensor(
            [1 if tok in ('eng', 'engspa', '<s>') else 0 for tok in lang_ids]
        ).to(DEVICE)
    targets = torch.LongTensor([model.vocab[tok] for tok in sent[1:]]).to(DEVICE)
    logits, lang_ids_pred = model(sent, lang_ids)
    loss = criterion(logits, targets)
    if lang_ids is not None:
        loss += criterion(lang_ids_pred, lang_ids[1:])

    # gen_sent = ' '.join([model.vocab[idx] for idx in torch.argmax(logits, dim=1)])
    # with open('log/{}_gen_sent.txt'.format(args.dataset), 'a+') as f:
    #     f.write(gen_sent + '\n')
    return loss
Example #16
    def __init__(self):
        self.hp_dict = utils.hp_dictionary
        self.hp_dict['activation'] = [1., 2., 3.]

        self.position = np.array([
            random.sample(list(self.hp_dict[name]), 1)[0]
            for name in sorted(list(self.hp_dict.keys()))
        ])
        self.pbest_position = self.position
        self.pbest_value = float('inf')
        self.velocity = np.zeros(len(self.position), dtype='float')

        self.low, self.high = [0., 0., 0., 0.], [0., 0., 0., 0.]
        self.overal = 0.

        self.model = utils.model()
        self.update_model()
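The particle carries the usual PSO state (position, velocity, personal best). The canonical velocity and position update that consumes this state looks like the sketch below; the inertia and acceleration coefficients are assumptions, not values from the source:

import numpy as np

w, c1, c2 = 0.7, 1.5, 1.5                        # inertia, cognitive, social (assumed)
position = np.array([1.0, 2.0, 3.0, 4.0])
velocity = np.zeros_like(position)
pbest_position = np.array([0.5, 2.5, 3.0, 4.5])  # personal best
gbest_position = np.array([1.0, 2.0, 2.0, 4.0])  # swarm best
r1, r2 = np.random.rand(), np.random.rand()
velocity = (w * velocity
            + c1 * r1 * (pbest_position - position)
            + c2 * r2 * (gbest_position - position))
position = position + velocity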
Example #17
def generate_sent(model, max_len):
    """
    Generate a sentence
    """
    hist = ['<s>']
    eos = model.vocab['<s>']
    sent = []

    while len(sent) < max_len:
        logits = model(hist + ['<s>'])[0]
        if logits.dim() > 1:
            logits = logits[-1]
        next_word = gumbel_argmax(logits, dim=0)
        if next_word == eos:
            break
        sent.append(model.vocab.itos[next_word])
        hist += [model.vocab.itos[next_word]]

    return sent
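gumbel_argmax is not shown in this listing; a standard Gumbel-max sampler consistent with the call above (an assumption about the helper, not its actual source):

import torch

def gumbel_argmax(logits, dim):
    # argmax(logits + Gumbel noise) draws from Categorical(softmax(logits)).
    gumbel_noise = -torch.log(-torch.log(torch.rand_like(logits)))
    return torch.argmax(logits + gumbel_noise, dim=dim)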
Example #18
def init():
    MODEL_DIR = '/var/azureml-app/azureml-models/personlabV1/2/personlab/'
    multiscale = [1.0, 1.5, 2.0]

    global tf_img
    tf_img = []
    global outputs
    outputs = []
    for i in range(len(multiscale)):
        scale = multiscale[i]
        tf_img.append(
            tf.placeholder(tf.float32,
                           shape=[1, int(scale * 401),
                                  int(scale * 401), 3]))
        outputs.append(utils.model(tf_img[i]))
    global sess
    sess = tf.Session()

    global_vars = tf.global_variables()
    saver = tf.train.Saver(var_list=global_vars)
    checkpoint_path = MODEL_DIR + 'model.ckpt'
    saver.restore(sess, checkpoint_path)
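This example targets the TensorFlow 1.x graph API (tf.placeholder, tf.Session). Under TensorFlow 2.x it needs the compat layer; a minimal shim:

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # restores graph building, placeholders, and sessions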
Example #19
def compute_correlations(query: ExplainerQuery):
    """
    Computes the correlation between each word of the sequence and
    the "label" within the hypothesis. The correlation is over all the 
    concatenated encoder attention heads and is tokenwise.

    If the label contains multiple tokens, the average is taken.

    Returns a dataframe and list:
       - The dataframe where each row is a token, the first column is
    "word" with the text representation of the token and each additional
    column is the label and the correlation of that label against the input
    sequence token.
       - The list are scores coming from each label
    """

    hypothesis_template = query.hypothesis_template
    candidate_labels = query.labels
    sequence = query.sequence

    Q = [
        SingleQuery(hypothesis=hypothesis_template.format(label),
                    sequence=sequence) for label in candidate_labels
    ]
    tokens = utils.tokenize(Q)

    # Run a single forward pass, saving the encoder hidden states
    with torch.no_grad():
        outputs = utils.model(
            input_ids=tokens["input_ids"],
            attention_mask=tokens["attention_mask"],
            output_hidden_states=True,
            return_dict=True,
        )

    # Get the logits from the output
    logits = outputs["logits"].detach().cpu().numpy()

    # Keep only the entailment and contradiction logits
    contradiction_id = 0
    entailment_id = 2
    logits = logits[..., [contradiction_id, entailment_id]]

    # Softmax over remaining logits for each sequence
    scores = np.exp(logits) / np.exp(logits).sum(-1, keepdims=True)
    scores = scores[..., -1]

    # Compute correlations from the encoder hidden states
    layers = outputs["encoder_hidden_states"]

    # Remove from the GPU and convert to a numpy array
    # (layer, candidate_labels, token, embedding)
    state = np.array([layer.detach().cpu().numpy() for layer in layers])

    # Make the state easier to work with
    # (candidate_labels, token, layer, embedding)
    state = state.transpose(1, 2, 0, 3)

    # Squish the layers and embeddings for correlation calculation
    # (candidate_labels, token, layer(+)embedding)
    state = state.reshape((*state.shape[:-2], -1))

    # Measure the length of the hypothesis and response
    hypothesis_prefix = hypothesis_template.split("{}")[0]
    n_hypothesis_prefix = _measure_token_length(hypothesis_prefix)
    n_response = _measure_token_length(sequence)

    df = pd.DataFrame()

    for label, V in zip(candidate_labels, state):

        # Measure where each label via tokens starts and ends
        n_label = _measure_token_length(label)
        n_label_start = n_response + n_hypothesis_prefix + 2
        n_label_end = n_label_start + n_label

        # (words, label_length)
        C = 1 - cdist(V, V[n_label_start:n_label_end], metric="correlation")

        # Add up over all label words
        C = np.sum(C, axis=1)

        # Normalize to the self-correlation
        C /= np.average(C[n_label_start:n_label_end])

        # Truncate to the response only
        C = C[1:n_response + 1]

        df[label] = C

    df.insert(0, "word", _token_chunks(sequence))

    return df, scores
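cdist(..., metric="correlation") returns one minus the Pearson correlation of the rows, which is why the code takes 1 - cdist(...); a quick standalone check:

import numpy as np
from scipy.spatial.distance import cdist

a = np.random.rand(2, 8)
lhs = 1 - cdist(a, a, metric="correlation")[0, 1]
rhs = np.corrcoef(a)[0, 1]
assert np.isclose(lhs, rhs)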
Example #20
def run(group, problem, alpha, file_prefix, total_time, simulation_time):
    regret_min = []
    regret_avg = []
    r = []
    individual_regret = np.zeros([group.size, problem.iterations])

    start_time = time.time()
    # Run perceptron average
    for counter in range(problem.iterations - 1):

        weights = group.weights

        x = utils.model(weights, problem)

        print('System recommendation using the average perceptron: ', x.object)

        regret = group.utility_star_x_star - x.get_utility(
            group.avg_weights_star)
        r.append(regret)

        # Calculate the regret for every individual user
        for i in range(0, group.size):
            usr = group.users[i]
            reg = usr.get_regret(x.phi)
            individual_regret[i, counter] = reg

        if regret == 0.0:
            final_time = (time.time() - start_time) + total_time
            file = utils.save_to_pickle(x.object, r, regret_min, regret_avg,
                                        regret_min, 'average', file_prefix,
                                        problem, group, simulation_time,
                                        final_time, counter, individual_regret)
            return x.object, simulation_time, file

        # phi_y_bar, s_t = average(group, phi_y_list, alpha)

        improvements = []

        for u in group.users:
            sim_start_time = time.time()
            x_bar = u.step(x.phi, x.object, alpha)
            bar = [
                np.array(x_bar.phi[0]),
                np.array(x_bar.phi[1]),
                np.array(x_bar.phi[2]),
                np.array(x_bar.phi[3])
            ]
            improvements.append(bar)

            # y, improvements[group.get_index(u)] = u.step(phi_y, alpha)
            temp_simulation_time = time.time() - sim_start_time
            simulation_time += temp_simulation_time
            print('Improvement user ', group.get_index(u), ': ', x_bar.object)

        phi_x_bar = utils.avg_phi_list(improvements, group.problem)

        group.update_weights(phi_x_bar, x.phi)

    x = utils.model(weights, problem)
    regret = group.utility_star_x_star - x.get_utility(group.avg_weights_star)
    r.append(regret)

    # Calculate the regret for every individual user
    for i in range(0, group.size):
        usr = group.users[i]
        reg = usr.get_regret(x.phi)
        individual_regret[i, counter + 1] = reg

    final_time = (time.time() - start_time) + total_time
    # save_to_pickle args: y, regret_avg, regret_min, regret_su_avg, regret_su_min, strategy, file_prefix, problem, group, sim_time, runtime, counter
    file = utils.save_to_pickle(x.object, r, regret_min, regret_avg,
                                regret_min, 'average', file_prefix, problem,
                                group, simulation_time, final_time,
                                problem.iterations, individual_regret)
    return x.object, simulation_time, file
Example #21
train_set_x_flatten = train_set_x_orig.reshape(
    train_set_x_orig.shape[0], -1).T  # reshape training set to (nx, m)
test_set_x_flatten = test_set_x_orig.reshape(
    test_set_x_orig.shape[0], -1).T  # reshape testing set to (nx, m)

train_set_x = train_set_x_flatten / 255
test_set_x = test_set_x_flatten / 255

print("Started modeling")
tic = time.process_time()

d = utils.model(train_set_x,
                train_set_y,
                test_set_x,
                test_set_y,
                num_iterations=2000,
                learning_rate=0.005,
                print_cost=True)

toc = time.process_time()
print("Ended modeling. Time elapsed: " + str(1000 * (toc - tic)) + "ms")

index = 19
print("y = " + str(test_set_y[0, index]) +
      ", the prediction is that it is a \"" +
      classes[d["Y_prediction_test"][0, index]].decode("utf-8") +
      "\" picture.")
plt.figure()
plt.imshow(test_set_x[:, index].reshape((num_px, num_px, 3)))
plt.show()
Example #22
def run(strategy,
        group,
        problem,
        alpha,
        file_prefix,
        total_time,
        simulation_time,
        avg=False):
    regret_min = []
    regret_avg = []
    regret_su_min = []
    regret_su_avg = []
    individual_regret = np.zeros([group.size, problem.iterations])
    start_time = time.time()
    # Run perceptron average
    extra_time = 0
    for counter in range(problem.iterations - 1):
        print('Iteration: (', counter, '/', problem.iterations, ')')
        # User selection
        if strategy == strategy.LEAST_MISERY:
            user, x_group = utils.get_least_misery_user(group, problem, avg)

        elif strategy == strategy.RANDOM:
            user, x_group, extra_t = utils.get_random_user(group, problem, avg)
            extra_time += extra_t

        # Find the object to present to this user
        x = utils.model(user.current_weights, problem)
        print('System recommendation using the ', strategy.value,
              ' strategy: ', x.object)

        # Calculate the regret for the group recommendation.
        # Note: x_group is used because we want the group-wide regret; x is generated from the picked user's individual weights.
        rgrt_avg = utils.get_regret(x_group, group, aggregation_function='avg')
        regret_avg.append(rgrt_avg)
        rgrt_min = utils.get_regret(x_group, group, aggregation_function='min')
        regret_min.append(rgrt_min)

        # Calculate the regret for the picked user
        rgrt_avg = utils.get_regret(x, group, aggregation_function='avg')
        regret_su_avg.append(rgrt_avg)
        rgrt_min = utils.get_regret(x, group, aggregation_function='min')
        regret_su_min.append(rgrt_min)

        # Calculate the regret for every individual user
        for i in range(0, group.size):
            usr = group.users[i]
            reg = usr.get_regret(x_group.phi)
            individual_regret[i, counter] = reg

        # The algorithm ends when the ideal object is found
        if rgrt_min == 0.0 and not avg:
            final_time = ((time.time() - start_time) + total_time) - extra_time
            file = utils.save_to_pickle(x_group.object, regret_avg, regret_min,
                                        regret_su_avg, regret_su_min,
                                        strategy.value, file_prefix, problem,
                                        group, simulation_time, final_time,
                                        counter, individual_regret)
            return x_group.object, simulation_time, file
        if rgrt_avg == 0.0 and avg:
            final_time = ((time.time() - start_time) + total_time) - extra_time
            file = utils.save_to_pickle(x_group.object, regret_avg, regret_min,
                                        regret_su_avg, regret_su_min,
                                        strategy.value, file_prefix, problem,
                                        group, simulation_time, final_time,
                                        counter, individual_regret)
            return x_group.object, simulation_time, file
        # start_time = time.time()

        start_sim_time = time.time()
        # Simulate a step from the chosen user
        x_bar = user.step(x.phi, x.object, alpha)

        temp_simulation_time = time.time() - start_sim_time
        print('Improvement user ', group.get_index(user), ': ', x_bar.object)
        simulation_time += temp_simulation_time

        # Update the weights for the chosen user
        user.update_weights(x_bar.phi, x.phi)

    x_final = utils.get_aggregation_object(group, problem, avg)

    regret_avg.append(
        utils.get_regret(x_final, group, aggregation_function='avg'))
    regret_min.append(
        utils.get_regret(x_final, group, aggregation_function='min'))
    regret_su_avg.append(
        utils.get_regret(x_final, group, aggregation_function='avg'))
    regret_su_min.append(
        utils.get_regret(x_final, group, aggregation_function='min'))

    for i in range(0, group.size):
        usr = group.users[i]
        reg = usr.get_regret(x_final.phi)
        individual_regret[i, counter + 1] = reg

    final_time = ((time.time() - start_time) + total_time) - extra_time
    file = utils.save_to_pickle(x_final, regret_avg, regret_min, regret_su_avg,
                                regret_su_min, strategy.value, file_prefix,
                                problem, group, simulation_time, final_time,
                                problem.iterations, individual_regret)
    return x_final, simulation_time, file
Example #23
def train_epoch(model, tokenizer, optimizer, scheduler, train_dataloader,
                tr_loss, logging_loss, global_step,
                steps_trained_in_current_epoch, tb_writer, args):
    """train one epoch"""
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )

    epoch_iterator = tqdm(train_dataloader,
                          desc="Iteration",
                          disable=args.local_rank not in [-1, 0])
    for step, batch in enumerate(epoch_iterator):

        # Skip past any already trained steps if resuming training
        if steps_trained_in_current_epoch > 0:
            steps_trained_in_current_epoch -= 1
            continue

        inputs, labels = (batch, batch)
        inputs = inputs.to(args.device)
        labels = labels.to(args.device)
        model.train()
        outputs = model(inputs, labels=labels)
        loss = outputs[0]  # model outputs are always tuples in transformers (see docs)

        if args.n_gpu > 1:
            loss = loss.mean()  # mean() to average on multi-GPU parallel training
        if args.gradient_accumulation_steps > 1:
            loss = loss / args.gradient_accumulation_steps

        if args.fp16:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        tr_loss += loss.item()
        if (step + 1) % args.gradient_accumulation_steps == 0:
            if args.fp16:
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer),
                                               args.max_grad_norm)
            else:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.max_grad_norm)
            optimizer.step()
            scheduler.step()  # Update learning rate schedule
            model.zero_grad()
            global_step += 1

            # Log metrics
            if (args.local_rank in [-1, 0] and args.logging_steps > 0
                    and global_step % args.logging_steps == 0):
                # Only evaluate on a single GPU; otherwise metrics may not average well
                if args.local_rank == -1 and args.evaluate_during_training:
                    results = evaluate(args, model, tokenizer)
                    for key, value in results.items():
                        tb_writer.add_scalar("eval_{}".format(key), value,
                                             global_step)
                tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
                tb_writer.add_scalar("loss", (tr_loss - logging_loss) /
                                     args.logging_steps, global_step)
                logging_loss = tr_loss

            # save checkpoint
            if (args.local_rank in [-1, 0] and args.save_steps > 0
                    and global_step % args.save_steps == 0):
                if args.evaluate_during_training:
                    save_checkpoint(model, optimizer, scheduler, tokenizer,
                                    args, global_step)

        if args.max_steps > 0 and global_step > args.max_steps:
            epoch_iterator.close()
            break

    return model, optimizer, scheduler, global_step, tr_loss, logging_loss
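Gradient accumulation, as used above, scales each mini-batch loss by 1/accumulation_steps and steps the optimizer only every accumulation_steps batches, giving an effective batch size of batch_size * accumulation_steps. A runnable miniature with arbitrary sizes:

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = torch.nn.CrossEntropyLoss()
accum_steps = 4
batches = [(torch.randn(8, 4), torch.randint(0, 2, (8,))) for _ in range(8)]

for step, (x, y) in enumerate(batches):
    loss = loss_fn(model(x), y) / accum_steps  # scale so summed grads average out
    loss.backward()                            # gradients accumulate in .grad
    if (step + 1) % accum_steps == 0:
        optimizer.step()
        model.zero_grad()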
Example #24
def train(args):
    # Enable multi-GPU training
    dist.init_parallel_env()
    if dist.get_rank() == 0:
        shutil.rmtree('log', ignore_errors=True)
        # Logger
        writer = LogWriter(logdir='log')
    # Shape of the input data
    input_shape = eval(args.input_shape)
    # Load the datasets
    train_dataset = CustomDataset(args.train_list_path, model='train', spec_len=input_shape[3])
    train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)

    test_dataset = CustomDataset(args.test_list_path, model='test', spec_len=input_shape[3])
    test_loader = DataLoader(dataset=test_dataset, batch_size=args.batch_size, num_workers=args.num_workers)

    # Build the model
    model = resnet34()
    metric_fc = ArcMarginProduct(feature_dim=512, class_dim=args.num_classes, easy_margin=args.easy_margin)
    if dist.get_rank() == 0:
        paddle.summary(model, input_size=input_shape)
    # Enable multi-GPU training
    model = paddle.DataParallel(model)
    metric_fc = paddle.DataParallel(metric_fc)

    # Piecewise learning-rate schedule
    boundaries = [10, 30, 70, 100]
    lr = [0.1 ** l * args.learning_rate for l in range(len(boundaries) + 1)]
    scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=boundaries, values=lr, verbose=True)
    # Configure the optimizer
    optimizer = paddle.optimizer.Adam(parameters=model.parameters() + metric_fc.parameters(),
                                      learning_rate=scheduler,
                                      weight_decay=paddle.regularizer.L2Decay(1e-4))

    # Load pretrained model weights
    if args.pretrained_model is not None:
        model_dict = model.state_dict()
        param_state_dict = paddle.load(os.path.join(args.pretrained_model, 'model.pdparams'))
        for name, weight in model_dict.items():
            if name in param_state_dict.keys():
                if weight.shape != list(param_state_dict[name].shape):
                    print('{} not used, shape {} unmatched with {} in model.'.
                            format(name, list(param_state_dict[name].shape), weight.shape))
                    param_state_dict.pop(name, None)
            else:
                print('Lack weight: {}'.format(name))
        model.set_dict(param_state_dict)
        print('Successfully loaded the pretrained model parameters')

    # Resume training from a checkpoint
    if args.resume is not None:
        model.set_state_dict(paddle.load(os.path.join(args.resume, 'model.pdparams')))
        optimizer.set_state_dict(paddle.load(os.path.join(args.resume, 'optimizer.pdopt')))
        print('Successfully loaded the model and optimizer parameters')

    # Loss function
    loss = FocalLoss(gamma=args.gamma)
    train_step = 0
    test_step = 0
    # Start training
    for epoch in range(args.num_epoch):
        loss_sum = []
        for batch_id, (spec_mag, label) in enumerate(train_loader()):
            feature = model(spec_mag)
            output = metric_fc(feature, label)
            # Compute the loss
            los = loss(output, label)
            loss_sum.append(los)
            los.backward()
            optimizer.step()
            optimizer.clear_grad()
            # Only one process prints during multi-GPU training
            if batch_id % 100 == 0 and dist.get_rank() == 0:
                print('[%s] Train epoch %d, batch_id: %d, loss: %f' % (
                    datetime.now(), epoch, batch_id, sum(loss_sum) / len(loss_sum)))
                writer.add_scalar('Train loss', los, train_step)
                train_step += 1
                loss_sum = []
        # Only one process runs evaluation and saves the model during multi-GPU training
        if dist.get_rank() == 0:
            acc = test(model, metric_fc, test_loader)
            print('[%s] Train epoch %d, accuracy: %f' % (datetime.now(), epoch, acc))
            writer.add_scalar('Test acc', acc, test_step)
            # Log the learning rate
            writer.add_scalar('Learning rate', scheduler.last_lr, epoch)
            test_step += 1
            save_model(args, model, optimizer)
        scheduler.step()
Example #25
                      nhid3=588,
                      lrf1=5,
                      lrf2=5,
                      lrf3=7)
 else:
     paras = init(nhid1=37, nhid2=12, nhid3=355, lrf1=2, lrf2=2, lrf3=9)
 if modelname == 'cnn':
     trainenergy, accuracy, di, testenergy, predarg = cnnmodel(X, Y, paras)
 elif modelname == 'cnnat':
     trainenergy, accuracy, di, testenergy, predarg, advloss = cnnatmodel(
         X, Y, paras, atepsilon)
 elif modelname == 'crfat':
     trainenergy, accuracy, di, testenergy, predarg, advloss = crfatmodel(
         X, Y, k1, k2, paras, atepsilon)
 elif modelname == 'crf':
     trainenergy, accuracy, di, testenergy, predarg = model(
         X, Y, k1, k2, paras)
 elif modelname == 'cnncomb':
     trainenergy, accuracy, di, testenergy, predarg = cnnmodel(
         X, Y, paras, flag='combine')
 elif modelname == 'cnncombat':
     trainenergy, accuracy, di, testenergy, predarg, advloss = cnnatmodel(
         X, Y, paras, atepsilon, flag='combine')
 elif modelname == 'crfcomb':
     trainenergy, accuracy, di, testenergy, predarg = model(X,
                                                            Y,
                                                            k1,
                                                            k2,
                                                            paras,
                                                            flag='combine')
 elif modelname == 'crfcombat':
     trainenergy, accuracy, di, testenergy, predarg, advloss = crfatmodel(