Example #1
def test():
    # Reading test set
    test_data = preprocess(FLAGS.test_files.split(','),
                           FLAGS.test_batch_size,
                           Config.n_input,
                           Config.n_context,
                           Config.alphabet,
                           hdf5_cache_path=FLAGS.test_cached_features_path)

    graph = create_inference_graph(batch_size=FLAGS.test_batch_size, n_steps=-1)
    evaluate.evaluate(test_data, graph)
def evaluate_mean(corpus, lexicon, mark):
    valence_pred = []
    valence_true = []
    arousal_pred = []
    arousal_true = []

    def VA_mean(text):
        sum_valence = 0
        sum_arousal = 0
        count = 0
        for word in text:
            for l in lexicon:
                if word == l[0]:
                    if l[1] > 9:
                        l[1] = 9
                    if l[1] < 1:
                        l[1] = 1
                    if l[2] > 9:
                        l[2] = 9
                    if l[2] < 1:
                        l[2] = 1
                    count = count + 1
                    sum_valence = sum_valence + l[1]
                    sum_arousal = sum_arousal + l[2]
        return [5., 5.] if count == 0 else [sum_valence / count, sum_arousal / count]

    num = len(corpus)
    for (i, text) in enumerate(corpus):
        V, A = VA_mean(text)
        valence_pred.append(V)
        arousal_pred.append(A)
        try:
            ind = [item[0] for item in mark].index(i + 1)
        except ValueError:
            raise Exception('File not found. NO. %i' % (i + 1))

        valence_true.append(mark[ind][1])
        arousal_true.append(mark[ind][2])

        # for item in mark:
        #     if (i + 1) == item[0]:
        #         valence_true.append(item[1])
        #         arousal_true.append(item[2])
        #         break
        #     else:
        #         raise Exception('File not found. NO. %i' % (i + 1))

        if i % 10 == 0:
            logger.info("evaluate for text : %i/%i..." % (i, num))

    evaluate(valence_true, valence_pred, 'valence')
    evaluate(arousal_true, arousal_pred, 'arousal')
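The lexicon lookups above and below clamp each valence/arousal rating to the 1-9 scale with repeated if-chains; the same step written as a small helper (a sketch for illustration, not part of the original module):

def clamp_rating(value, lo=1, hi=9):
    # keep a lexicon valence/arousal rating inside the 1-9 scale
    return max(lo, min(hi, value))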
def evaluate_tfidf_geo(corpus, lexicon, mark):
    valence_pred = []
    valence_true = []
    arousal_pred = []
    arousal_true = []

    num = len(corpus)
    for (i, text) in enumerate(corpus):
        sum_valence = 1.
        sum_arousal = 1.
        count = 0.

        for word in text:
            for l in lexicon:
                if word == l[0]:
                    word_tfidf = tfidf(word, corpus[i], corpus)
                    # logger.info("tfidf of word %s is %f" % (word, word_tfidf))
                    if l[1] > 9:
                        l[1] = 9
                    if l[1] < 1:
                        l[1] = 1
                    if l[2] > 9:
                        l[2] = 9
                    if l[2] < 1:
                        l[2] = 1

                    count = count + word_tfidf
                    sum_valence = sum_valence * (l[1] ** word_tfidf)
                    sum_arousal = sum_arousal * (l[2] ** word_tfidf)

        if count == 0.:
            valence_pred.append(5.)
            arousal_pred.append(5.)
        else:
            # logger.info("%f %f" % (sum_valence ** (1. / count), sum_arousal ** (1. / count)))
            valence_pred.append(sum_valence ** (1. / count))
            arousal_pred.append(sum_arousal ** (1. / count))

        for item in mark:
            if (i + 1) == item[0]:
                valence_true.append(item[1])
                arousal_true.append(item[2])
                break

        if (i + 1) % 10 == 0:
            logger.info("evaluate for text : %i/%i..." % ((i + 1), num))

    evaluate(valence_true, valence_pred, 'valence')
    evaluate(arousal_true, arousal_pred, 'arousal')
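The loop above computes a tf-idf-weighted geometric mean of the matched lexicon ratings. The same quantity as a standalone helper (a minimal sketch; values/weights are illustrative names, and the 5.0 fallback mirrors the neutral default used above):

import math

def weighted_geometric_mean(values, weights):
    # prod(v_i ** w_i) ** (1 / sum(w_i)); fall back to the neutral 5.0 when nothing matched
    total = sum(weights)
    if total == 0:
        return 5.0
    log_sum = sum(w * math.log(v) for v, w in zip(values, weights))
    return math.exp(log_sum / total)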
Example #4
def linear_regression(X_train, X_test, Y_train, Y_test, plot=False):
    # Create linear regression object
    # The training data should be column vectors
    X_train, X_test = np.array(X_train).reshape((len(X_train), 1)), np.array(X_test).reshape((len(X_test), 1))
    regr = linear_model.LinearRegression()
    # Train the model using the training sets
    regr.fit(X_train, Y_train)
    predict = regr.predict(X_test)
    # Record the experiment performance; an explained variance score of 1 means perfect prediction
    np.seterr(invalid='ignore')
    evaluate(list(predict), np.array(Y_test),
             'linear regression ' + 'Explained variance score: %.2f' % regr.score(X_test, Y_test))
    if plot is True:
        draw_linear_regression(X_train, Y_train, regr.predict(X_train))
        draw_linear_regression(X_test, Y_test, predict)
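A hedged usage sketch for linear_regression, assuming the surrounding module's imports (numpy as np, sklearn's linear_model) and its evaluate helper; the toy data is illustrative only:

X_train, X_test = [1.0, 2.0, 3.0, 4.0], [5.0, 6.0]
Y_train, Y_test = [2.1, 3.9, 6.2, 8.1], [9.8, 12.2]
linear_regression(X_train, X_test, Y_train, Y_test, plot=False)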
def evaluate_tfidf_mean(corpus, lexicon, mark):
    valence_pred = []
    valence_true = []
    arousal_pred = []
    arousal_true = []

    num = len(corpus)
    for (i, text) in enumerate(corpus):
        sum_valence = 0.
        sum_arousal = 0.
        count = 0.

        for word in text:
            for l in lexicon:
                if word == l[0]:
                    word_tfidf = tfidf(word, corpus[i], corpus)
                    # logger.info("tfidf of word %s is %f" % (word, word_tfidf))
                    if l[1] > 9:
                        l[1] = 9
                    if l[1] < 1:
                        l[1] = 1
                    if l[2] > 9:
                        l[2] = 9
                    if l[2] < 1:
                        l[2] = 1

                    count = count + word_tfidf
                    sum_valence = sum_valence + word_tfidf * l[1]
                    sum_arousal = sum_arousal + word_tfidf * l[2]

        if count == 0:
            valence_pred.append(5.)
            arousal_pred.append(5.)
        else:
            valence_pred.append(sum_valence / count)
            arousal_pred.append(sum_arousal / count)

        for item in mark:
            if (i + 1) == item[0]:
                valence_true.append(item[1])
                arousal_true.append(item[2])
                break

        if i % 10 == 0:
            logger.info("evaluate for text : %i/%i..." % (i, num))

    evaluate(valence_true, valence_pred, 'valence')
    evaluate(arousal_true, arousal_pred, 'arousal')
def update_json(inp_data, mode):
    updated_data = []
    ctr = 0
    for example in inp_data:
        bmpmaker.clear()
        if mode == "relative":
            true_seq_relative = RelativeEventSequence.from_eval_str(example["y_ref"])
            true_events_absolute = AbsoluteEventSequence.from_relative(true_seq_relative, width, width).events
            pred_seq_relative = RelativeEventSequence.from_eval_str(example["y_pred"])
            pred_events_absolute = AbsoluteEventSequence.from_relative(pred_seq_relative, width, width).events
        elif mode == "cursor":
            true_seq_cursor = CursorEventSequence.from_eval_str(example["y_ref"])
            true_events_absolute = AbsoluteEventSequence.from_cursor(true_seq_cursor, width, width).events
            pred_seq_cursor = CursorEventSequence.from_eval_str(example["y_pred"])
            pred_events_absolute = AbsoluteEventSequence.from_cursor(pred_seq_cursor, width, width).events
        else:
            raise Exception("Unknown or unsupported mode: {}".format(mode))

        bmpmaker.process_commands(true_events_absolute)
        true_bitmap = np.array(bmpmaker.bitmap)

        bmpmaker.clear()
        bmpmaker.process_commands(pred_events_absolute)
        pred_bitmap = np.array(bmpmaker.bitmap)
        hamming = evaluate(true_bitmap, pred_bitmap)
        example["hamming_distance"] = hamming
        updated_data.append(example)
        ctr += 1

        if ctr % 500 == 0:
            print "Progress: %d" % ctr

    return updated_data
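The hamming value stored above comes from evaluate() comparing the reference and predicted bitmaps; a stand-in for that comparison might look like the sketch below (not the project's own evaluate):

import numpy as np

def bitmap_hamming(true_bitmap, pred_bitmap):
    # number of cells that differ between two equally shaped 0/1 bitmaps
    return int(np.sum(np.asarray(true_bitmap) != np.asarray(pred_bitmap)))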
def tabu_search(initSol, tabuListSz):
    MAX_ITER = 10000
    MAX_NO_IMPROVEMENTS = int(MAX_ITER * 0.25)
    noImprovementsCount = 0
    tabuList = deque(maxlen=tabuListSz)
    bestSol = initSol
    (solPath,bestSolVal) = evaluate(initSol)
    sol = initSol
    solVal = bestSolVal
    i = 0
    # TODO: better stopping condition
    while i < MAX_ITER and noImprovementsCount < MAX_NO_IMPROVEMENTS:
        i += 1
        sol, move, solVal, solPath = get_new_solution(sol, solPath, tabuList)
        if len(tabuList) == tabuListSz:
            tabuList.popleft()
        tabuList.append(move)
        if solVal < bestSolVal:
            noImprovementsCount = 0
            bestSol = sol
            bestSolVal = solVal
        else:
            noImprovementsCount += 1
        #print(move)
        #print(solVal)
        #print(bestSolVal)
        #print()
    return bestSolVal
def main():
    clean()

    ratings_train_text = sc.textFile(config.ML_RATINGS_TRAIN)
    ratings_train = (
        ratings_train_text
        .map(ml_parse.parse_line)
        .map(ml_parse.rating_convert))

    ratings_validation_text = sc.textFile(config.ML_RATINGS_VALIDATION)
    ratings_validation = (
        ratings_validation_text
        .map(ml_parse.parse_line)
        .map(ml_parse.rating_convert))

    ratings_train.cache()
    ratings_validation.cache()

    best_result = evaluate.evaluate(ratings_train, ratings_validation,
                                    config.ML_RESULTS_FILE)
    with open(config.ML_BEST_PARAMS_FILE, "w") as outfile:
        outfile.write("%s,%s\n" % ("rank", "lambda"))
        outfile.write("%s,%s" % (
            best_result.get("rank"), best_result.get("lambda")))
    best_model = best_result.get("model")
    best_model.save(sc, config.ML_MODEL)

    sc.stop()
Example #9
def _min_max(board, alpha, beta, depth=DEPTH-1):
    hsh = board.zobrist_hash()
    score = search_hash(depth, alpha, beta, hsh)
    best_move = None
    if score:
        return score
    if board.is_game_over() or depth == 0:
        score = evaluate(board)
        input_hash(hsh, depth, score, exact_flag)
        return score
    else:
        if board.turn == chess.WHITE:
            for move in check_best_move(hsh, board.generate_legal_moves()):
                board.push(move)  # make move
                score = _min_max(board, alpha, beta, depth-1)
                board.pop()  # unmake move
                if score > alpha:
                    alpha = score
                    best_move = move
                    if alpha >= beta:
                        break
            input_hash(hsh, depth, alpha, alpha_flag, best_move)
            return alpha
        else:
            for move in check_best_move(hsh, board.generate_legal_moves()):
                board.push(move)  # make move
                score = _min_max(board, alpha, beta, depth-1)
                board.pop()  # unmake move
                if score < beta:
                    beta = score
                    best_move = move
                    if alpha >= beta:
                        break
            input_hash(hsh, depth, beta, beta_flag, best_move)
            return beta
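For reference, the same alpha-beta structure without the transposition-table bookkeeping used above (python-chess board API as in the example; evaluate is assumed to score positions from White's point of view):

import chess

def alpha_beta(board, alpha, beta, depth, evaluate):
    # plain fail-hard alpha-beta mirroring the white-max / black-min split above
    if board.is_game_over() or depth == 0:
        return evaluate(board)
    if board.turn == chess.WHITE:
        for move in board.generate_legal_moves():
            board.push(move)
            alpha = max(alpha, alpha_beta(board, alpha, beta, depth - 1, evaluate))
            board.pop()
            if alpha >= beta:
                break
        return alpha
    for move in board.generate_legal_moves():
        board.push(move)
        beta = min(beta, alpha_beta(board, alpha, beta, depth - 1, evaluate))
        board.pop()
        if alpha >= beta:
            break
    return beta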
Example #10
def p_expression(p):
    '''expression : expression list
                  | terminal
                  | quote
                  | empty'''
    from evaluate import evaluate
    p[0] = evaluate(p[2] if len(p) == 3 else p[1], p.parser.toplevel)
def train_and_evaluate(model, train_dataloader, val_dataloader, optimizer,
                       loss_fn, metrics, params, model_dir, restore_file=None):
    """Train the model and evaluate every epoch.

    Args:
        model: (torch.nn.Module) the neural network
        params: (Params) hyperparameters
        model_dir: (string) directory containing config, weights and log
        restore_file: (string) - name of file to restore from (without its extension .pth.tar)
    """
    # reload weights from restore_file if specified
    if restore_file is not None:
        restore_path = os.path.join(model_dir, restore_file + '.pth.tar')
        logging.info("Restoring parameters from {}".format(restore_path))
        utils.load_checkpoint(restore_path, model, optimizer)

    best_val_acc = 0.0

    # learning rate schedulers for different models:
    if params.model_version == "resnet18":
        scheduler = StepLR(optimizer, step_size=150, gamma=0.1)
    # for cnn models, num_epochs is always < 100, so a step_size of 100 means the LR is effectively never decayed
    elif params.model_version == "cnn":
        scheduler = StepLR(optimizer, step_size=100, gamma=0.2)

    for epoch in range(params.num_epochs):
     
        scheduler.step()
     
        # Run one epoch
        logging.info("Epoch {}/{}".format(epoch + 1, params.num_epochs))

        # compute number of batches in one epoch (one full pass over the training set)
        train(model, optimizer, loss_fn, train_dataloader, metrics, params)

        # Evaluate for one epoch on validation set
        val_metrics = evaluate(model, loss_fn, val_dataloader, metrics, params)        

        val_acc = val_metrics['accuracy']
        is_best = val_acc >= best_val_acc

        # Save weights
        utils.save_checkpoint({'epoch': epoch + 1,
                               'state_dict': model.state_dict(),
                               'optim_dict' : optimizer.state_dict()},
                               is_best=is_best,
                               checkpoint=model_dir)

        # If best_eval, best_save_path
        if is_best:
            logging.info("- Found new best accuracy")
            best_val_acc = val_acc

            # Save best val metrics in a json file in the model directory
            best_json_path = os.path.join(model_dir, "metrics_val_best_weights.json")
            utils.save_dict_to_json(val_metrics, best_json_path)

        # Save latest val metrics in a json file in the model directory
        last_json_path = os.path.join(model_dir, "metrics_val_last_weights.json")
        utils.save_dict_to_json(val_metrics, last_json_path)
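A hedged usage sketch for train_and_evaluate; net, train_dl, val_dl, loss_fn, metrics, params and the learning-rate field are all assumed to come from the surrounding project (with torch already imported as in the rest of the script), and the directory name is only an example:

# optimizer is tied to the chosen model; params.model_version selects the LR schedule above
optimizer = torch.optim.SGD(net.parameters(), lr=params.learning_rate, momentum=0.9)
train_and_evaluate(net, train_dl, val_dl, optimizer, loss_fn, metrics,
                   params, model_dir="experiments/base_model", restore_file=None)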
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', dest='input', help='Training data file, e.g. data/1_train.txt')
    parser.add_argument('-r', '--oround', dest='oround', type=int, default=20, help='Number of output rounds, e.g. 20 [default=20]')
    parser.add_argument('-m', '--model', dest='model', help='Model python library, e.g. modelxxx; should be located in \'rgmodels\'')

    parser.add_argument('-o', '--ofile', dest='ofile', help='Output data file, e.g. result/1_modelxxx.txt')
    parser.add_argument('-gt', '--gt', dest='groundtruth', help='Ground truth, used to test the model if provided')
    args = parser.parse_args()

    assert args.input is not None
    assert args.model is not None
    assert args.groundtruth is not None
    gt_data = io.read_database(args.groundtruth)

    in_data = io.read_database(args.input)
    nr_rows = len(in_data)
    nr_cols = len(in_data[0])
    nr_types = numpy.array(in_data).max()+1
    out_data = numpy.zeros((nr_rows, args.oround), dtype=numpy.int32)

    model = load_model(args.model)()
    model.train(in_data, nr_rows, nr_cols, nr_types)
    model.predict(out_data, nr_rows, args.oround, nr_types, gt_data)

    if args.ofile is not None:
        io.write_database(args.ofile, out_data)

    score, count = evaluate(out_data, gt_data)
    print("model={} score={}/{}".format(args.model, score, count))
def gen_random_evo(monograms, bigrams, line_types, a, A, B, C, D):
    # generates random skeleton for each type
    list_skeleton_1 = [x for x in line_types if line_types[x].typenum == 1]
    list_weight_1 = [line_types[x].occurrences for x in list_skeleton_1]
    random_line_type_1 = line_types[random_weighted_occurrence(list_skeleton_1, list_weight_1)]
    my_line_1 = populate_words(random_line_type_1, monograms)
    if my_line_1 is None:  # failure when populating with words
        return None

    list_skeleton_2 = [x for x in line_types if line_types[x].typenum == 2]
    list_weight_2 = [line_types[x].occurrences for x in list_skeleton_2]
    random_line_type_2 = line_types[random_weighted_occurrence(list_skeleton_2, list_weight_2)]
    my_line_2 = populate_words(random_line_type_2, monograms)
    if my_line_2 is None:  # failure when populating with words
        return None

    list_skeleton_3 = [x for x in line_types if line_types[x].typenum == 3]
    list_weight_3 = [line_types[x].occurrences for x in list_skeleton_3]
    random_line_type_3 = line_types[random_weighted_occurrence(list_skeleton_3, list_weight_3)]
    my_line_3 = populate_words(random_line_type_3, monograms)
    if my_line_3 is None:  # failure when populating with words
        return None

    my_random_haiku = Evo_object(my_line_1, my_line_2, my_line_3)
    my_random_haiku.update_score(
        evaluate.evaluate([my_line_1, my_line_2, my_line_3], monograms, bigrams, a, A, B, C, D)
    )

    #    print ([l.wordarray for l in my_random_haiku.triple])
    return my_random_haiku
Example #14
def run_baseline(test_data, train_data, baseline_type="random_train", aligned=False, verbose=False):
    total_hamming_distance = 0.0
    source_dataset = train_data if baseline_type == "random_test" else test_data
    ctr = 0
    for example in test_data:

        selected = pick_random(source_dataset)
        if aligned:
            true_sequence_absolute = events.AbsoluteEventSequence.from_aligned_string(example[1])
            pred_sequence_absolute = events.AbsoluteEventSequence.from_aligned_string(selected[1])
        else:
            true_sequence_absolute = events.AbsoluteEventSequence.from_string(example[1])
            pred_sequence_absolute = events.AbsoluteEventSequence.from_string(selected[1])

        bmpmaker.clear()
        bmpmaker.process_commands(true_sequence_absolute.events)
        true_bitmap = np.array(bmpmaker.bitmap)

        bmpmaker.clear()
        bmpmaker.process_commands(pred_sequence_absolute.events)
        pred_bitmap = np.array(bmpmaker.bitmap)

        hamming = evaluate.evaluate(true_bitmap, pred_bitmap)
        total_hamming_distance += hamming

        ctr += 1
        if verbose and ctr % 500 == 0:
            print "Progress: %d" % ctr

    avg_hamming = total_hamming_distance / len(test_data)
    print "Average Hamming distance: %2.2f" % avg_hamming
Example #15
def workflow():
    metric = []
    adset = set([line.strip().split()[1] for line in file(TMP_DATA_DIR_PATH+'topAdClickCnt.dict.final')])
    adidlist = []
    wholeResult = None
    total_advertisements = 0
    for adid in adset:
        if adid in blacklist:
            continue
        adidlist.append(adid)
        res, finalres = evaluate(adid, 20, reCal=False, testing=True)
        if wholeResult is None:
            if finalres[-1][0] < 0.2:
                wholeResult = numpy.array(finalres)
                total_advertisements += 1
        else:
            if finalres[-1][0] < 0.2:
                wholeResult += numpy.array(finalres)
                total_advertisements += 1
        metric.append(res)
    linenum = 1
    for adid, res in zip(adidlist, metric) :
        print '|'.join([str(linenum), adid, str(res[0]), str(res[1])])
        linenum += 1
    wholeResult = wholeResult / total_advertisements
    print total_advertisements
    print wholeResult
    print numpy.mean(metric, axis=0)
    plotTopDistribution(wholeResult)
def mutate(evo_object1, monograms, bigrams, a, A, B, C, D):
    line_index_to_mutate = random.randint(0, 2)
    mutated_object = deepcopy(evo_object1)

    mutated_object.triple[line_index_to_mutate] = mutate_line(mutated_object.triple[line_index_to_mutate], monograms)

    mutated_object.update_score(evaluate.evaluate(mutated_object.triple, monograms, bigrams, a, A, B, C, D))
    return mutated_object
Example #17
	def eval(self, symbol_table):
		logger.debug("self=%s", self)

		t = self.token_list[0]

		s = evaluate(self.token_list, symbol_table)
		print("{}:{}: *** {}. Stop.".format(t.string[0].filename, t.string[0].linenumber, s), file=self.fh)
		sys.exit(1)
def get_new_solution(startSol,startPath,tabuList):
    neighbours = startSol.neighbours(startPath)
    (sol, move) = neighbours[0]
    (solPath, solVal) = evaluate(sol)
    if move in tabuList:
        solVal = 999999999
    for n in neighbours:
        (newSol, newMove) = n
        (v1, v2) = newMove
        # skip candidate moves that are tabu in either direction
        if newMove not in tabuList and (v2, v1) not in tabuList:
            (newSolPath, newSolVal) = evaluate(newSol)
            if newSolVal < solVal:
                sol = newSol
                move = newMove
                solVal = newSolVal
                solPath = newSolPath
    return sol, move, solVal, solPath
Example #19
def main(files):
    
    #clears files
    with open ('prvalues.txt', 'w'): pass #results
    with open ('ranks.txt', 'w'):  pass #ranks
    with open ('iprec.txt', 'w'): pass #interpolation

    #goes through the topics file and makes a dict
    topics = {}
    with open (files[1], 'r') as f:
        for lines in f:
            info = lines.split(' ')
            topics[info[0]] = ' '.join(info[1:])

    # goes through a qrels file
    # makes a dict {query: list of rel docs}
    qrel = findQrels([files[2]])

    # gets the files in the directory
    tim = getFiles([], files[3])
    
    found = findTerms(tim)
    terms = found[0]
    l = found[1]
    N = len(l)
        
    output = ''
    ap = []
    binOptions = input('1 - binary, 2 - not? ')
    wOptions = None  # only set when a non-binary weighting is chosen
    if binOptions != 1:
        wOptions = input('enter: 1 - idf, 2 - length normalization: ')
    
    for q in topics:
        #q is the topic number
        rank = getRanks(topics[q], terms, l, binOptions, wOptions)
        sortRank = sorted(rank.items(), key = operator.itemgetter(1), reverse=True)
        ret = list(x[0] for x in sortRank)
        
        # for the export
        count = 0
        for doc in sortRank:
            count+=1
            output += (q+' Q0 '+doc[0]+' '+str(count)+' '+str(doc[1])+' x\n')

        rel = qrel[q]

        print (q, topics[q].replace('\n', ''))
        ap.append(evaluate(q, ret, rel, N))
        print ('\n')
        
    maps  = sum(ap)/len(ap)
    print (ap)
    print (maps)
    
    # exports precision and recall values
    with open ('ranks.txt', 'w') as f:
            f.write(output)
Example #20
 def run_evaluation(self, ref, crank_flag, single_rank_flag, expand_rank_flag):
     file_set = set(self.text_dict.keys())
     ref_file_list = ref.keys()
     relscore = dict()
     eval_score = self.init_eval_score()
     for file_name in deepcopy(file_set):
         if file_name in ref_file_list: 
             relscore[file_name] = read_relscore(self.relscore_path, file_name)
         else:
             print 'ref file<%s> does not exist in the given gold_standard data' % (file_name)
             file_set.remove(file_name)
             self.excluded_files.add(file_name)
     if expand_rank_flag:
         graph_dict = dict()
         for winsize in self.winsize_list:
             graph_dict[winsize] = dict()
             for file_name in file_set: 
                 graph_dict[winsize][file_name] = Graph(file_name,
                                                        self.text_dict[file_name], 
                                                        relscore[file_name], 
                                                        winsize, 
                                                        self.lamb_list, 
                                                        self.cutoff_list, 
                                                        return_graph_flag=True, 
                                                        crank_flag=crank_flag, 
                                                        single_rank_flag=single_rank_flag, 
                                                        input_graph=None).graph
             self.update_neighbor_weight(graph_dict[winsize])
     for winsize in self.winsize_list:
         new_gs = dict()
         for file_name in file_set:
             input_graph = None
             if expand_rank_flag:
                 input_graph = graph_dict[winsize][file_name]
             graph = Graph(file_name,
                           self.text_dict[file_name], 
                           relscore[file_name], 
                           winsize, 
                           self.lamb_list, 
                           self.cutoff_list, 
                           return_graph_flag=False, 
                           crank_flag=crank_flag, 
                           single_rank_flag=single_rank_flag, 
                           input_graph=input_graph,
                           lda_dict=self.lda_dict)
             checked_gs_list = self.check_gold_standard(graph, ref[file_name])
             new_gs[file_name] = checked_gs_list
             for cutoff in self.cutoff_list:
                 for lamb in self.lamb_list:
                     candidates = graph.score_candidates(cutoff, lamb)
                     file_path = '../checked_inspec.ref'
                     save_candidates(file_path, winsize, cutoff, lamb, candidates);
                     eval_score[winsize][cutoff][lamb][file_name] = (
                         evaluate(self.stem_list(candidates), self.stem_list(checked_gs_list)))
     write_gs('./checked_inspec.ref', new_gs)
     return eval_score
def cross_pollinate(evo_object1, evo_object2, monograms, bigrams, a, A, B, C, D):
    new_triple = []
    for y in range(3):
        if random.randint(0, 1) == 0:
            new_triple.append(evo_object1.get_triple()[y])
        else:
            new_triple.append(evo_object2.get_triple()[y])
    new_kid = Evo_object(new_triple[0], new_triple[1], new_triple[2])
    new_kid.update_score(evaluate.evaluate(new_kid.triple, monograms, bigrams, a, A, B, C, D))
    return new_kid
Example #22
def evaluator(predictfile='../output/latest/result.txt',realfile='../data/test.csv'):
    r = open(predictfile)
    predictdata = {}
    for line in r:
        userid, brandids = line.split()
        predictdata[userid] = brandids.split(',')

    import csv
    reader = csv.reader(open(realfile))
    next(reader)  # skip the header row
    testdata = {}
    for userid, brandid, dtype, date in reader:
        if dtype == '1':
            brandids = testdata[userid] if userid in testdata else []
            if brandid not in brandids:
                brandids.append(brandid)
            testdata[userid]=brandids

    evaluate.evaluate(predictdata,testdata)
Example #23
def refevaluate(environment, expression):
	if isTestCommand(expression):
		launchAllTests(environment)
		return

	if expression == NIL:
		return NIL
	if isNumber(expression):
		return expression
	elif isSymbol(expression):
		return environment[getSymbolValue(expression)]
	elif isString(expression):
		return expression
	elif isFunction(environment, expression):
		functionName = getSymbolValue(list_get(expression, 0))
		function = environment["functions"][functionName]["name"]
		argumentsEvaluated = []
		for expressionIndex in range(1, list_getLength(expression)):
			nextArgument = list_get(expression, expressionIndex)
			argumentsEvaluated.append(evaluate(environment, nextArgument))
		metadata = environment["functions"][functionName]
		return function(environment, metadata, argumentsEvaluated)
	elif isMacro(environment, expression):
		macroName = getSymbolValue(list_get(expression, 0))
		macro = environment["macros"][macroName]["name"]
		arguments = []
		for expressionIndex in range(1, list_getLength(expression)):
			arguments.append(list_get(expression, expressionIndex))
		metadata = environment["macros"][macroName]
		return evaluate(environment, macro(environment, metadata, arguments))
	elif isSpecial(environment, expression):
		assert(isSpecial(environment, expression))
		operatorName = getSymbolValue(list_get(expression, 0))
		operator = environment["special"][operatorName]["name"]
		arguments = []
		for expressionIndex in range(1, list_getLength(expression)):
			arguments.append(list_get(expression, expressionIndex))
		metadata = environment["special"][operatorName]
		return operator(environment, metadata, arguments)
	else:
		print("failed on " + expressionToString(expression))
		assert(False)
Example #24
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', dest='input', help='Training data file, e.g. data/1_train.txt')
    parser.add_argument('-r', '--oround', dest='oround', type=int, default=20, help='Number of output rounds, e.g. 20 [default=20]')
    parser.add_argument('-m', '--model', dest='model', nargs='+', help='Model python library, e.g. modelxxx; should be located in \'rgmodels\'')
    parser.add_argument('-val', '--val', dest='val_split', type=int, default=15, help='Split input data to train, val.')

    parser.add_argument('-o', '--ofile', dest='ofile', help='Output data file, e.g. result/1_modelxxx.txt')
    parser.add_argument('-gt', '--gt', dest='groundtruth', help='Ground truth, used to test the model if provided')
    args = parser.parse_args()

    assert args.input is not None
    assert args.model is not None

    in_data = io.read_database(args.input)
    in_data = numpy.array(in_data)

    assert args.groundtruth is not None
    gt_data = io.read_database(args.groundtruth)

    train_data, val_data = in_data[:, 0:args.val_split], in_data[:, args.val_split:]

    nr_rows = len(train_data)
    nr_cols = len(in_data[0])
    nr_train_cols = len(train_data[0])
    nr_val_cols = len(val_data[0])
    nr_types = numpy.array(train_data).max()+1

    all_outs = list()
    for current_model in args.model:
        current_outs = list()
        for _ in range(10):
            out_data = numpy.zeros((nr_rows, nr_val_cols), dtype=numpy.int32)
            model = load_model(current_model)()
            model.train(train_data, nr_rows, nr_train_cols, nr_types)
            model.predict(out_data, nr_rows, nr_val_cols, nr_types, val_data)
            current_outs.append(out_data)
        all_outs.append(current_outs)
    final_map = ensemble(all_outs, gt=val_data)
    # print(final_map)

    all_outs = list()
    for current_model in args.model:
        out_data = numpy.zeros((nr_rows, args.oround), dtype=numpy.int32)
        model = load_model(current_model)()
        model.train(in_data, nr_rows, nr_cols, nr_types)
        model.predict(out_data, nr_rows, args.oround, nr_types, gt_data)
        all_outs.append(out_data)
    final = ensemble(all_outs, final_map=final_map)
    # final = cheat_ensemble(all_outs, gt_data)

    score, count = evaluate(final, gt_data)
    print("model={} score={}/{}".format(args.model, score, count))
def main():
# sentences=None, size=100, alpha=0.025, window=5, min_count=5,
#         sample=0, seed=1, workers=1, min_alpha=0.0001, sg=1, hs=1, negative=0,
#         cbow_mean=0, hashfxn=hash, iter=1
    corp=sys.argv[1]
    goldfile=sys.argv[2]
    kop=int(sys.argv[3])
#    GORDELEKU="/gscratch/users/maguirrezaba008/vectorFiles/"
    GORDELEKU="/home/magirrezaba008/semantika/bestEmbeddings/results/"
    filename = corp.split("/")[-1]
    from configuration import confs
    sentences=word2vec.LineSentence(corp)
    resu=[]
    for conf in confs:
        strconf=[str(i) for i in conf]
        modelfilename='vectors_'+'---'.join(strconf)+'.bin'
        print "Let's start working with the file "+modelfilename
        if os.path.isfile(GORDELEKU+modelfilename):
            print modelfilename+" exists, so we don't need to train it again!"
            model=Word2Vec.load_word2vec_format(GORDELEKU+modelfilename,None, True)
            print "Loaded!"
        else:
            print "Training with "+filename+" file..."
            model = Word2Vec(sentences, conf[0], conf[1], conf[2], conf[3], conf[4], conf[5], conf[6], conf[7], conf[8], conf[9], conf[10], conf[11], conf[12], conf[13])
            print "Trained!"
        
#        model.save_word2vec_format(modelfilename, binary=True)
#        print "Saved "+modelfilename+" file..."
        (pearsoncoef, significance) = evaluate.evaluate(model, goldfile)
        print "File name: "+modelfilename
        print "Pearson coefficient and significance: "+str(pearsoncoef)+" "+str(significance)
        print

        this={}
        this['name']=modelfilename
        this['coef']=pearsoncoef
        this['significance']=significance
        if len(resu)<kop and this['significance'] < 0.05:
            resu.append(this)
            model.save_word2vec_format(GORDELEKU+modelfilename, binary=True)
            print "Saved "+modelfilename+" file..."
        elif this['significance'] < 0.05 and min(resu, key=lambda s: s['coef'])['coef']<this['coef']:
            resu.append(this)
            model.save_word2vec_format(GORDELEKU+modelfilename, binary=True)
            print "Saved "+modelfilename+" file..."
            mini=min(resu, key=lambda s: s['coef'])
            ignore(GORDELEKU+mini['name'])
            resu.remove(mini)
        else:
            ignore(GORDELEKU+this['name'])
        del(model)
        print "resu has "+str(len(resu))+" elements!"
Example #26
def main(files):
    with open ('prvalues.txt', 'w'): pass #results
    with open ('ranks.txt', 'w'):  pass #ranks
    with open ('iprec.txt', 'w'): pass #interpolation


    #goes through the topics file and makes a dict
    topics = {}
    with open (sys.argv[1], 'r') as f:
        for lines in f:
            info = lines.split(' ')
            topics[info[0]] = ' '.join(info[1:])

    qrels = findQrels([files[2]])


    # gets the files in the directory
    #tim = getFiles([], files[3])
    
    terms, l = findTerms(files[3:])
    N = len(l)
    terms = get(terms, l)

    output = ''
    ap = []
    #query = input("Enter: ")
    #topics = [query]

    for q in topics:
        #q is the topic number
        rank = getRanks(topics[q], terms)
        sortRank = sorted(rank.items(), key = operator.itemgetter(1), reverse=True)
        ret = list(x[0] for x in sortRank)
        
        # for the export
        count = 0
        for doc in sortRank:
            count+=1
            output += (q+' Q0 '+doc[0]+' '+str(count)+' '+str(doc[1])+' x\n')

        rel = qrels[q]
        print (q, topics[q].replace('\n', ''))
        ap.append(evaluate(q, ret, rel, N))
        print ('\n')
        
    maps  = sum(ap)/len(ap)
    print (ap)
    print (maps)
    
    # exports precision and recall values
    with open ('ranks.txt', 'w') as f:
            f.write(output)
Example #27
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', dest='input', help='Training data file, e.g. data/1_train.txt')
    parser.add_argument('-r', '--oround', dest='oround', type=int, default=20, help='Number of output rounds, e.g. 20 [default=20]')
    parser.add_argument('-m', '--model', dest='model', nargs='+', help='Model python library, e.g. modelxxx; should be located in \'rgmodels\'')

    parser.add_argument('-o', '--ofile', dest='ofile', help='Output data file, e.g. result/1_modelxxx.txt')
    parser.add_argument('-gt', '--gt', dest='groundtruth', help='Ground truth, used to test the model if provided')
    args = parser.parse_args()

    assert args.input is not None
    assert args.model is not None
    assert args.groundtruth is not None

    in_data = io.read_database(args.input)
    in_data = numpy.array(in_data)
    gt_data = io.read_database(args.groundtruth)
    gt_data = numpy.array(gt_data)
    all_data = numpy.concatenate([in_data, gt_data], axis=1)

    nr_rows = len(all_data)
    nr_types = numpy.array(all_data).max()+1

    final_data = numpy.zeros_like(gt_data)
    for i in range(30, 50):
        for j1 in range(all_data.shape[0]):
            best_score, best_model = -1, None
            for current_model in args.model:
                sum_score = 0
                for _ in range(1):
                    train_data = all_data[:, 0:25]
                    val_data = all_data[:, 25:i]
                    out_data = numpy.zeros_like(val_data)
                    model = load_model(current_model)()
                    model.train(train_data, nr_rows, train_data.shape[1], nr_types)
                    model.predict(out_data, nr_rows, val_data.shape[1], nr_types, val_data)
                    for j2 in range(val_data.shape[1]):
                        sum_score += 1 if val_data[j1, j2] == out_data[j1, j2] else 0
                if sum_score > best_score:
                    best_score = sum_score
                    best_model = current_model

            current_final_out = numpy.zeros_like(all_data[:, i:i+1])
            print(i, j1, best_model)
            model = load_model(best_model)()
            model.train(all_data[:, 0:i], nr_rows, i, nr_types)
            model.predict(current_final_out, nr_rows, 1, nr_types, all_data[:, i:i+1])
            final_data[j1, i-30] = current_final_out[j1, 0]

    score, count = evaluate(final_data, gt_data)
    print("model={} score={}/{}".format(args.model, score, count))
def train(opts):
    train_file = opts.train
    model_file = opts.model
    beam_size = opts.beam_size
    assert (train_file != "")
    assert (model_file != "")
    assert (beam_size >= 1)
#    if not os.path.exits(model_file):
#        params = {}
#    else:
#        params = pickle.load(open(model_file))
    params = {}
    data = []
    with codecs.open(train_file, encoding='utf-8') as infile:
        for line in infile:
            words = line.strip().split()
            data.append(words)
    num_instances = len(data)
    if opts.dev:
        dev_data = []
        dev_file = opts.dev
        with codecs.open(dev_file, encoding='utf-8') as infile:
            for line in infile:
                words = line.strip().split()
                dev_data.append(words)


    for nr_iter in xrange(opts.iteration):
        for idx, sentence in enumerate(data):
            now = nr_iter * num_instances + 1
            gold_actions = get_gold_actions(sentence)
            raw_sentence = ''.join(sentence)
            beam_search(True, raw_sentence, beam_size, params, gold_actions, now)

        temp_model_file = "{0}.{1}".format(model_file, nr_iter+1)
        assert(len(params)>0)
        flush_parameters(params, now)
        pickle.dump(params, open(temp_model_file, 'w'))
        if opts.dev:
            temp_file = "temp/temp%s" % nr_iter
            with open(temp_file, 'w') as outfile:
                for sentence in dev_data:
                    raw_sentence = ''.join(sentence)
                    words = beam_search(False, raw_sentence, beam_size, params)
                    line = ' '.join(words)
                    line += '\n'
#                    print raw_sentence
#                    print line.strip()
                    outfile.write(line)
            p, r, f = evaluate(temp_file, dev_file)
            print "Precision:{0}, Recall:{1}, Fscore:{2}".format(p, r, f)
Example #29
def _ai_min_max(board, alpha, beta, competence, depth=DEPTH-1):
    hsh = board.zobrist_hash()
    score = search_hash(depth, alpha, beta, hsh)
    if score:
        return score
    if board.is_game_over() or depth == 0:
        score = evaluate(board)
        input_hash(hsh, depth, score, exact_flag)
        return score
    else:
        if board.turn == chess.WHITE:
            best_moves_and_alphas = []
            for move in check_best_move(hsh, board.generate_legal_moves()):
                board.push(move)  # make move
                score = _ai_min_max(board, alpha, beta, competence, depth-1)
                board.pop()  # unmake move
                if score > alpha:
                    alpha = score
                    best_moves_and_alphas.append((move, alpha))
                    if alpha >= beta:
                        break
            if not best_moves_and_alphas:
                input_hash(hsh, depth, alpha, alpha_flag, None)
                return alpha
            index = int(round((len(best_moves_and_alphas)-1) * competence))
            best_move_and_alpha = best_moves_and_alphas[index]
            # Attempt at cutting out very bad moves that the player would
            # obviously counter. The threshold of 50 is arbitrary; the thinking
            # is that the player would notice missing half a pawn. There is
            # probably a better way to do this so that the weight isn't constant
            # and is instead based on the player's competence level.
                index += 1
                best_move_and_alpha = best_moves_and_alphas[index]
            input_hash(hsh, depth, best_move_and_alpha[1], alpha_flag,
                       best_move_and_alpha[0])
            return best_move_and_alpha[1]

        else:
            best_move = None
            for move in check_best_move(hsh, board.generate_legal_moves()):
                board.push(move)  # make move
                score = _ai_min_max(board, alpha, beta, competence, depth-1)
                board.pop()  # unmake move
                if score < beta:
                    beta = score
                    if alpha >= beta:
                        break
            input_hash(hsh, depth, beta, beta_flag, best_move)
            return beta
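A toy illustration of the competence-based index selection used above; the move/score pairs are made up:

best_moves_and_alphas = [('e2e4', 10), ('d2d4', 35), ('g1f3', 60)]  # hypothetical (move, score) pairs, best last
competence = 0.5
index = int(round((len(best_moves_and_alphas) - 1) * competence))
print(best_moves_and_alphas[index])  # ('d2d4', 35): a mid-competence player picks a middling move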
Example #30
def linear_regression_multivariant(X_train, X_test, Y_train, Y_test, cost_fun='ordinary_least_squares'):
    if cost_fun == 'ordinary_least_squares':
        regr = linear_model.LinearRegression()
    elif cost_fun == 'Ridge_Regression':
        regr = linear_model.Ridge(alpha=1)
    elif cost_fun == 'Bayesian_Regression':
        regr = linear_model.BayesianRidge()
    elif cost_fun == 'SVR':
        regr = SVR(C=1.0, epsilon=0.2, kernel='linear')
    elif cost_fun == 'KNN_Reg':
        regr = neighbors.KNeighborsRegressor(5, weights='distance')
    else:
        raise Exception('Unsupported cost function: %s' % cost_fun)

    # Train the model using the training sets
    regr.fit(X_train, Y_train)
    predict = regr.predict(X_test)
    # Record the experiment performance; an explained variance score of 1 means perfect prediction
    np.seterr(invalid='ignore')
    print(list(predict)[:100])
    print(Y_test[:100])
    evaluate(list(predict), np.array(Y_test),
             'linear regression ' + 'Explained variance score: %.2f' % regr.score(X_test, Y_test))
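A hedged usage sketch for linear_regression_multivariant, assuming the module's numpy/sklearn imports and its evaluate helper; the feature matrices are illustrative only:

X_train = [[1.0, 2.0], [2.0, 1.0], [3.0, 4.0], [4.0, 3.0]]
X_test = [[5.0, 5.0], [6.0, 4.0]]
Y_train = [3.0, 3.1, 7.2, 6.9]
Y_test = [10.1, 9.8]
linear_regression_multivariant(X_train, X_test, Y_train, Y_test, cost_fun='Ridge_Regression')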
Example #31
def main(args):
    torch.manual_seed(0)

    # Get device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Get dataset
    dataset = Dataset("train.txt") 
    loader = DataLoader(dataset, batch_size=hp.batch_size**2, shuffle=True, 
        collate_fn=dataset.collate_fn, drop_last=True, num_workers=0)

    # Define model
    model = nn.DataParallel(FastSpeech2()).to(device)
    print("Model Has Been Defined")
    num_param = utils.get_param_num(model)
    print('Number of FastSpeech2 Parameters:', num_param)

    # Optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(), betas=hp.betas, eps=hp.eps, weight_decay = hp.weight_decay)
    scheduled_optim = ScheduledOptim(optimizer, hp.decoder_hidden, hp.n_warm_up_step, args.restore_step)
    Loss = FastSpeech2Loss().to(device) 
    print("Optimizer and Loss Function Defined.")

    # Load checkpoint if exists
    checkpoint_path = os.path.join(hp.checkpoint_path)
    try:
        checkpoint = torch.load(os.path.join(
            checkpoint_path, 'checkpoint_{}.pth.tar'.format(args.restore_step)))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step {}---\n".format(args.restore_step))
    except:
        print("\n---Start New Training---\n")
        if not os.path.exists(checkpoint_path):
            os.makedirs(checkpoint_path)

    # Load vocoder
    if hp.vocoder == 'melgan':
        melgan = utils.get_melgan()
        melgan.to(device)
    elif hp.vocoder == 'waveglow':
        waveglow = utils.get_waveglow()
        waveglow.to(device)

    # Init logger
    log_path = hp.log_path
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    logger = SummaryWriter(log_path)

    # Init synthesis directory
    synth_path = hp.synth_path
    if not os.path.exists(synth_path):
        os.makedirs(synth_path)

    # Define Some Information
    Time = np.array([])
    Start = time.perf_counter()
    
    # Training
    model = model.train()
    for epoch in range(hp.epochs):
        # Get Training Loader
        total_step = hp.epochs * len(loader) * hp.batch_size

        for i, batchs in enumerate(loader):
            for j, data_of_batch in enumerate(batchs):
                start_time = time.perf_counter()

                current_step = i*hp.batch_size + j + args.restore_step + epoch*len(loader)*hp.batch_size + 1

                # Init
                scheduled_optim.zero_grad()

                # Get Data
                text = torch.from_numpy(data_of_batch["text"]).long().to(device)
                mel_target = torch.from_numpy(data_of_batch["mel_target"]).float().to(device)
                D = torch.from_numpy(data_of_batch["D"]).int().to(device)
                f0 = torch.from_numpy(data_of_batch["f0"]).float().to(device)
                energy = torch.from_numpy(data_of_batch["energy"]).float().to(device)
                mel_pos = torch.from_numpy(data_of_batch["mel_pos"]).long().to(device)
                src_pos = torch.from_numpy(data_of_batch["src_pos"]).long().to(device)
                src_len = torch.from_numpy(data_of_batch["src_len"]).long().to(device)
                mel_len = torch.from_numpy(data_of_batch["mel_len"]).long().to(device)
                max_len = max(data_of_batch["mel_len"]).astype(np.int16)
                
                # Forward
                mel_output, mel_postnet_output, duration_output, f0_output, energy_output = model(
                    text, src_pos, mel_pos, max_len, D, f0, energy)
                
                # Cal Loss
                mel_loss, mel_postnet_loss, d_loss, f_loss, e_loss = Loss(
                        duration_output, D, f0_output, f0, energy_output, energy, mel_output, mel_postnet_output, mel_target, src_len, mel_len)
                total_loss = mel_loss + mel_postnet_loss + d_loss + f_loss + e_loss
                 
                # Logger
                t_l = total_loss.item()
                m_l = mel_loss.item()
                m_p_l = mel_postnet_loss.item()
                d_l = d_loss.item()
                f_l = f_loss.item()
                e_l = e_loss.item()
                with open(os.path.join(log_path, "total_loss.txt"), "a") as f_total_loss:
                    f_total_loss.write(str(t_l)+"\n")
                with open(os.path.join(log_path, "mel_loss.txt"), "a") as f_mel_loss:
                    f_mel_loss.write(str(m_l)+"\n")
                with open(os.path.join(log_path, "mel_postnet_loss.txt"), "a") as f_mel_postnet_loss:
                    f_mel_postnet_loss.write(str(m_p_l)+"\n")
                with open(os.path.join(log_path, "duration_loss.txt"), "a") as f_d_loss:
                    f_d_loss.write(str(d_l)+"\n")
                with open(os.path.join(log_path, "f0_loss.txt"), "a") as f_f_loss:
                    f_f_loss.write(str(f_l)+"\n")
                with open(os.path.join(log_path, "energy_loss.txt"), "a") as f_e_loss:
                    f_e_loss.write(str(e_l)+"\n")
                 
                # Backward
                total_loss.backward()

                # Clipping gradients to avoid gradient explosion
                nn.utils.clip_grad_norm_(model.parameters(), hp.grad_clip_thresh)

                # Update weights
                scheduled_optim.step_and_update_lr()
                
                # Print
                if current_step % hp.log_step == 0:
                    Now = time.perf_counter()

                    str1 = "Epoch [{}/{}], Step [{}/{}]:".format(
                        epoch+1, hp.epochs, current_step, total_step)
                    str2 = "Total Loss: {:.4f}, Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f}, Duration Loss: {:.4f}, F0 Loss: {:.4f}, Energy Loss: {:.4f};".format(
                        t_l, m_l, m_p_l, d_l, f_l, e_l)
                    str3 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format(
                        (Now-Start), (total_step-current_step)*np.mean(Time))

                    print("\n" + str1)
                    print(str2)
                    print(str3)
                    
                    with open(os.path.join(log_path, "log.txt"), "a") as f_log:
                        f_log.write(str1 + "\n")
                        f_log.write(str2 + "\n")
                        f_log.write(str3 + "\n")
                        f_log.write("\n")

                    logger.add_scalars('Loss/total_loss', {'training': t_l}, current_step)
                    logger.add_scalars('Loss/mel_loss', {'training': m_l}, current_step)
                    logger.add_scalars('Loss/mel_postnet_loss', {'training': m_p_l}, current_step)
                    logger.add_scalars('Loss/duration_loss', {'training': d_l}, current_step)
                    logger.add_scalars('Loss/F0_loss', {'training': f_l}, current_step)
                    logger.add_scalars('Loss/energy_loss', {'training': e_l}, current_step)
                
                if current_step % hp.save_step == 0:
                    torch.save({'model': model.state_dict(), 'optimizer': optimizer.state_dict(
                    )}, os.path.join(checkpoint_path, 'checkpoint_{}.pth.tar'.format(current_step)))
                    print("save model at step {} ...".format(current_step))

                if current_step % hp.synth_step == 0:
                    length = mel_len[0].item()
                    mel_target_torch = mel_target[0, :length].detach().unsqueeze(0).transpose(1, 2)
                    mel_target = mel_target[0, :length].detach().cpu().transpose(0, 1)
                    mel_torch = mel_output[0, :length].detach().unsqueeze(0).transpose(1, 2)
                    mel = mel_output[0, :length].detach().cpu().transpose(0, 1)
                    mel_postnet_torch = mel_postnet_output[0, :length].detach().unsqueeze(0).transpose(1, 2)
                    mel_postnet = mel_postnet_output[0, :length].detach().cpu().transpose(0, 1)
                    Audio.tools.inv_mel_spec(mel, os.path.join(synth_path, "step_{}_griffin_lim.wav".format(current_step)))
                    Audio.tools.inv_mel_spec(mel_postnet, os.path.join(synth_path, "step_{}_postnet_griffin_lim.wav".format(current_step)))
                    
                    if hp.vocoder == 'melgan':
                        utils.melgan_infer(mel_torch, melgan, os.path.join(hp.test_path, 'step_{}_{}.wav'.format(current_step, hp.vocoder)))
                        utils.melgan_infer(mel_postnet_torch, melgan, os.path.join(hp.test_path, 'step_{}_postnet_{}.wav'.format(current_step, hp.vocoder)))
                        utils.melgan_infer(mel_target_torch, melgan, os.path.join(hp.test_path, 'step_{}_ground-truth_{}.wav'.format(current_step, hp.vocoder)))
                    elif hp.vocoder == 'waveglow':
                        utils.waveglow_infer(mel_torch, waveglow, os.path.join(hp.test_path, 'step_{}_{}.wav'.format(current_step, hp.vocoder)))
                        utils.waveglow_infer(mel_postnet_torch, waveglow, os.path.join(hp.test_path, 'step_{}_postnet_{}.wav'.format(current_step, hp.vocoder)))
                        utils.waveglow_infer(mel_target_torch, waveglow, os.path.join(hp.test_path, 'step_{}_ground-truth_{}.wav'.format(current_step, hp.vocoder)))
                    
                    f0 = f0[0, :length].detach().cpu().numpy()
                    energy = energy[0, :length].detach().cpu().numpy()
                    f0_output = f0_output[0, :length].detach().cpu().numpy()
                    energy_output = energy_output[0, :length].detach().cpu().numpy()
                    
                    utils.plot_data([(mel_postnet.numpy(), f0_output, energy_output), (mel_target.numpy(), f0, energy)], 
                        ['Synthesized Spectrogram', 'Ground-Truth Spectrogram'], filename=os.path.join(synth_path, 'step_{}.png'.format(current_step)))
                
                if current_step % hp.eval_step == 0:
                    model.eval()
                    with torch.no_grad():
                        d_l, f_l, e_l, m_l, m_p_l = evaluate(model, current_step)
                        t_l = d_l + f_l + e_l + m_l + m_p_l
                        
                        logger.add_scalars('Loss/total_loss', {'validation': t_l}, current_step)
                        logger.add_scalars('Loss/mel_loss', {'validation': m_l}, current_step)
                        logger.add_scalars('Loss/mel_postnet_loss', {'validation': m_p_l}, current_step)
                        logger.add_scalars('Loss/duration_loss', {'validation': d_l}, current_step)
                        logger.add_scalars('Loss/F0_loss', {'validation': f_l}, current_step)
                        logger.add_scalars('Loss/energy_loss', {'validation': e_l}, current_step)

                    model.train()

                end_time = time.perf_counter()
                Time = np.append(Time, end_time - start_time)
                if len(Time) == hp.clear_Time:
                    temp_value = np.mean(Time)
                    Time = np.delete(
                        Time, [i for i in range(len(Time))], axis=None)
                    Time = np.append(Time, temp_value)
Example #32
def set_test(model, test_iter):
    if not test_iter.is_test:
        test_iter.is_test = True
    model.eval()
    with torch.no_grad():
        pred_score_dict = {}
        pred_answer_dict = {}
        true_answer_list = []
        pred_answer_list = []
        for input_ids, input_mask, segment_ids, start_list, end_list, uid_list, answer_list, text_list, querylen_list, maping_list, _, seq_length in tqdm(
                test_iter):
            input_ids = list2ts2device(input_ids)
            input_mask = list2ts2device(input_mask)
            segment_ids = list2ts2device(segment_ids)
            y_preds = model(input_ids=input_ids,
                            token_type_ids=segment_ids,
                            attention_mask=input_mask)
            start_preds, end_preds = (p.detach().cpu() for p in y_preds)
            start_probs = softmax(start_preds.numpy())
            end_probs = softmax(end_preds.numpy())
            # cls_probs = softmax(cls_pred.numpy())
            max_a_len = 64
            answer_pred = []
            epsilon = 1e-3
            w1, w2 = 0.9, 0.1
            for i in range(len(start_probs)):
                start_ = start_probs[i][querylen_list[i]:-1]
                end_ = end_probs[i][querylen_list[i]:-1]
                start_end, score = None, -100
                for start, p_start in enumerate(start_):
                    for end, p_end in enumerate(end_):
                        if end >= start and end < start + max_a_len:
                            """一定有答案"""
                            if p_start * p_end > score:
                                start_end = (start, end)
                                score = p_start * p_end
                                # score_ = np.exp((0.5 * np.log(p_start + epsilon) + 0.5 * np.log(p_end + epsilon)) / (0.5 + 0.5))
                start, end = start_end
                """"""
                # out_class_=cls_probs[i][1]
                start_cls = start_probs[i][0]
                end_cls = end_probs[i][0]
                pos_cls = -(start_cls + end_cls)
                if config.addneg:
                    score += pos_cls
                # score = np.exp((w1 * np.log(out_class_ + epsilon) + w2 * np.log(score_ + epsilon)) / (w1 + w2))
                try:
                    start_pos = maping_list[i][start][0]
                    end_pos = maping_list[i][end][-1]
                    answer = text_list[i][start_pos:end_pos + 1]
                except:
                    answer = ''
                if uid_list[i] in pred_answer_dict:
                    if pred_score_dict[uid_list[i]] < score:
                        pred_score_dict[uid_list[i]] = score
                        pred_answer_dict[uid_list[i]] = answer
                else:
                    pred_answer_dict[uid_list[i]] = answer
                    pred_score_dict[uid_list[i]] = score
                answer_pred.append(answer)
            true_answer_list.extend(answer_list)
            pred_answer_list.extend(answer_pred)
        assert len(true_answer_list) == len(pred_answer_list)
        print(true_answer_list)
        print(pred_answer_list)
        c = 0
        for i in range(len(true_answer_list)):
            if true_answer_list[i] == pred_answer_list[i]:
                c += 1
        print('EM score:', c / len(true_answer_list))
        # pred_dict=dict(zip(qid_list,pred_answer_list))
        true_dict = json.load(
            open(config.data + 'dureader_robust-data/dev.json'))
        F1, EM, TOTAL, SKIP = evaluate(true_dict, pred_answer_dict)
        print('F1: {}, EM: {}, TOTAL: {}, SKIP: {}'.format(F1, EM, TOTAL, SKIP))

        return F1, EM
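# Note: the example above relies on a module-level softmax() applied to the start/end
# logits, which is not shown here. A minimal, numerically stable sketch of what such a
# helper might look like (purely illustrative, not the original implementation):
import numpy as np

def softmax(x, axis=-1):
    # subtract the per-row max before exponentiating to avoid overflow
    x = np.asarray(x, dtype=np.float64)
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)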
Example #33
0
def main(args):
    torch.manual_seed(0)

    # Get device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Get dataset
    dataset = Dataset("train.txt") 
    loader = DataLoader(dataset, batch_size=hp.batch_size**2, shuffle=True, 
        collate_fn=dataset.collate_fn, drop_last=True, num_workers=0)

    # Define model
    model = nn.DataParallel(FastSpeech2()).to(device)
    print("Model Has Been Defined")
    num_param = utils.get_param_num(model)
    print('Number of FastSpeech2 Parameters:', num_param)

    # Optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(), betas=hp.betas, eps=hp.eps, weight_decay=hp.weight_decay)
    scheduled_optim = ScheduledOptim(optimizer, hp.decoder_hidden, hp.n_warm_up_step, args.restore_step)
    Loss = FastSpeech2Loss().to(device) 
    print("Optimizer and Loss Function Defined.")

    # Load checkpoint if exists
    checkpoint_path = os.path.join(hp.checkpoint_path)
    try:
        checkpoint = torch.load(os.path.join(
            checkpoint_path, 'checkpoint_{}.pth.tar'.format(args.restore_step)))
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step {}---\n".format(args.restore_step))
    except (FileNotFoundError, KeyError, RuntimeError):  # no usable checkpoint; start fresh
        print("\n---Start New Training---\n")
        if not os.path.exists(checkpoint_path):
            os.makedirs(checkpoint_path)

    # read params
    mean_mel, std_mel = torch.tensor(np.load(os.path.join(hp.preprocessed_path, "mel_stat.npy")), dtype=torch.float).to(device)
    mean_f0, std_f0 = torch.tensor(np.load(os.path.join(hp.preprocessed_path, "f0_stat.npy")), dtype=torch.float).to(device)
    mean_energy, std_energy = torch.tensor(np.load(os.path.join(hp.preprocessed_path, "energy_stat.npy")), dtype=torch.float).to(device)

    mean_mel, std_mel = mean_mel.reshape(1, -1), std_mel.reshape(1, -1)
    mean_f0, std_f0 = mean_f0.reshape(1, -1), std_f0.reshape(1, -1)
    mean_energy, std_energy = mean_energy.reshape(1, -1), std_energy.reshape(1, -1)


    # Load vocoder
    if hp.vocoder == 'vocgan':
        vocoder = utils.get_vocgan(ckpt_path = hp.vocoder_pretrained_model_path)
        vocoder.to(device)
    else:
        vocoder = None

    # Init logger
    log_path = hp.log_path
    if not os.path.exists(log_path):
        os.makedirs(log_path)
        os.makedirs(os.path.join(log_path, 'train'))
        os.makedirs(os.path.join(log_path, 'validation'))
    train_logger = SummaryWriter(os.path.join(log_path, 'train'))
    val_logger = SummaryWriter(os.path.join(log_path, 'validation'))

    # Define Some Information
    Time = np.array([])
    Start = time.perf_counter()
    
    # Training
    model = model.train()
    for epoch in range(hp.epochs):
        # Get Training Loader
        total_step = hp.epochs * len(loader) * hp.batch_size

        for i, batchs in enumerate(loader):
            for j, data_of_batch in enumerate(batchs):
                start_time = time.perf_counter()

                current_step = i*hp.batch_size + j + args.restore_step + epoch*len(loader)*hp.batch_size + 1
                
                # Get Data
                text = torch.from_numpy(data_of_batch["text"]).long().to(device)
                mel_target = torch.from_numpy(data_of_batch["mel_target"]).float().to(device)
                D = torch.from_numpy(data_of_batch["D"]).long().to(device)
                log_D = torch.from_numpy(data_of_batch["log_D"]).float().to(device)
                f0 = torch.from_numpy(data_of_batch["f0"]).float().to(device)
                energy = torch.from_numpy(data_of_batch["energy"]).float().to(device)
                src_len = torch.from_numpy(data_of_batch["src_len"]).long().to(device)
                mel_len = torch.from_numpy(data_of_batch["mel_len"]).long().to(device)
                max_src_len = np.max(data_of_batch["src_len"]).astype(np.int32)
                max_mel_len = np.max(data_of_batch["mel_len"]).astype(np.int32)
                
                # Forward
                mel_output, mel_postnet_output, log_duration_output, f0_output, energy_output, src_mask, mel_mask, _ = model(
                    text, src_len, mel_len, D, f0, energy, max_src_len, max_mel_len)
                
                # Cal Loss
                mel_loss, mel_postnet_loss, d_loss, f_loss, e_loss = Loss(
                        log_duration_output, log_D, f0_output, f0, energy_output, energy, mel_output, mel_postnet_output, mel_target, ~src_mask, ~mel_mask)
                total_loss = mel_loss + mel_postnet_loss + d_loss + f_loss + e_loss
                 
                # Logger
                t_l = total_loss.item()
                m_l = mel_loss.item()
                m_p_l = mel_postnet_loss.item()
                d_l = d_loss.item()
                f_l = f_loss.item()
                e_l = e_loss.item()
                with open(os.path.join(log_path, "total_loss.txt"), "a") as f_total_loss:
                    f_total_loss.write(str(t_l)+"\n")
                with open(os.path.join(log_path, "mel_loss.txt"), "a") as f_mel_loss:
                    f_mel_loss.write(str(m_l)+"\n")
                with open(os.path.join(log_path, "mel_postnet_loss.txt"), "a") as f_mel_postnet_loss:
                    f_mel_postnet_loss.write(str(m_p_l)+"\n")
                with open(os.path.join(log_path, "duration_loss.txt"), "a") as f_d_loss:
                    f_d_loss.write(str(d_l)+"\n")
                with open(os.path.join(log_path, "f0_loss.txt"), "a") as f_f_loss:
                    f_f_loss.write(str(f_l)+"\n")
                with open(os.path.join(log_path, "energy_loss.txt"), "a") as f_e_loss:
                    f_e_loss.write(str(e_l)+"\n")
                 
                # Backward
                total_loss = total_loss / hp.acc_steps
                total_loss.backward()
                if current_step % hp.acc_steps != 0:
                    continue

                # Clipping gradients to avoid gradient explosion
                nn.utils.clip_grad_norm_(model.parameters(), hp.grad_clip_thresh)

                # Update weights
                scheduled_optim.step_and_update_lr()
                scheduled_optim.zero_grad()
                
                # Print
                if current_step % hp.log_step == 0:
                    Now = time.perf_counter()

                    str1 = "Epoch [{}/{}], Step [{}/{}]:".format(
                        epoch+1, hp.epochs, current_step, total_step)
                    str2 = "Total Loss: {:.4f}, Mel Loss: {:.4f}, Mel PostNet Loss: {:.4f}, Duration Loss: {:.4f}, F0 Loss: {:.4f}, Energy Loss: {:.4f};".format(
                        t_l, m_l, m_p_l, d_l, f_l, e_l)
                    str3 = "Time Used: {:.3f}s, Estimated Time Remaining: {:.3f}s.".format(
                        (Now-Start), (total_step-current_step)*np.mean(Time))

                    print("\n" + str1)
                    print(str2)
                    print(str3)
                    
                    with open(os.path.join(log_path, "log.txt"), "a") as f_log:
                        f_log.write(str1 + "\n")
                        f_log.write(str2 + "\n")
                        f_log.write(str3 + "\n")
                        f_log.write("\n")

                train_logger.add_scalar('Loss/total_loss', t_l, current_step)
                train_logger.add_scalar('Loss/mel_loss', m_l, current_step)
                train_logger.add_scalar('Loss/mel_postnet_loss', m_p_l, current_step)
                train_logger.add_scalar('Loss/duration_loss', d_l, current_step)
                train_logger.add_scalar('Loss/F0_loss', f_l, current_step)
                train_logger.add_scalar('Loss/energy_loss', e_l, current_step)
                
                if current_step % hp.save_step == 0:
                    torch.save({'model': model.state_dict(), 'optimizer': optimizer.state_dict(
                    )}, os.path.join(checkpoint_path, 'checkpoint_{}.pth.tar'.format(current_step)))
                    print("save model at step {} ...".format(current_step))

                if current_step % hp.eval_step == 0:
                    model.eval()
                    with torch.no_grad():
                        d_l, f_l, e_l, m_l, m_p_l = evaluate(model, current_step, vocoder)
                        t_l = d_l + f_l + e_l + m_l + m_p_l
                        
                        val_logger.add_scalar('Loss/total_loss', t_l, current_step)
                        val_logger.add_scalar('Loss/mel_loss', m_l, current_step)
                        val_logger.add_scalar('Loss/mel_postnet_loss', m_p_l, current_step)
                        val_logger.add_scalar('Loss/duration_loss', d_l, current_step)
                        val_logger.add_scalar('Loss/F0_loss', f_l, current_step)
                        val_logger.add_scalar('Loss/energy_loss', e_l, current_step)

                    model.train()

                end_time = time.perf_counter()
                Time = np.append(Time, end_time - start_time)
                if len(Time) == hp.clear_Time:
                    temp_value = np.mean(Time)
                    Time = np.delete(
                        Time, [i for i in range(len(Time))], axis=None)
                    Time = np.append(Time, temp_value)
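# The loop above implements plain gradient accumulation: the loss is divided by
# hp.acc_steps, backward() runs every iteration, and clipping plus the optimizer step
# only happen once every hp.acc_steps iterations. A stripped-down sketch of the same
# pattern (model, loader, loss_fn and acc_steps are placeholders, not the objects from
# the example):
import torch
import torch.nn as nn

def train_with_accumulation(model, loader, optimizer, loss_fn, acc_steps=4, clip=1.0):
    model.train()
    optimizer.zero_grad()
    for step, (inputs, targets) in enumerate(loader, start=1):
        loss = loss_fn(model(inputs), targets) / acc_steps  # scale so summed grads match one large batch
        loss.backward()                                      # gradients accumulate across iterations
        if step % acc_steps == 0:
            nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()
            optimizer.zero_grad()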
Example #34
0
def train_and_evaluate(model, train_dataloader, val_dataloader, optimizer, loss_fn, metrics, params, setting, args,
                       writer=None, logdir=None, restore_file=None):
    """Train the model and evaluate every epoch.

    Args:
        model: (torch.nn.Module) the neural network
        train_dataloader: (DataLoader) a torch.utils.data.DataLoader object that fetches training data
        val_dataloader: (DataLoader) a torch.utils.data.DataLoader object that fetches validation data
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        metrics: (dict) a dictionary of functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        model_dir: (string) directory containing config, weights and log
        restore_file: (string) optional- name of file to restore from (without its extension .pth.tar)
        covar_mode: (bool) does the data-loader give back covariates / additional data
    """

    # setup directories for data
    setting_home = setting.home
    if not args.fase == "feature":
        data_dir = os.path.join(setting_home, "data")
    else:
        if setting.mode3d:
            data_dir = "data"
        else:
            data_dir = "slices"
    covar_mode = setting.covar_mode

    x_frozen = False


    best_val_metric = 0.0
    if "loss" in setting.metrics[0]:
        best_val_metric = 1.0e6

    val_preds = np.zeros((len(val_dataloader.dataset), params.num_epochs))

    for epoch in range(params.num_epochs):

        # Run one epoch
        logging.info(f"Epoch {epoch+1}/{params.num_epochs}; setting: {args.setting}, fase {args.fase}, experiment: {args.experiment}")

        # compute number of batches in one epoch (one full pass over the training set)
        train_metrics = train(model, optimizer, loss_fn, train_dataloader, metrics, params, setting, writer, epoch)
        print(train_metrics)
        for metric_name in train_metrics.keys():
            metric_vals = {'train': train_metrics[metric_name]}
            writer.add_scalars(metric_name, metric_vals, epoch+1)


        # for name, param in model.named_parameters():
        #     writer.add_histogram(name, param.clone().cpu().data.numpy(), epoch+1)
        
        if epoch % params.save_summary_steps == 0:

            # Evaluate for one epoch on validation set
            valid_metrics, outtensors = evaluate(model, loss_fn, val_dataloader, metrics, params, setting, epoch, writer) 
            valid_metrics["intercept"] = model.regressor.fc.bias.detach().cpu().numpy()
            print(valid_metrics) 
            
            for name, module in model.regressor.named_children():
                if name == "t":
                    valid_metrics["b_t"] = module.weight.detach().cpu().numpy()
                elif name == "zt":
                    weights = module.weight.detach().cpu().squeeze().numpy().reshape(-1)
                    for i, weight in enumerate(weights):
                        valid_metrics["b_zt"+str(i)] = weight
                else:
                    pass
            for metric_name in valid_metrics.keys():
                metric_vals = {'valid': valid_metrics[metric_name]}
                writer.add_scalars(metric_name, metric_vals, epoch+1)

            # create plots
            val_df = val_dataloader.dataset.df
            xx_scatter    = net.make_scatter_plot(val_df.x.values, outtensors['xhat'], xlabel='x', ylabel='xhat') 
            xtruex_scatter= net.make_scatter_plot(val_df.x_true.values, outtensors['xhat'], xlabel='x', ylabel='xhat') 
            xyhat_scatter = net.make_scatter_plot(val_df.x.values, outtensors['predictions'], c=val_df.t, xlabel='x', ylabel='yhat')
            zyhat_scatter = net.make_scatter_plot(val_df.z.values, outtensors['predictions'], c=val_df.t, xlabel='z', ylabel='yhat')
            yy_scatter    = net.make_scatter_plot(val_df.y.values, outtensors['predictions'], c=val_df.t, xlabel='yhat', ylabel='y') 
            writer.add_figure('x-xhat/valid', xx_scatter, epoch+1)
            writer.add_figure('xtrue-xhat/valid', xtruex_scatter, epoch+1)
            writer.add_figure('x-yhat/valid', xyhat_scatter, epoch+1)
            writer.add_figure('z-yhat/valid', zyhat_scatter, epoch+1)
            writer.add_figure('y-yhat/valid', yy_scatter, epoch+1)

            if params.save_preds:
                # writer.add_histogram("predictions", preds)
                if setting.num_classes == 1:
                    val_preds[:, epoch] = np.squeeze(outtensors['predictions'])
                    
                    # write preds to file
                    pred_fname = os.path.join(setting.home, setting.fase+"-fase", "preds_val.csv")
                    with open(pred_fname, 'ab') as f:
                        np.savetxt(f, val_preds[:, epoch], newline="")

                np.save(os.path.join(setting.home, setting.fase+"-fase", "preds.npy"), preds)

            val_metric = valid_metrics[setting.metrics[0]]
            if "loss" in str(setting.metrics[0]):
                is_best = val_metric <= best_val_metric
            else:
                is_best = val_metric >= best_val_metric

            # Save weights
            state_dict = model.state_dict()
            optim_dict = optimizer.state_dict()

            state = {
                'epoch': epoch+1,
                'state_dict': state_dict,
                'optim_dict': optim_dict
            }


            utils.save_checkpoint(state,
                                is_best=is_best,
                                checkpoint=logdir)

            # If best_eval, best_save_path
            valid_metrics["epoch"] = epoch
            if is_best:
                logging.info("- Found new best {}: {:.3f}".format(setting.metrics[0], val_metric))
                best_val_metric = val_metric

                # Save best val metrics in a json file in the model directory
                best_json_path = os.path.join(logdir, "metrics_val_best_weights.json")
                utils.save_dict_to_json(valid_metrics, best_json_path)

            # Save latest val metrics in a json file in the model directory
            last_json_path = os.path.join(logdir, "metrics_val_last_weights.json")
            utils.save_dict_to_json(valid_metrics, last_json_path)
    
    # final evaluation
    writer.export_scalars_to_json(os.path.join(logdir, "all_scalars.json"))

    if args.save_preds:
        np.save(os.path.join(setting.home, setting.fase + "-fase", "val_preds.npy"), val_preds)
def train_and_evaluate(model,
                       train_data,
                       val_data,
                       optimizer,
                       scheduler,
                       params,
                       model_dir,
                       restore_file=None):
    """Train the model and evaluate every epoch."""
    # reload weights from restore_file if specified
    if restore_file is not None:
        restore_path = os.path.join(args.model_dir,
                                    args.restore_file + '.pth.tar')
        logging.info("Restoring parameters from {}".format(restore_path))
        utils.load_checkpoint(restore_path, model, optimizer)

    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(1, params.epoch_num + 1):
        # Run one epoch
        logging.info("Epoch {}/{}".format(epoch, params.epoch_num))

        # Compute number of batches in one epoch
        params.train_steps = params.train_size // params.batch_size
        params.val_steps = params.val_size // params.batch_size

        # data iterator for training
        train_data_iterator = data_loader.data_iterator(train_data,
                                                        shuffle=True)
        # Train for one epoch on training set
        train(model, train_data_iterator, optimizer, scheduler, params)

        # data iterator for evaluation
        train_data_iterator = data_loader.data_iterator(train_data,
                                                        shuffle=False)
        val_data_iterator = data_loader.data_iterator(val_data, shuffle=False)

        # Evaluate for one epoch on training set and validation set
        params.eval_steps = params.train_steps
        train_metrics = evaluate(model,
                                 train_data_iterator,
                                 params,
                                 mark='Train')
        params.eval_steps = params.val_steps
        val_metrics = evaluate(model, val_data_iterator, params, mark='Val')
        print("val metrics :", val_metrics)
        val_loss = val_metrics['loss']
        improve_loss = best_val_loss - val_loss

        # Save weights of the network
        model_to_save = model.module if hasattr(
            model, 'module') else model  # Only save the model it-self
        optimizer_to_save = optimizer.optimizer if args.fp16 else optimizer
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model_to_save.state_dict(),
                'optim_dict': optimizer_to_save.state_dict()
            },
            is_best=improve_loss > 0,
            checkpoint=model_dir)
        if improve_loss > 0:
            logging.info("- Found new best loss")
            best_val_loss = val_loss
            if improve_loss < params.patience:
                patience_counter += 1
            else:
                patience_counter = 0
        else:
            patience_counter += 1

        # Early stopping and logging best f1
        if (patience_counter >= params.patience_num
                and epoch > params.min_epoch_num) or epoch == params.epoch_num:
            logging.info("Best val loss: {:05.2f}".format(best_val_loss))
            break
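# The second train_and_evaluate above stops once a patience counter exceeds
# params.patience_num (the same pattern appears again in Example #44 below). The
# early-stopping bookkeeping in isolation, for a validation loss that should decrease
# (names and thresholds are illustrative):
def should_stop(val_losses, min_delta=0.0, patience=3):
    # val_losses: one validation loss per epoch, most recent last
    best = float('inf')
    patience_counter = 0
    for loss in val_losses:
        if best - loss > min_delta:   # improved by more than min_delta
            best = loss
            patience_counter = 0
        else:
            patience_counter += 1
    return patience_counter >= patience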
Example #36
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 16 13:01:34 2020

@author: ziyi
"""

from evaluate import evaluate

agentName = 'worse'

evaluate(agentName)
Example #37
0
def evaluate_wrapper(pred, lab):
    #    pred = np.argmax(pred.detach().cpu().numpy(), 1)
    pred = pred.detach().cpu().numpy()
    lab = lab.cpu().numpy()
    return evaluate(pred, lab)
Example #38
0
    trainWriter = tf.summary.FileWriter('../report/tf-log/train', graph=sess.graph)
    testWriter = tf.summary.FileWriter('../report/tf-log/test', graph=sess.graph)

    tf.logging.info((green(SimpleNet.print_total_params())))

    while iters.eval() <= iter_limit:
        current_iter = iters.eval()

        if current_iter % 5 == 0:
            tf.logging.info(f'Iter:{iters.eval()}...')

        batch_xs, batch_ys = dh.get_next_batch(iter_based=True, split_channels=True)
        sess.run(update, feed_dict={x:batch_xs, y:batch_ys, keep_prob:dropout_keep})

        if current_iter % 10 == 0:
            evals = evaluate.evaluate(sess, infer, x, y, keep_prob, batch_xs, batch_ys, dropout_keep, '../report/train_progress.csv')
            s = sess.run(summaries, feed_dict={x:batch_xs, y:batch_ys, keep_prob:-1.0})
            trainWriter.add_summary(s, current_iter)

        if current_iter % 50 == 0:
            tf.logging.info(yellow('Testing...'))
            batch_xs, batch_ys = dh.get_next_batch(iter_based=True, force_test=True, split_channels=True)
            evals = evaluate.evaluate(sess, infer, x, y, keep_prob, batch_xs, batch_ys, dropout_keep, '../report/test_progress.csv')

#            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
#            run_metadata = tf.RunMetadata()
#            s = sess.run(summaries,
#                         feed_dict={x:batch_xs, y:batch_ys},
#                         options=run_options,
#                         run_metadata=run_metadata)
#            testWriter.add_run_metadata(run_metadata, f'train{current_iter}')
Example #39
0
def train_and_evaluate(model,
                       train_data,
                       val_data,
                       optimizer_1,
                       optimizer_2,
                       loss_fn,
                       metrics,
                       params,
                       model_dir,
                       restore_file=None):
    """Train the model and evaluate every epoch.

    Args:
        model: (torch.nn.Module) the neural network
        train_data: (dict) training data with keys 'data' and 'labels'
        val_data: (dict) validation data with keys 'data' and 'labels'
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        metrics: (dict) a dictionary of functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        model_dir: (string) directory containing config, weights and log
        restore_file: (string) optional- name of file to restore from (without its extension .pth.tar)
    """
    #     print(train_data)
    # reload weights from restore_file if specified
    if restore_file is not None:
        restore_path = os.path.join(args.model_dir,
                                    args.restore_file + '.pth.tar')
        logging.info("Restoring parameters from {}".format(restore_path))
        utils.load_checkpoint(restore_path, model, optimizer_1)

    best_val_acc = 0.0
    savecoeff = 0
    for epoch in range(params.num_epochs):
        logging.info("Epoch {}/{}".format(epoch + 1, params.num_epochs))

        train(model, optimizer_1, optimizer_2, loss_fn, train_data, metrics,
              params)
        val_metrics = evaluate(model, loss_fn, val_data, metrics, params)

        val_acc = val_metrics['accuracy']
        is_best = val_acc >= best_val_acc

        # Save weights
        savecoeff = savecoeff + 1
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimw_1_dict': optimizer_1.state_dict(),
                'optimw_2_dict': optimizer_2.state_dict()
            },
            is_best=is_best,
            checkpoint=model_dir)
        if savecoeff % 5 == 0:
            fpath = osp.join(model_dir,
                             'model' + str(epoch) + '.pth.tar-' + str(epoch))
            torch.save(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'optim_1_dict': optimizer_1.state_dict(),
                    'optim_2_dict': optimizer_2.state_dict()
                }, fpath)

        # If best_eval, best_save_path
        if is_best:
            logging.info("- Found new best accuracy")
            best_val_acc = val_acc

            # Save best val metrics in a json file in the model directory
            best_json_path = os.path.join(model_dir,
                                          "metrics_val_best_weights.json")
            utils.save_dict_to_json(val_metrics, best_json_path)

        # Save latest val metrics in a json file in the model directory
        last_json_path = os.path.join(model_dir,
                                      "metrics_val_last_weights.json")
        utils.save_dict_to_json(val_metrics, last_json_path)
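# Several of the examples above call utils.save_checkpoint(state, is_best=..., checkpoint=...).
# That helper is not shown; a plausible minimal implementation, assuming it writes a
# rolling "last" checkpoint and copies it to a "best" file when is_best is True (the
# file names below are assumptions, not taken from the original utils module):
import os
import shutil
import torch

def save_checkpoint(state, is_best, checkpoint):
    os.makedirs(checkpoint, exist_ok=True)
    last_path = os.path.join(checkpoint, 'last.pth.tar')
    torch.save(state, last_path)              # always keep the most recent state
    if is_best:
        shutil.copyfile(last_path, os.path.join(checkpoint, 'best.pth.tar'))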
Example #40
0
def train_evaluate(model, optimizer, scheduler, dataloader_train, dataloader_val, loss_fn, metric_fn, model_dir, param):
    best_SILog_val = float('inf')
    epoch_start = param['current_epoch']

    for epoch in range(epoch_start, param['epochs']):
        print()
        print('------ Epoch %d, Learning rate = %.2e ------' % (epoch, optimizer.param_groups[0]['lr']))
        train(model, optimizer, dataloader_train, loss_fn, metric_fn, param, epoch)

        # metrics_test = evaluate(model, dataloader_val, loss_fn, metric_fn, param, epoch, writer)
        if isinstance(dataloader_val, dict):
            metrics_val_current = evaluate(model, dataloader_val['current'], loss_fn, metric_fn, param, epoch, writer)
            metrics_val_current = {k + '_val_current': v for k, v in metrics_val_current.items()}
            print(
                '\n------ After training %d epochs, current snowfall\'s validation set metrics mean: ------ \n%s\n' % (epoch, metrics_val_current))
            for metric, value in metrics_val_current.items():
                writer.add_scalar(metric, value, epoch)

            metrics_val_all = evaluate(model, dataloader_val['all'], loss_fn, metric_fn, param, epoch, writer)
            metrics_val_all = {k + '_val_all': v for k, v in metrics_val_all.items()}
            print(
                '\n------ After training %d epochs, all CADC validation set metrics mean: ------ \n%s\n' % (epoch, metrics_val_all))
            for metric, value in metrics_val_all.items():
                writer.add_scalar(metric, value, epoch)

            metrics_val = {**metrics_val_current, **metrics_val_all}
            SILog_val = metrics_val_current['SILog_val_current']

        else:
            metrics_val = evaluate(model, dataloader_val, loss_fn, metric_fn, param, epoch, writer)
            print(
                '\n------ After training %d epochs, validation set metrics mean: ------ \n%s\n' % (epoch, metrics_val))
            for metric, value in metrics_val.items():
                writer.add_scalar(metric, value, epoch)

            SILog_val = metrics_val['SILog']

        if scheduler is not None:
            scheduler.step(SILog_val)

        is_best = SILog_val <= best_SILog_val

        # Save weights
        save_dict = param.copy()
        save_dict['current_epoch'] = epoch
        save_dict['state_dict'] = model.state_dict()
        save_dict['optim_dict'] = optimizer.state_dict()
        if scheduler is not None:
            save_dict['sched_dict'] = scheduler.state_dict()
        experiments_dir = 'experiments/' + model_dir
        utils.save_checkpoint(save_dict, is_best=is_best, folder_path=experiments_dir)

        # If best_eval, best_save_path
        if is_best:
            best_SILog_val = SILog_val

            # Save best val metrics in a json file in the model directory
            best_json_path = os.path.join(experiments_dir, "metrics_test_best_weights.json")
            utils.save_dict_to_json(metrics_val, best_json_path)

        # Save latest val metrics in a json file in the model directory
        last_json_path = os.path.join(experiments_dir, "metrics_test_last_weights.json")
        utils.save_dict_to_json(metrics_val, last_json_path)
Example #41
0
def main():
    torch.set_default_tensor_type(torch.FloatTensor)
    torch.set_num_threads(3)
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    data_start = time.time()
    if args.dataset == 'ml-100k':
        num_users = 943
        num_items = 1682
        dim = 2
    elif args.dataset == 'ml-1m':
        num_users = 6040
        num_items = 3952
        dim = 2
    elif args.dataset == 'ml-10m':
        num_users = 71567
        num_items = 65133
        dim = 2
    elif args.dataset == 'youtube-small':
        num_ps = 600
        num_qs = 14340
        num_rs = 5
        dim = 3

    train_queue, valid_queue, test_queue = utils.get_data_queue(args)
    logging.info('prepare data finish! [%f]' % (time.time() - data_start))

    if args.mode == 'libfm':
        start = time.time()
        from tffm import TFFMRegressor
        import tensorflow as tf
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
        model = TFFMRegressor(
            order=dim,
            rank=args.embedding_dim,
            optimizer=tf.train.AdagradOptimizer(learning_rate=args.lr),
            n_epochs=args.train_epochs,
            batch_size=args.batch_size,
            init_std=0.001,
            reg=args.weight_decay,
            input_type='sparse',
            log_dir=os.path.join(save_name, 'libfm-log'))
        model.fit(train_queue[0], train_queue[1], show_progress=True)
        inferences = model.predict(test_queue[0])
        mse = mean_squared_error(test_queue[1], inferences)
        rmse = np.sqrt(mse)
        logging.info('rmse: %.4f[%.4f]' % (rmse, time.time() - start))

    else:
        start = time.time()
        if args.mode == 'ncf':
            if dim == 2:
                model = NCF(num_users, num_items, args.embedding_dim,
                            args.weight_decay).cuda()
            elif dim == 3:
                model = NCF_Triple(num_ps, num_qs, num_rs, args.embedding_dim,
                                   args.weight_decay).cuda()
        elif args.mode == 'deepwide':
            if dim == 2:
                model = DeepWide(num_users, num_items, args.embedding_dim,
                                 args.weight_decay).cuda()
            elif dim == 3:
                model = DeepWide_Triple(num_ps, num_qs, num_rs,
                                        args.embedding_dim,
                                        args.weight_decay).cuda()
        elif args.mode == 'altgrad':
            model = AltGrad(num_users, num_items, args.embedding_dim,
                            args.weight_decay).cuda()
        elif args.mode == 'convncf':
            model = ConvNCF(num_users, num_items, args.embedding_dim,
                            args.weight_decay).cuda()
        elif args.mode == 'outer':
            model = Outer(num_users, num_items, args.embedding_dim,
                          args.weight_decay).cuda()
        elif args.mode == 'conv':
            model = Conv(num_users, num_items, args.embedding_dim,
                         args.weight_decay).cuda()
        elif args.mode == 'plus':
            model = Plus(num_users, num_items, args.embedding_dim,
                         args.weight_decay).cuda()
        elif args.mode == 'max':
            model = Max(num_users, num_items, args.embedding_dim,
                        args.weight_decay).cuda()
        elif args.mode == 'min':
            model = Min(num_users, num_items, args.embedding_dim,
                        args.weight_decay).cuda()
        elif args.mode == 'cp':
            model = CP(num_ps, num_qs, num_rs, args.embedding_dim,
                       args.weight_decay).cuda()
        elif args.mode == 'tucker':
            model = TuckER(num_ps, num_qs, num_rs, args.embedding_dim,
                           args.weight_decay).cuda()
        elif args.mode == 'sif':
            if dim == 2:
                arch = utils.load_arch(num_users, num_items, args)
                print(next(arch['mlp']['p'].parameters()))
                model = Network(num_users, num_items, args.embedding_dim, arch,
                                args.weight_decay).cuda()
            elif dim == 3:
                arch = utils.load_arch_triple(num_ps, num_qs, num_rs, args)
                model = Network_Triple(num_ps, num_qs, num_rs,
                                       args.embedding_dim, arch,
                                       args.weight_decay).cuda()
        logging.info('build model finish! [%f]' % (time.time() - start))

        optimizer = torch.optim.Adagrad(model.parameters(), args.lr)
        if dim == 2:
            train(model, train_queue, test_queue, optimizer, args)
            rmse = evaluate(model, test_queue)
        elif dim == 3:
            train_triple(model, train_queue, test_queue, optimizer, args)
            rmse = evaluate_triple(model, test_queue)
        logging.info('rmse: %.4f' % rmse)
    def train(self):
        self.build()
        analyze_vars(tf.trainable_variables(),
                     os.path.join(self.output_dir, 'model_vars.txt'))
        with open(os.path.join(self.output_dir, 'regularizers.txt'), 'w') as f:
            for v in tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES):
                f.write(v.name + '\n')
        # exit(-1)
        tf_config = tf.ConfigProto(allow_soft_placement=True)
        tf_config.gpu_options.allow_growth = True
        with tf.Session(config=tf_config) as sess:
            tf.global_variables_initializer().run()
            saver_ckpt = tf.train.Saver()
            saver_best = tf.train.Saver()
            summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph)
            start_time = time.time()
            best_acc = 0
            counter = 0
            if config['pretrained_model'] != '':
                saver_ckpt.restore(sess, config['pretrained_model'])
                step = int(
                    os.path.basename(config['pretrained_model']).split('.')
                    [0].split('-')[-1])
                sess.run(tf.assign(self.global_step, step))
                counter = self.global_step.eval(sess)
                print('start step: %d' % counter)
            debug = True
            for i in range(self.epoch_num):
                for j in range(self.step_per_epoch):
                    _, l, l_wd, l_inf, acc, s, _ = sess.run(
                        [
                            self.train_op, self.train_loss, self.wd_loss,
                            self.inference_loss, self.train_acc,
                            self.train_summary, self.inc_op
                        ],
                        feed_dict={
                            self.train_phase_dropout: True,
                            self.train_phase_bn: True
                        })
                    counter += 1

                    # debug
                    # self.save_image_label(train_img, train_lbl, counter)
                    # if(debug):
                    #     if(len(train_imgs) < 100):
                    #         train_imgs.append(train_img[0])
                    #     else:
                    #         np.save(os.path.join(self.debug_dir, 'train_imgs.npy'), np.array(train_imgs))
                    #         debug=False

                    print(
                        "Epoch: [%2d/%2d] [%6d/%6d] time: %.2f, loss: %.3f (inference: %.3f, wd: %.3f), acc: %.3f"
                        % (i, self.epoch_num, j, self.step_per_epoch,
                           time.time() - start_time, l, l_inf, l_wd, acc))
                    start_time = time.time()
                    if counter % self.val_freq == 0:
                        saver_ckpt.save(sess,
                                        os.path.join(self.checkpoint_dir,
                                                     'ckpt-m'),
                                        global_step=counter)
                        acc = []
                        with open(self.val_log, 'a') as f:
                            f.write('step: %d\n' % counter)
                            for k, v in self.val_data.items():
                                imgs, imgs_f, issame = load_bin(
                                    v, self.image_size)
                                embds = self.run_embds(sess, imgs)
                                embds_f = self.run_embds(sess, imgs_f)
                                embds = embds / np.linalg.norm(
                                    embds, axis=1,
                                    keepdims=True) + embds_f / np.linalg.norm(
                                        embds_f, axis=1, keepdims=True)
                                tpr, fpr, acc_mean, acc_std, tar, tar_std, far = evaluate(
                                    embds,
                                    issame,
                                    far_target=1e-3,
                                    distance_metric=0)
                                f.write(
                                    'eval on %s: acc--%1.5f+-%1.5f, tar--%1.5f+-%1.5f@far=%1.5f\n'
                                    %
                                    (k, acc_mean, acc_std, tar, tar_std, far))
                                acc.append(acc_mean)
                            acc = np.mean(np.array(acc))
                            if acc > best_acc:
                                saver_best.save(sess,
                                                os.path.join(
                                                    self.model_dir, 'best-m'),
                                                global_step=counter)
                                best_acc = acc
    #score.extend(sc)
    reference.extend(ref)
    pred_all.extend(pre)
    print(str(i) + '_json read.')

length = len(target_words)
assert len(reference) == length

map_freq = [[], [], [], []]
map_dn = [[], [], [], [], [], []]
map = []

target_words0 = []
for i in range(10):
    target_words0.extend((open(str(i) + 'target_words.txt',
                               'r').readlines()[0]).split())
for i in range(length):
    m = evaluate(reference[i], pred_all[i])
    map.append(m)
    map_freq[wf_ind[target_words0[i]]].append(m)
    if dn_ind[target_words0[i]] > 5:
        map_dn[5].append(m)
    else:
        map_dn[dn_ind[target_words0[i]] - 1].append(m)
print('MAP for each word-frequency bucket:')
for m in map_freq:
    print(np.mean(np.array(m)))
print('MAP for each definition-count bucket:')
for m in map_dn:
    print(np.mean(np.array(m)))
print('-----MAP: ', np.mean(np.array(map)))
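# The fragment above averages per-word evaluate(reference, prediction) scores into a
# mean average precision (MAP), bucketed by word frequency and by number of
# definitions. A self-contained sketch of average precision for a single ranked list
# (illustrative only; the project's evaluate() may compute its score differently):
def average_precision(relevant, ranked):
    # relevant: set of correct items; ranked: predicted items, best first
    hits, precisions = 0, []
    for rank, item in enumerate(ranked, start=1):
        if item in relevant:
            hits += 1
            precisions.append(hits / rank)
    return sum(precisions) / max(len(relevant), 1)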
Example #44
0
def train_and_evaluate(model, params, restore_file=None):
    """Train the model and evaluate every epoch."""
    # load args
    args = parser.parse_args()

    # Load training data and val data
    dataloader = NERDataLoader(params)
    train_loader = dataloader.get_dataloader(data_sign='train')
    val_loader = dataloader.get_dataloader(data_sign='val')
    # number of steps in one epoch
    params.train_steps = len(train_loader)

    # Prepare optimizer
    # fine-tuning
    # collect the model's named parameters
    param_optimizer = list(model.named_parameters())
    # pretrain model param
    param_pre = [(n, p) for n, p in param_optimizer if 'bert' in n]
    # middle model param
    param_middle = [(n, p) for n, p in param_optimizer if 'bilstm' in n or 'dym_weight' in n]
    # crf param
    param_crf = [p for n, p in param_optimizer if 'crf' in n]
    # parameters exempt from weight decay
    no_decay = ['bias', 'LayerNorm', 'dym_weight', 'layer_norm']
    # group the parameters
    optimizer_grouped_parameters = [
        # pretrain model param
        # with weight decay
        {'params': [p for n, p in param_pre if not any(nd in n for nd in no_decay)],
         'weight_decay': params.weight_decay_rate, 'lr': params.fin_tuning_lr
         },
        # without weight decay
        {'params': [p for n, p in param_pre if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0, 'lr': params.fin_tuning_lr
         },
        # middle model
        # with weight decay
        {'params': [p for n, p in param_middle if not any(nd in n for nd in no_decay)],
         'weight_decay': params.weight_decay_rate, 'lr': params.downs_en_lr
         },
        # without weight decay
        {'params': [p for n, p in param_middle if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0, 'lr': params.downs_en_lr
         },
        # crf, with its own learning rate
        {'params': param_crf,
         'weight_decay': 0.0, 'lr': params.crf_lr}
    ]
    num_train_optimization_steps = len(train_loader) // params.gradient_accumulation_steps * args.epoch_num
    optimizer = BertAdam(optimizer_grouped_parameters, warmup=params.warmup_prop, schedule="warmup_cosine",
                         t_total=num_train_optimization_steps, max_grad_norm=params.clip_grad)

    # TODO: reload weights from restore_file if specified(DataParallel)
    if restore_file is not None:
        restore_path = os.path.join(params.model_dir, args.restore_file + '.pth.tar')
        logging.info("Restoring parameters from {}".format(restore_path))
        # load checkpoint
        utils.load_checkpoint(restore_path, model, optimizer)

    # patience stage
    best_val_f1 = 0.0
    patience_counter = 0

    for epoch in range(1, args.epoch_num + 1):
        # Run one epoch
        logging.info("Epoch {}/{}".format(epoch, args.epoch_num))

        # Train for one epoch on training set
        train(model, train_loader, optimizer, params)

        # Evaluate for one epoch on training set and validation set
        # train_metrics = evaluate(model, train_loader, params, mark='Train',
        #                          verbose=True)  # Dict['loss', 'f1']
        val_metrics = evaluate(args, model, val_loader, params, mark='Val',
                               verbose=True)  # Dict['loss', 'f1']
        # validation-set f1-score
        val_f1 = val_metrics['f1']
        # improvement in f1-score
        improve_f1 = val_f1 - best_val_f1

        # Save weights of the network
        model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self
        optimizer_to_save = optimizer
        utils.save_checkpoint({'epoch': epoch + 1,
                               'state_dict': model_to_save.state_dict(),
                               'optim_dict': optimizer_to_save.state_dict()},
                              is_best=improve_f1 > 0,
                              checkpoint=params.model_dir)
        params.save(params.params_path / 'params.json')

        # stop training based params.patience
        if improve_f1 > 0:
            logging.info("- Found new best F1")
            best_val_f1 = val_f1
            if improve_f1 < params.patience:
                patience_counter += 1
            else:
                patience_counter = 0
        else:
            patience_counter += 1

        # Early stopping and logging best f1
        if (patience_counter > params.patience_num and epoch > params.min_epoch_num) or epoch == args.epoch_num:
            logging.info("Best val f1: {:05.2f}".format(best_val_f1))
            break
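# Example #44 above builds separate optimizer groups for the BERT encoder, the
# BiLSTM/dym_weight layers and the CRF, each with its own learning rate, and exempts
# bias/LayerNorm parameters from weight decay. A minimal sketch of that grouping
# pattern for a generic PyTorch module (the learning rates and decay value are
# placeholders, not the values used in the example):
def build_param_groups(model, encoder_lr=3e-5, head_lr=1e-3, weight_decay=0.01):
    no_decay = ('bias', 'LayerNorm', 'layer_norm')
    encoder, head = [], []
    for name, param in model.named_parameters():
        (encoder if 'bert' in name else head).append((name, param))

    def split(named, lr):
        decayed = [p for n, p in named if not any(nd in n for nd in no_decay)]
        plain = [p for n, p in named if any(nd in n for nd in no_decay)]
        return [{'params': decayed, 'weight_decay': weight_decay, 'lr': lr},
                {'params': plain, 'weight_decay': 0.0, 'lr': lr}]

    return split(encoder, encoder_lr) + split(head, head_lr)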
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

if ARGS.train:
    print(f'The model has {count_parameters(model):,} trainable parameters')

    optimizer = optim.Adam(model.parameters())

    N_EPOCHS = 10
    CLIP = 0.1
    best_valid_loss = float('inf')

    for epoch in range(N_EPOCHS):
        start_time = time.time()

        train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
        valid_loss = evaluate(model, valid_iterator, criterion)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), 'tut5-model.pt')

        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
        print(
            f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}'
        )
        print(
            f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}'
        )
Example #46
0
def train_and_evaluate(model, train_dataloader, val_dataloader, optimizer, critierion, metrics, params, model_dir,
                       restore_file=None):
    # reload weights from restore_file if specified
    if restore_file is not None:
        restore_path = os.path.join(args.model_dir, args.restore_file + '.pth.tar')
        logging.info("Restoring parameters from {}".format(restore_path))
        utils.load_checkpoint(restore_path, model, optimizer)
        
    best_val_acc = 0.0
    best_val_metrics = []
    learning_rate_0 = params.learning_rate
    train_acc_series = []
    val_acc_series = []
    train_loss_series = []
    
    for epoch in range(params.num_epochs):
        logging.info("Epoch {}/{}".format(epoch + 1, params.num_epochs))
        
        # train model
        train_metrics = train(model, train_dataloader, optimizer, critierion, metrics, params)
        
        # learning rate exponential decay
        params.learning_rate = learning_rate_0 * np.exp(-params.exp_decay_k * epoch)
        
        # evaluate
        val_metrics = evaluate(model, critierion, val_dataloader, metrics, params)
        
        # find accuracy from validation dataset
        val_acc = val_metrics['accuracy']
        is_best = val_acc >= best_val_acc
        
        # save weights
        utils.save_checkpoint({'epoch': epoch + 1,
                               'state_dict': model.state_dict(),
                               'optim_dict': optimizer.state_dict()},
                              is_best=is_best,
                              checkpoint=model_dir)
        
        # save accuracy / loss to array for plot
        train_acc_series.append(train_metrics['accuracy'])
        val_acc_series.append(val_metrics['accuracy'])
        train_loss_series.append(train_metrics['loss'])
        
        # If best_eval, best_save_path
        if is_best:
            logging.info("- Found new best accuracy")
            best_val_acc = val_acc
            best_val_metrics = val_metrics

            # Save best val metrics in a json file in the model directory
            best_json_path = os.path.join(
                model_dir, "metrics_val_best_weights.json")
            utils.save_dict_to_json(val_metrics, best_json_path)
        
        # Save latest val metrics in a json file in the model directory
        last_json_path = os.path.join(
            model_dir, "metrics_val_last_weights.json")
        utils.save_dict_to_json(val_metrics, last_json_path)
        print('******************************************')
    
    # plot visualized performance
    visualize.plot_train_val_accuracy(train_acc_series, val_acc_series)
    visualize.plot_loss(train_loss_series)
    # save best validation F1 score plot
    visualize.plot_individual_label_f1score(best_val_metrics)
Example #47
0
def run(model, dataloaders, loss_fn, optimizer, lr_scheduler, device, params,
        memo):
    """
    Runs train and validation every epoch while updating `model`'s parameters using the
    `loss_fn` and `optimizer`. The `optimizer`'s learning rates are updated during training via `lr_scheduler`.
    
    Before calling this function, put the model in `device`, and *then* set this model's parameters as optimizer's target variables:
    
    ```python
    # Example
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=dummy_lr) # dummy_lr because it will always be 
                                        # overwritten by lr_scheduler's returned value
    dataloaders = {'train': train_dl,
                    'val': val_dl}
    loss_fn = nn.CrossEntropy(...)
    lr_scheduler = TriangleLR(...)
    params = {'max_epoch': , # default: 50, 
              'batch_size': , # default: 8 , 
              'n_classes' : 21, #default: 21
              'fill_value': , # default: 255 , 
              'save_every': , # default: 10 , 
              'print_every': , # default: 5,
              'ignore_idx': 255, # default: 255
              'ignore_bg': True, # default: True
              'debug': False # default: False
              }
    
    result = run(model, dataloaders, loss_fn, optimizer, lr_scheduler, device, params)
    ```
    
    Assumes
    - model is already sent to the `device`
    - optimizer has been constructed with model.parameters() as input
    - `device` will be used to send the data (returned from dataloaders) to the `device`
        that is the same device where the input `model` is located
    """

    # Get parameters
    max_epoch = params.get('max_epoch', 50)
    batch_size = params.get('batch_size', 8)
    best_iou = params.get('best_iou', -100)
    fill_value = params.get('fill_value', 255)
    ignore_gt = params.get(
        'ignore_gt', 0
    )  # None, or one of the class labels to exclude from the accuracy computation
    save_every = params.get('save_every', 10)  # unit of epoch
    print_every = params.get('print_every', 5)  # unit of epoch
    log_fn = params.get(
        'log_fn',
        f'/data/rl/log/{model.name}-fill-{fill_value}-bs-{batch_size}/{now2str()}/log.txt'
    )

    # Get dataloaders
    train_dl, val_dl = dataloaders['train'], dataloaders['val']
    assert batch_size == train_dl.batch_size, "batch_size must be the batchsize of train_dl"

    # Set log file directory if not existing yet
    if not isinstance(log_fn, Path):
        log_fn = Path(log_fn)
    if not log_fn.parent.exists():
        log_fn.parent.mkdir(parents=True)
        print('Created : ', str(log_fn.parent))
    print('log file: ', log_fn)

    # Start experiment
    all_train_losses = []  # from each iteration
    all_train_accs = []
    ep_train_losses = []  # average losses from each epoch
    ep_train_accs = []

    val_losses = []  # from each epoch
    val_accs = []
    val_ious = []
    # best_iou is set above, where the parameters are read from the input `params` dict
    exp_start = time.time()
    for epoch in tqdm(range(max_epoch), desc='Epoch'):
        # Train
        start = time.time()
        train_result = train(model, train_dl, loss_fn, optimizer, lr_scheduler,
                             device, params, memo)
        end = time.time()

        # Collect train metrics
        all_train_losses.extend(train_result['train_losses'])
        all_train_accs.extend(train_result['train_accs'])
        ep_train_losses.append(train_result['loss_meter'].avg)
        ep_train_accs.append(train_result['acc_meter'].avg)

        # Logging after each train epoch
        append2file(
            f"{'='*80}"
            f'\nEpoch: {epoch}/{max_epoch}'
            f'\n\tTrain took: {(end-start)/60.0:.3f} mins'
            f'\n\tTrain loss: {ep_train_losses[-1]:9.5f}'
            f'\n\tTrain acc: {ep_train_accs[-1]:9.5f}%', log_fn)
        ## train_result = {'running_metrics': running_metrics,
        #               'loss_meter': loss_meter,
        #               'acc_meter': acc_meter,
        #              'train_losses': train_losses,
        #              'train_accs': train_accs}

        # Evaluate
        val_result = evaluate(model, val_dl, loss_fn, device, params)

        # Collect validation metrics
        val_losses.append(val_result['loss_meter'].avg)
        val_accs.append(val_result['acc_meter'].avg)

        score, class_iou = val_result['running_metrics'].get_scores(
            ignore_gt=ignore_gt)
        val_ious.append(score["Mean IoU : \t"])

        # Log evaluation for this epoch
        mean_acc = score['Mean Acc : \t']
        append2file(
            f"\n\tVal loss: {val_losses[-1]:9.5f}"
            f"\n\tVal acc: {val_accs[-1]:9.5f}%"
            f"\n\t\t vs. {mean_acc}"
            f"\n\tVal mean IOU: {val_ious[-1]:9.5f}", log_fn)

        for k, v in score.items():
            #             print(k, v) ##printing here
            append2file(f'\tVal_metric/{k} -- {v:9.5f}', log_fn)

        for k, v in class_iou.items():
            append2file(f'\tVal Class IOU/{k} -- {v:9.5f}', log_fn)

        # Save current state if it achieves the best IOU on validation set
        if best_iou < 0 or (score["Mean IoU : \t"] -
                            best_iou) / best_iou > 0.05:
            best_iou = score["Mean IoU : \t"]
            state = {
                "epoch": epoch + 1,
                "model_state": model.state_dict(),
                "optimizer_state": optimizer.state_dict(),
                "best_iou": best_iou,
            }
            torch.save(state, log_fn.parent / 'best_state.pth')
            print(
                f"Updated a new best state. ep: {state['epoch']}, iou: {state['best_iou']}"
            )
            for k, v in score.items():
                print(k, v)  ##printing here

        # Save the current model if it's time to save intermittently
        if (epoch + 1) % save_every == 0:
            print("=" * 80)
            print('Epoch: ', epoch, ' Saved model')
            out_fn = log_fn.parent / f'{model.name}_{epoch}.pth'
            save_checkpt(epoch, model, optimizer, ep_train_losses[-1], out_fn)
            ##     result = {'running_metrics': running_metrics,
            #               'loss_meter': loss_meter,
            #               'acc_meter': acc_meter}

    # Log this experiment's train and val losses
    out_fn = log_fn.parent / (log_fn.stem + '_losses.npz')
    np.savez_compressed(out_fn,
                        train_losses=all_train_losses,
                        train_accs=all_train_accs,
                        val_losses=val_losses,
                        val_accs=val_accs)
    print('Saved the losses to...: ', out_fn)
    print(f'Experiment took : {(time.time() - exp_start)/60:.3f} mins')

    result = {
        'train': {
            'loss': ep_train_losses,
            'acc': ep_train_accs
        },
        'val': {
            'loss': val_losses,
            'acc': val_accs
        }
    }

    return model, result
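# run() above only overwrites best_state.pth when the validation mean IoU improves by
# more than 5% relative to the previous best (or when no best has been recorded yet).
# The same rule in isolation, with the threshold made explicit (illustrative only):
def is_new_best(current_iou, best_iou, rel_threshold=0.05):
    if best_iou < 0:                      # sentinel: no best IoU recorded yet
        return True
    return (current_iou - best_iou) / best_iou > rel_threshold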
Example #48
0
def _run_epoch_eval(self, sess, y_prob, labels, metric):
    score = evaluate.evaluate(labels, y_prob, metric=metric, average=True)
    return score
Example #49
0
def main():
    args = parser.parse_args()

    # torch setting
    torch.random.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # os setting
    path = args.dataset_path
    train_path = os.path.join(path, "train/train.txt")
    validation_path = os.path.join(path, "valid/valid.txt")
    test_path = os.path.join(path, "test/test.txt")
    params_path = os.path.join(args.model_dir, 'params.json')
    checkpoint_dir = os.path.join(args.model_dir, 'checkpoint')
    tensorboard_log_dir = os.path.join(args.model_dir, 'log')
    utils.check_dir(tensorboard_log_dir)

    entity2id, relation2id = data_loader.create_mappings(train_path)

    # params
    params = utils.Params(params_path)
    params.device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # dataset
    train_set = data_loader.FB15KDataset(train_path, entity2id, relation2id)
    train_generator = torch_data.DataLoader(train_set,
                                            batch_size=params.batch_size)
    validation_set = data_loader.FB15KDataset(validation_path, entity2id,
                                              relation2id)
    validation_generator = torch_data.DataLoader(
        validation_set, batch_size=params.validation_batch_size)
    test_set = data_loader.FB15KDataset(test_path, entity2id, relation2id)
    test_generator = torch_data.DataLoader(
        test_set, batch_size=params.validation_batch_size)

    # model
    model = net.Net(entity_count=len(entity2id),
                    relation_count=len(relation2id),
                    dim=params.embedding_dim,
                    margin=params.margin,
                    device=params.device,
                    norm=params.norm)  # type: torch.nn.Module
    model = model.to(params.device)
    optimizer = optim.SGD(model.parameters(), lr=params.learning_rate)
    summary_writer = tensorboard.SummaryWriter(log_dir=tensorboard_log_dir)
    start_epoch_id = 1
    step = 0
    best_score = 0.0

    print("Training Dataset: entity: {} relation: {} triples: {}".format(
        len(entity2id), len(relation2id), len(train_set)))
    print("Validation Dataset: triples: {}".format(len(validation_set)))
    print("Test Dataset: triples: {}".format(len(test_set)))
    print(model)

    # Train
    for epoch_id in range(start_epoch_id, params.epochs + 1):
        print("Epoch {}/{}".format(epoch_id, params.epochs))

        loss_impacting_samples_count = 0
        samples_count = 0
        model.train()

        with tqdm(total=len(train_generator)) as t:
            for local_heads, local_relations, local_tails in train_generator:
                local_heads, local_relations, local_tails = (local_heads.to(
                    params.device), local_relations.to(
                        params.device), local_tails.to(params.device))

                positive_triples = torch.stack(
                    (local_heads, local_relations, local_tails), dim=1)

                # Preparing negatives.
                # Generate binary tensor to replace either head or tail. 1 means replace head, 0 means replace tail.
                head_or_tail = torch.randint(high=2,
                                             size=local_heads.size(),
                                             device=params.device)
                random_entities = torch.randint(high=len(entity2id),
                                                size=local_heads.size(),
                                                device=params.device)
                broken_heads = torch.where(head_or_tail == 1, random_entities,
                                           local_heads)
                broken_tails = torch.where(head_or_tail == 0, random_entities,
                                           local_tails)
                negative_triples = torch.stack(
                    (broken_heads, local_relations, broken_tails), dim=1)

                optimizer.zero_grad()

                loss, pd, nd = model(positive_triples, negative_triples)
                loss.mean().backward()

                summary_writer.add_scalar('Loss/train',
                                          loss.mean().data.cpu().numpy(),
                                          global_step=step)
                summary_writer.add_scalar('Distance/positive',
                                          pd.sum().data.cpu().numpy(),
                                          global_step=step)
                summary_writer.add_scalar('Distance/negative',
                                          nd.sum().data.cpu().numpy(),
                                          global_step=step)

                loss = loss.data.cpu()
                loss_impacting_samples_count += loss.nonzero().size()[0]
                samples_count += loss.size()[0]

                optimizer.step()
                step += 1

                t.set_postfix(loss=loss_impacting_samples_count /
                              samples_count * 100)
                t.update()

            summary_writer.add_scalar('Metrics/batch_loss',
                                      loss_impacting_samples_count /
                                      samples_count * 100,
                                      global_step=epoch_id)

            # validation
            if epoch_id % params.validation_freq == 0:
                model.eval()
                _, _, hits_at_10, _ = evaluate(
                    model=model,
                    data_generator=validation_generator,
                    entities_count=len(entity2id),
                    device=params.device,
                    summary_writer=summary_writer,
                    epoch_id=epoch_id,
                    metric_suffix="val")
                score = hits_at_10
                if score > best_score:
                    best_score = score
                    utils.save_checkpoint(checkpoint_dir, model, optimizer,
                                          epoch_id, step, best_score)

    # Testing the best checkpoint on test dataset
    utils.load_checkpoint(checkpoint_dir, model, optimizer)
    best_model = model.to(params.device)
    best_model.eval()
    scores = evaluate(model=best_model,
                      data_generator=test_generator,
                      entities_count=len(entity2id),
                      device=params.device,
                      summary_writer=summary_writer,
                      epoch_id=1,
                      metric_suffix="test")
    print("Test scores: \n hit%1: {} \n hit%3: {} \nh it%10: {} \n mrr: {}".
          format(scores[0], scores[1], scores[2], scores[3]))

    eval_path = os.path.join(args.model_dir, 'eval.json')
    evals_params = utils.Params(eval_path)
    evals_params.hit_1 = scores[0]
    evals_params.hit_3 = scores[1]
    evals_params.hit_10 = scores[2]
    evals_params.mrr = scores[3]
    evals_params.best_score = best_score
    evals_params.save(eval_path)
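
The corruption step in the training loop above is the heart of the negative sampling: for each positive triple, either the head or the tail is replaced with a random entity. A minimal standalone sketch of the same idea (the function name corrupt_triples and the toy tensors are illustrative, not part of the example):

import torch

def corrupt_triples(heads, relations, tails, entity_count, device='cpu'):
    # 1 means replace the head, 0 means replace the tail.
    head_or_tail = torch.randint(high=2, size=heads.size(), device=device)
    random_entities = torch.randint(high=entity_count, size=heads.size(), device=device)
    broken_heads = torch.where(head_or_tail == 1, random_entities, heads)
    broken_tails = torch.where(head_or_tail == 0, random_entities, tails)
    return torch.stack((broken_heads, relations, broken_tails), dim=1)

# Toy batch of 4 triples over 10 entities.
h = torch.tensor([0, 1, 2, 3])
r = torch.tensor([0, 0, 1, 1])
t = torch.tensor([4, 5, 6, 7])
print(corrupt_triples(h, r, t, entity_count=10))
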
Example #50
0
def main(args, configs):
    print("Prepare training ...")

    preprocess_config, model_config, train_config = configs

    # Get dataset
    dataset = Dataset("train.txt",
                      preprocess_config,
                      train_config,
                      model_config,
                      sort=True,
                      drop_last=True)
    batch_size = train_config["optimizer"]["batch_size"]
    group_size = 4  # Set this larger than 1 to enable sorting in Dataset
    assert batch_size * group_size < len(dataset)
    loader = DataLoader(
        dataset,
        batch_size=batch_size * group_size,
        shuffle=True,
        collate_fn=dataset.collate_fn,
    )

    # Prepare model
    model, optimizer = get_model(args, configs, device, train=True)
    model = nn.DataParallel(model)
    num_param = get_param_num(model)
    Loss = FastSpeech2Loss(preprocess_config, model_config).to(device)
    print("Number of FastSpeech2 Parameters:", num_param)

    # Load checkpoint if exists
    if args.restore_path is not None and os.path.isfile(args.restore_path):
        checkpoint = torch.load(args.restore_path)
        pretrained_dict = checkpoint['model']
        if not any(key.startswith('module.') for key in pretrained_dict):
            pretrained_dict = {
                'module.' + k: v
                for k, v in pretrained_dict.items()
            }

        dem1 = 0
        dem2 = 0
        model_dict = model.state_dict()
        for k, v in pretrained_dict.items():
            if k in model_dict and v.size() == model_dict[k].size():
                # print('Load weight in ', k)
                dem1 += 1
            else:
                print(f'Module {k} is not same size')
                dem2 += 1
        dem2 += dem1
        print(f'### Load {dem1}/{dem2} modules')
        # 1. filter out unnecessary keys
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items()
            if k in model_dict and v.size() == model_dict[k].size()
        }
        # 2. overwrite entries in the existing state dict
        model_dict.update(pretrained_dict)
        # 3. load the new state dict
        model.load_state_dict(model_dict)
        # model.load_state_dict(checkpoint['model'])
        # optimizer.load_state_dict(checkpoint['optimizer'])
        print("\n---Model Restored at Step {}---\n".format(args.restore_step))

    # Load vocoder
    vocoder = get_vocoder(model_config, device)

    # Init logger
    for p in train_config["path"].values():
        os.makedirs(p, exist_ok=True)
    train_log_path = os.path.join(train_config["path"]["log_path"], "train")
    val_log_path = os.path.join(train_config["path"]["log_path"], "val")
    os.makedirs(train_log_path, exist_ok=True)
    os.makedirs(val_log_path, exist_ok=True)
    train_logger = SummaryWriter(train_log_path)
    val_logger = SummaryWriter(val_log_path)

    # Training
    step = args.restore_step + 1
    epoch = 1
    grad_acc_step = train_config["optimizer"]["grad_acc_step"]
    grad_clip_thresh = train_config["optimizer"]["grad_clip_thresh"]
    total_step = train_config["step"]["total_step"]
    log_step = train_config["step"]["log_step"]
    save_step = train_config["step"]["save_step"]
    synth_step = train_config["step"]["synth_step"]
    val_step = train_config["step"]["val_step"]

    outer_bar = tqdm(total=total_step, desc="Training", position=0)
    outer_bar.n = args.restore_step
    outer_bar.update()

    while True:
        inner_bar = tqdm(total=len(loader),
                         desc="Epoch {}".format(epoch),
                         position=1)
        for batchs in loader:
            for batch in batchs:
                batch = to_device(batch, device)

                # Forward
                output = model(*(batch[2:]))

                # Cal Loss
                losses = Loss(batch, output)
                total_loss = losses[0]

                # Backward
                total_loss = total_loss / grad_acc_step
                total_loss.backward()
                if step % grad_acc_step == 0:
                    # Clipping gradients to avoid gradient explosion
                    nn.utils.clip_grad_norm_(model.parameters(),
                                             grad_clip_thresh)

                    # Update weights
                    optimizer.step_and_update_lr()
                    optimizer.zero_grad()

                if step % log_step == 0:
                    losses = [l.item() for l in losses]
                    message1 = "Step {}/{}|".format(step, total_step)
                    message2 = "|Total Loss: {:.4f}|Mel Loss: {:.4f}|Mel PostNet Loss: {:.4f}|Pitch Loss: {:.4f}|Energy Loss: {:.4f}|Duration Loss: {:.4f}|".format(
                        *losses)

                    # with open(os.path.join(train_log_path, "log.txt"), "a") as f:
                    #     f.write(message1 + message2 + "\n")

                    outer_bar.write(message1 + message2)

                    log(train_logger, step, losses=losses)

                if step % synth_step == 0:
                    output_prediction = model(*(batch[2:6]))
                    fig, wav_reconstruction, wav_prediction, tag = synth_one_sample(
                        batch,
                        output_prediction,
                        vocoder,
                        model_config,
                        preprocess_config,
                    )
                    log(
                        train_logger,
                        fig=fig,
                        tag="Training/step_{}_{}".format(step, tag),
                    )
                    sampling_rate = preprocess_config["preprocessing"][
                        "audio"]["sampling_rate"]
                    log(
                        train_logger,
                        audio=wav_reconstruction,
                        sampling_rate=sampling_rate,
                        tag="Training/step_{}_{}_reconstructed".format(
                            step, tag),
                    )
                    log(
                        train_logger,
                        audio=wav_prediction,
                        sampling_rate=sampling_rate,
                        tag="Training/step_{}_{}_synthesized".format(
                            step, tag),
                    )

                if step % val_step == 0:
                    model.eval()
                    message = evaluate(model, step, configs, val_logger,
                                       vocoder)
                    # with open(os.path.join(val_log_path, "log.txt"), "a") as f:
                    #     f.write(message + "\n")
                    outer_bar.write(message)

                    model.train()

                if step % save_step == 0:
                    torch.save(
                        {
                            "model": model.module.state_dict(),
                            "optimizer": optimizer._optimizer.state_dict(),
                        },
                        os.path.join(
                            train_config["path"]["ckpt_path"],
                            "{}.pth.tar".format(step),
                        ),
                    )

                if step == total_step:
                    quit()
                step += 1
                outer_bar.update(1)

            inner_bar.update(1)
        epoch += 1
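
The loop above accumulates gradients over grad_acc_step batches before clipping and stepping the optimizer. A minimal sketch of that pattern in isolation, with a toy model and made-up hyperparameters:

import torch
import torch.nn as nn

model = nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
grad_acc_step, grad_clip_thresh = 4, 1.0

for step in range(1, 17):
    x, y = torch.randn(2, 8), torch.randn(2, 1)
    loss = nn.functional.mse_loss(model(x), y) / grad_acc_step
    loss.backward()  # gradients accumulate across the grad_acc_step batches
    if step % grad_acc_step == 0:
        nn.utils.clip_grad_norm_(model.parameters(), grad_clip_thresh)
        optimizer.step()
        optimizer.zero_grad()
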
Example #51
0
            net.forward(patch, mask, training=True)
            # label: 1 channel -> 9 channels
            mask = label2multichannel(mask.cpu(), class_num)
            mask = mask.to(device)
            elbo = net.elbo(mask)
            ###
            reg_loss = l2_regularisation(net.posterior) + l2_regularisation(net.prior) + l2_regularisation(net.fcomb.layers)
            loss = -elbo + 1e-5 * reg_loss
            losses += loss
            if step%10 == 0:
                print("-- [step {}] loss: {}".format(step, loss))
                # evaluate(net, test_loader, device, test=True)    
                # break
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Evaluation
        losses /= (step+1)
        print("Loss (Train): {}".format(losses))
        # evaluate(net, train_eval_loader, device, test=False)     
        evaluate(net, test_loader, device, test=True)        
except KeyboardInterrupt as e:
    print('KeyboardInterrupt: {}'.format(e))
except Exception as e:
    print('Exception: {}'.format(e))
finally:
    # Save the model
    print("saving the trained net model -- unet_{}.pt".format(class_num))
    save_model(net, path='model/unet_{}.pt'.format(class_num))
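
label2multichannel is not shown in the fragment above; a plausible sketch of such a helper, one-hot expanding an integer label map into class_num channels (the name, shapes, and class_num value here are assumptions):

import torch

def label_to_multichannel(mask, class_num):
    # (N, 1, H, W) integer labels -> (N, class_num, H, W) one-hot channels.
    mask = mask.long().squeeze(1)
    one_hot = torch.nn.functional.one_hot(mask, num_classes=class_num)
    return one_hot.permute(0, 3, 1, 2).float()

labels = torch.randint(0, 9, (2, 1, 4, 4))
print(label_to_multichannel(labels, class_num=9).shape)  # torch.Size([2, 9, 4, 4])
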
Example #52
0
def train(args, train_loader, test_loader, net, criterion, device):
    """
    Args:
        args: parsed command line arguments.
        train_loader: an iterator over the training set.
        test_loader: an iterator over the test set.
        net: the neural network model employed.
        criterion: the loss function.
        device: using CPU or GPU.

    Outputs:
        All training losses, training accuracies, test losses, and test
        accuracies on each evaluation during training.
    """
    optimizer = load_optim(params=net.parameters(),
                           optim_method=args.optim_method,
                           eta0=args.eta0,
                           alpha=args.alpha,
                           c=args.c,
                           milestones=args.milestones,
                           T_max=args.train_epochs * len(train_loader),
                           n_batches_per_epoch=len(train_loader),
                           nesterov=args.nesterov,
                           momentum=args.momentum,
                           weight_decay=args.weight_decay)

    if args.optim_method == 'SGD_ReduceLROnPlateau':
        scheduler = ReduceLROnPlateau(optimizer,
                                      mode='min',
                                      factor=args.alpha,
                                      patience=args.patience,
                                      threshold=args.threshold)

    # Choose loss and metric function
    loss_function = metrics.get_metric_function('softmax_loss')

    all_train_losses = []
    all_train_accuracies = []
    all_test_losses = []
    all_test_accuracies = []
    for epoch in range(1, args.train_epochs + 1):
        net.train()
        for data in train_loader:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            if args.optim_method.startswith('SLS'):
                closure = lambda: loss_function(
                    net, inputs, labels, backwards=False)
                optimizer.step(closure)
            else:
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                if 'Polyak' in args.optim_method:
                    optimizer.step(loss.item())
                else:
                    optimizer.step()

        # Evaluate the model on training and validation dataset.
        if args.optim_method == 'SGD_ReduceLROnPlateau' or (epoch %
                                                            args.eval_interval
                                                            == 0):
            train_loss, train_accuracy = evaluate(train_loader, net, criterion,
                                                  device)
            all_train_losses.append(train_loss)
            all_train_accuracies.append(train_accuracy)

            test_loss, test_accuracy = evaluate(test_loader, net, criterion,
                                                device)
            all_test_losses.append(test_loss)
            all_test_accuracies.append(test_accuracy)

            print('Epoch %d --- ' % (epoch),
                  'train: loss - %g, ' % (train_loss),
                  'accuracy - %g; ' % (train_accuracy),
                  'test: loss - %g, ' % (test_loss),
                  'accuracy - %g' % (test_accuracy))

            if args.optim_method == 'SGD_ReduceLROnPlateau':
                scheduler.step(test_loss)

    return (all_train_losses, all_train_accuracies, all_test_losses,
            all_test_accuracies)
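
The SLS branch above passes a closure to optimizer.step() so the line-search optimizer can re-evaluate the loss itself. A minimal sketch of the closure pattern, shown with the stock LBFGS optimizer because the SLS implementation is not part of this example; the SLS optimizer is assumed to follow a similar calling convention:

import torch
import torch.nn as nn

net = nn.Linear(4, 2)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.LBFGS(net.parameters(), lr=0.1)

inputs, labels = torch.randn(8, 4), torch.randint(0, 2, (8,))

def closure():
    # The optimizer may call this several times per step to probe the loss.
    optimizer.zero_grad()
    loss = criterion(net(inputs), labels)
    loss.backward()
    return loss

optimizer.step(closure)
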
Example #53
0
def train_and_evaluate(model,
                       meta_train_classes,
                       meta_val_classes,
                       meta_test_classes,
                       task_type,
                       optimizer,
                       scheduler,
                       loss_fn,
                       metrics,
                       params,
                       model_dir,
                       restore_file=None):
    """
    Train the model and evaluate every `save_summary_steps`.

    Args:
        model: TPN model
        meta_train_classes: (list) the classes for meta-training
        meta_val_classes: (list) the classes for meta-validating
        meta_test_classes: (list) the classes for meta-testing
        task_type: (subclass of FewShotTask) a type for generating tasks
        optimizer: (torch.optim) optimizer for parameters of model
        scheduler: (torch.optim.lr_scheduler) scheduler for decaying learning rate
        loss_fn: a loss function
        metrics: (dict) a dictionary of functions that compute a metric using 
                 the output and labels of each batch
        params: (Params) hyperparameters
        model_dir: (string) directory containing config, weights and log
        restore_file: (string) optional- name of file to restore from
                      (without its extension .pth.tar)
    """
    # reload weights from restore_file if specified
    if restore_file is not None:
        restore_path = os.path.join(args.model_dir,
                                    args.restore_file + '.pth.tar')
        logging.info("Restoring parameters from {}".format(restore_path))
        utils.load_checkpoint(restore_path, model, optimizer)

    # params information
    num_classes = params.num_classes
    num_samples = params.num_samples
    num_query = params.num_query

    # validation accuracy
    best_val_loss = float('inf')

    # For plotting a summarized view of the training procedure
    plot_history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'test_loss': [],
        'test_acc': []
    }

    with tqdm(total=params.num_episodes) as t:
        for episode in range(params.num_episodes):
            # Run one episode
            logging.info("Episode {}/{}".format(episode + 1,
                                                params.num_episodes))
            scheduler.step()

            # Train a model on a single task (episode).
            # TODO meta-batch of tasks
            task = task_type(meta_train_classes, num_classes, num_samples,
                             num_query)
            dataloaders = fetch_dataloaders(['train', 'test'], task)

            _ = train_single_task(model, optimizer, loss_fn, dataloaders,
                                  metrics, params)
            # print(episode, _)

            # Evaluate on train, val, test dataset given a number of tasks (params.num_steps)
            if (episode + 1) % params.save_summary_steps == 0:
                train_metrics = evaluate(model, loss_fn, meta_train_classes,
                                         task_type, metrics, params, 'train')
                val_metrics = evaluate(model, loss_fn, meta_val_classes,
                                       task_type, metrics, params, 'val')
                test_metrics = evaluate(model, loss_fn, meta_test_classes,
                                        task_type, metrics, params, 'test')

                train_loss = train_metrics['loss']
                val_loss = val_metrics['loss']
                test_loss = test_metrics['loss']
                train_acc = train_metrics['accuracy']
                val_acc = val_metrics['accuracy']
                test_acc = test_metrics['accuracy']

                is_best = val_loss <= best_val_loss

                # Save weights
                utils.save_checkpoint({
                    'episode': episode + 1,
                    'state_dict': model.state_dict(),
                    'optim_dict': optimizer.state_dict()
                },
                                      is_best=is_best,
                                      checkpoint=model_dir)

                # If best_test, best_save_path
                if is_best:
                    logging.info("- Found new best accuracy")
                    best_val_loss = val_loss

                    # Save best test metrics in a json file in the model directory
                    best_train_json_path = os.path.join(
                        model_dir, "metrics_train_best_weights.json")
                    utils.save_dict_to_json(train_metrics,
                                            best_train_json_path)
                    best_val_json_path = os.path.join(
                        model_dir, "metrics_val_best_weights.json")
                    utils.save_dict_to_json(val_metrics, best_val_json_path)
                    best_test_json_path = os.path.join(
                        model_dir, "metrics_test_best_weights.json")
                    utils.save_dict_to_json(test_metrics, best_test_json_path)

                # Save latest test metrics in a json file in the model directory
                last_train_json_path = os.path.join(
                    model_dir, "metrics_train_last_weights.json")
                utils.save_dict_to_json(train_metrics, last_train_json_path)
                last_val_json_path = os.path.join(
                    model_dir, "metrics_val_last_weights.json")
                utils.save_dict_to_json(val_metrics, last_val_json_path)
                last_test_json_path = os.path.join(
                    model_dir, "metrics_test_last_weights.json")
                utils.save_dict_to_json(test_metrics, last_test_json_path)

                plot_history['train_loss'].append(train_loss)
                plot_history['train_acc'].append(train_acc)
                plot_history['val_loss'].append(val_loss)
                plot_history['val_acc'].append(val_acc)
                plot_history['test_loss'].append(test_loss)
                plot_history['test_acc'].append(test_acc)
                utils.plot_training_results(args.model_dir, plot_history)

                t.set_postfix(
                    tr_acc='{:05.3f}'.format(train_acc),
                    te_acc='{:05.3f}'.format(test_acc),
                    tr_loss='{:05.3f}'.format(train_loss),
                    te_loss='{:05.3f}'.format(test_loss))
                print('\n')

            t.update()
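
utils.save_checkpoint is used above but not shown. A hedged sketch of the convention such a helper usually follows, writing a 'last' checkpoint on every call and copying it to 'best' when is_best is True (file names are assumptions):

import os
import shutil
import torch

def save_checkpoint(state, is_best, checkpoint):
    # Always keep the latest state; promote it to 'best' when flagged.
    os.makedirs(checkpoint, exist_ok=True)
    last_path = os.path.join(checkpoint, 'last.pth.tar')
    torch.save(state, last_path)
    if is_best:
        shutil.copyfile(last_path, os.path.join(checkpoint, 'best.pth.tar'))
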
Example #54
0
def train():
    writer = SummaryWriter(
        log_dir=
        f"./runs/{model_name}/{datetime.datetime.now().replace(microsecond=0).isoformat()}{'-' + os.environ['REMARK'] if 'REMARK' in os.environ else ''}"
    )

    if not os.path.exists('checkpoint'):
        os.makedirs('checkpoint')

    try:
        pretrained_word_embedding = torch.from_numpy(
            np.load('./data/train/pretrained_word_embedding.npy')).float()
    except FileNotFoundError:
        pretrained_word_embedding = None

    if model_name == 'DKN':
        try:
            pretrained_entity_embedding = torch.from_numpy(
                np.load(
                    './data/train/pretrained_entity_embedding.npy')).float()
        except FileNotFoundError:
            pretrained_entity_embedding = None

        try:
            pretrained_context_embedding = torch.from_numpy(
                np.load(
                    './data/train/pretrained_context_embedding.npy')).float()
        except FileNotFoundError:
            pretrained_context_embedding = None

        model = Model(config, pretrained_word_embedding,
                      pretrained_entity_embedding,
                      pretrained_context_embedding).to(device)
    else:
        model = Model(config, pretrained_word_embedding).to(device)

    print(model)

    dataset = BaseDataset('./data/train/behaviors_parsed.tsv',
                          './data/train/news_parsed.tsv',
                          './data/train/roberta')

    print(f"Load training dataset with size {len(dataset)}.")
    ###############################################
    '''
    dataloader = DataLoader(dataset,
                   batch_size=config.batch_size,
                   shuffle=True,
                   num_workers=config.num_workers,
                   drop_last=True,
                   pin_memory=True)'''
    ###############################################
    # In this step we need to transform the dataset into a federated dataset
    '''
    federated_train_loader = sy.FederatedDataLoader(datasets.MNIST(
                                                            '../data', 
                                                            train=True, 
                                                            download=True,
                                                            transform=transforms.Compose(
                                                                            [transforms.ToTensor(),
                                                                             transforms.Normalize((0.1307,), (0.3081,))]
                                                                            )
                                                                    )
    federated_train_loader = sy.FederatedDataLoader( # <-- this is now a FederatedDataLoader 
                                        dataset.federate((bob, alice)), # <-- NEW: we distribute the dataset across all the workers, it's now a FederatedDataset
                                        batch_size=args.batch_size, 
                                        shuffle=True, **kwargs)
    dataloader = iter(sy.FederatedDataLoader(dataset.federate((bob, alice)),
                                            batch_size=config.batch_size,
                                            shuffle=True,
                                            #num_workers=config.num_workers,
                                            drop_last=True,
                                            #pin_memory=True
                                           ))
                                        '''
    #print(dataset)
    dataloader = sy.FederatedDataLoader(dataset.federate((bob, alice)),
                                        batch_size=config.batch_size,
                                        shuffle=True,
                                        num_workers=config.num_workers,
                                        drop_last=True,
                                        pin_memory=True)
    ###############################################
    print(f"The training dataset has been loaded!")
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=3,
                                                gamma=0.95,
                                                last_epoch=-1)
    start_time = time.time()
    loss_full = []
    exhaustion_count = 0
    step = 0
    early_stopping = EarlyStopping()

    checkpoint_dir = os.path.join('./checkpoint', model_name)
    Path(checkpoint_dir).mkdir(parents=True, exist_ok=True)

    checkpoint_path = latest_checkpoint(checkpoint_dir)
    '''
    if checkpoint_path is not None:
        print(f"Load saved parameters in {checkpoint_path}")
        checkpoint = torch.load(checkpoint_path)
        early_stopping(checkpoint['early_stop_value'])
        step = checkpoint['step']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        model.train()
    '''

    #for i in tqdm(range(1,config.num_epochs * len(dataset) // config.batch_size + 1),desc="Training"):
    for i, (minibatch, target) in enumerate(dataloader):
        ##### Get a mini batch of data from federated dataset
        #minibatch ,_ = next(dataloader)
        #print(minibatch)
        #print(minibatch.size())
        #exit()
        #minibatch = next(dataloader)
        step += 1
        if model_name == 'LSTUR':
            y_pred = model(minibatch["user"], minibatch["clicked_news_length"],
                           minibatch["candidate_news"],
                           minibatch["clicked_news"])
        elif model_name == 'HiFiArk':
            y_pred, regularizer_loss = model(minibatch["candidate_news"],
                                             minibatch["clicked_news"])
        elif model_name == 'TANR':
            y_pred, topic_classification_loss = model(
                minibatch["candidate_news"], minibatch["clicked_news"])
        else:
            #################################################
            # Send the model
            model.send(minibatch.location)
            minibatch, target = minibatch.to(device), target.to(device)
            #minibatch = minibatch.to(device)
            #################################################

            y_pred = model(minibatch)

        #y = torch.zeros(config.batch_size).long().to(device)
        #print(y_pred.get().size())
        #print(y.size())
        loss = criterion(y_pred, target)

        if model_name == 'HiFiArk':
            if i % 10 == 0:
                writer.add_scalar('Train/BaseLoss', loss.get(), step)
                writer.add_scalar('Train/RegularizerLoss',
                                  regularizer_loss.get(), step)
                writer.add_scalar('Train/RegularizerBaseRatio',
                                  regularizer_loss.get() / loss.get(), step)
            loss += config.regularizer_loss_weight * regularizer_loss
        elif model_name == 'TANR':
            if i % 10 == 0:
                writer.add_scalar('Train/BaseLoss', loss.item(), step)
                writer.add_scalar('Train/TopicClassificationLoss',
                                  topic_classification_loss.item(), step)
                writer.add_scalar(
                    'Train/TopicBaseRatio',
                    topic_classification_loss.item() / loss.item(), step)
            loss += config.topic_classification_loss_weight * topic_classification_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()
        model.get()

        loss = loss.get().detach().cpu().item()
        loss_full.append(loss)

        if i % 10 == 0:
            writer.add_scalar('Train/Loss', loss, step)

        if i % config.num_batches_show_loss == 0:
            #print(loss_full)
            #print(type(loss_full))
            tqdm.write(
                f"Time {time_since(start_time)}, batches {i}, current loss {loss:.4f}, average loss: {np.mean(loss_full):.4f}, latest average loss: {np.mean(loss_full[-256:]):.4f}"
            )

        if i % config.num_batches_validate == 0:
            (model if model_name != 'Exp1' else models[0]).eval()
            val_auc, val_mrr, val_ndcg5, val_ndcg10 = evaluate(
                model if model_name != 'Exp1' else models[0], './data/val',
                200000)
            (model if model_name != 'Exp1' else models[0]).train()
            writer.add_scalar('Validation/AUC', val_auc, step)
            writer.add_scalar('Validation/MRR', val_mrr, step)
            writer.add_scalar('Validation/nDCG@5', val_ndcg5, step)
            writer.add_scalar('Validation/nDCG@10', val_ndcg10, step)
            tqdm.write(
                f"Time {time_since(start_time)}, batches {i}, validation AUC: {val_auc:.4f}, validation MRR: {val_mrr:.4f}, validation nDCG@5: {val_ndcg5:.4f}, validation nDCG@10: {val_ndcg10:.4f}, "
            )

            early_stop, get_better = early_stopping(-val_auc)
            if early_stop:
                tqdm.write('Early stop.')
                break
            elif get_better:
                try:
                    torch.save(
                        {
                            'model_state_dict': (model if model_name != 'Exp1'
                                                 else models[0]).state_dict(),
                            'optimizer_state_dict':
                            (optimizer if model_name != 'Exp1' else
                             optimizers[0]).state_dict(),
                            'step':
                            step,
                            'early_stop_value':
                            -val_auc
                        }, f"./checkpoint/{model_name}/ckpt-{step}.pth")
                except OSError as error:
                    print(f"OS error: {error}")
Example #55
0
def main(argv):

    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    tf.random.set_seed(FLAGS.seed)

    if FLAGS.loudness_traindata_proto_file_pattern is None:
        raise app.UsageError(
            "Must provide --loudness_traindata_proto_file_pattern.")

    log_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           FLAGS.logs_dir)
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    logging.info("TensorFlow seed: %d", FLAGS.seed)

    input_shape = None
    if FLAGS.mode == "test":
        raise NotImplementedError("Did not implement mode test.")
        data = get_datasets(FLAGS.loudness_testdata_proto_file_pattern,
                            1,
                            carfac=FLAGS.use_carfac)
        logging.info("Created testing datasets")
        model = tf.keras.models.load_model(FLAGS.load_model_from_file)
        logging.info("Loaded model")
    elif FLAGS.mode == "train":
        data = get_datasets(FLAGS.loudness_traindata_proto_file_pattern,
                            FLAGS.batch_size,
                            carfac=FLAGS.use_carfac,
                            extra_file_pattern=FLAGS.
                            extra_loudness_traindata_proto_file_pattern)
        frequency_bins = None
        for example in data["train"].take(1):
            input_example, target_example = example
            input_shape = input_example.shape
            carfac_channels = input_example.shape[1]
            frequency_bins = input_example.shape[2]
        logging.info("Created model")
    elif FLAGS.mode == "eval_once":
        data = get_testdata(FLAGS.loudness_testdata_proto_file_pattern,
                            carfac=FLAGS.use_carfac)
        frequency_bins = None
        for example in data["test"].take(1):
            input_example, target_example, _ = example
            input_shape = input_example.shape
            carfac_channels = input_example.shape[1]
            frequency_bins = input_example.shape[2]
    model = LoudnessPredictor(
        frequency_bins=frequency_bins,
        carfac_channels=carfac_channels,
        num_rows_channel_kernel=FLAGS.num_rows_channel_kernel,
        num_cols_channel_kernel=FLAGS.num_cols_channel_kernel,
        num_filters_channels=FLAGS.num_filters_channels,
        num_rows_bin_kernel=FLAGS.num_rows_bin_kernel,
        num_cols_bin_kernel=FLAGS.num_cols_bin_kernel,
        num_filters_bins=FLAGS.num_filters_bins,
        dropout_p=FLAGS.dropout_p,
        use_channels=FLAGS.use_carfac,
        seed=FLAGS.seed)
    if FLAGS.load_from_checkpoint:
        path_to_load = os.path.join(log_dir, FLAGS.load_from_checkpoint)
        logging.info("Attempting to load model from %s", path_to_load)
        loaded = False
        try:
            model.load_weights(path_to_load)
            loaded = True
            logging.info("Loaded model")
        except Exception as err:
            logging.info(
                "Unable to load log dir checkpoint %s, trying "
                "'load_from_checkpoint' flag: %s", path_to_load, err)
            path_to_load = FLAGS.load_from_checkpoint
            try:
                model.load_weights(path_to_load)
                loaded = True
            except Exception as err:
                logging.info("Unable to load flag checkpoint %s: %s",
                             path_to_load, err)
    else:
        loaded = False

    example_image_batch = []
    if FLAGS.mode == "train":
        data_key = "train"
        for example in data[data_key].take(4):
            input_example, target = example
            input_shape = input_example.shape
            tf.print("(start train) input shape: ", input_shape)
            tf.print("(start train) target phons shape: ", target.shape)
            input_example = tf.expand_dims(input_example[0], axis=0)
            example_image_batch.append([input_example, target])

    elif FLAGS.mode == "eval_once":
        data_key = "test"
        for example in data[data_key].take(4):
            input_example, target, _ = example
            input_shape = input_example.shape
            tf.print("(start eval) input shape: ", input_shape)
            tf.print("(start eval) target phons shape: ", target.shape)
            input_example = tf.expand_dims(input_example[0], axis=0)
            example_image_batch.append([input_example, target])

    callbacks = [helpers.StepIncrementingCallback()]
    callbacks.append(
        tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                       histogram_freq=1,
                                       update_freq="batch",
                                       write_graph=True))
    model.build(input_shape)
    logging.info("Model summary")
    model.summary()

    if FLAGS.extra_loudness_traindata_proto_file_pattern:
        extra_data = True
    else:
        extra_data = False
    save_ckpt = log_dir + "/cp_carfac{}_extradata{}".format(
        FLAGS.use_carfac, extra_data) + "_{epoch:04d}.ckpt"
    logging.info("Save checkpoint to: %s" % save_ckpt)
    callbacks.append(
        tf.keras.callbacks.ModelCheckpoint(filepath=save_ckpt,
                                           save_weights_only=True,
                                           verbose=1,
                                           period=5))

    if FLAGS.mode == "train":
        logging.info("Starting training for %d epochs" % FLAGS.epochs)
        if FLAGS.extra_loudness_traindata_proto_file_pattern:
            steps_per_epoch = (317 + 639) // FLAGS.batch_size
        else:
            steps_per_epoch = 317 // FLAGS.batch_size
        train(model, data["train"], data["validate"], FLAGS.learning_rate,
              FLAGS.epochs, steps_per_epoch, callbacks)
    elif FLAGS.mode == "test":
        raise NotImplementedError("Mode test not implemented.")
        evaluate(model, data["test"], batch_size=FLAGS.eval_batch_size)
    elif FLAGS.mode == "eval_once":
        if not loaded:
            raise ValueError(
                "Trying to evaluate a model with uninitialized weights.")
        save_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                log_dir)
        write_predictions(model,
                          data["test"],
                          batch_size=1,
                          save_directory=save_dir,
                          save_file=FLAGS.save_predictions_file)
        return
    else:
        raise ValueError("Specified value for '--mode' (%s) unknown",
                         FLAGS.mode)
Example #56
0
def train_and_evaluate(model,
                       data_loader,
                       optimizer,
                       loss_fn,
                       metrics,
                       params,
                       model_dir,
                       restore_file=None):
    """Train the model and evaluate every epoch.
    Args:
        model: (torch.nn.Module) the neural network
        data_loader: (DataLoader) provides data iterators for the 'train' and 'val' splits
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        metrics: (dict) a dictionary of functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        model_dir: (string) directory containing config, weights and log
        restore_file: (string) optional- name of file to restore from (without its extension .pth.tar)
    """
    ema = utils.EMA(model, params.ema_decay)
    # reload weights from restore_file if specified
    if restore_file is not None:
        restore_path = os.path.join(args.model_dir,
                                    args.restore_file + '.pth.tar')
        logging.info("Restoring parameters from {}".format(restore_path))
        utils.load_checkpoint(restore_path, model, optimizer)

    best_val_acc = 0.0

    for epoch in range(params.num_epochs):
        # Run one epoch
        logging.info("Epoch {}/{}".format(epoch + 1, params.num_epochs))

        # compute number of batches in one epoch (one full pass over the training set)
        num_steps = (params.train_size + 1) // params.batch_size
        train_data_iterator = data_loader.data_iterator(
            split='train', batch_size=params.batch_size)
        train(model, optimizer, loss_fn, train_data_iterator, metrics, params,
              num_steps, ema)

        # Evaluate for one epoch on validation set
        ema.assign(model)
        num_steps = (params.val_size + 1) // params.batch_size
        val_data_iterator = data_loader.data_iterator(
            split='val', batch_size=params.batch_size)
        val_metrics = evaluate(model, loss_fn, val_data_iterator, metrics,
                               params, num_steps)

        val_acc = val_metrics['f1']
        is_best = val_acc >= best_val_acc

        # Save weights
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optim_dict': optimizer.state_dict()
            },
            is_best=is_best,
            checkpoint=model_dir)

        # If best_eval, best_save_path
        if is_best:
            logging.info("- Found new best accuracy")
            best_val_acc = val_acc

            # Save best val metrics in a json file in the model directory
            best_json_path = os.path.join(model_dir,
                                          "metrics_val_best_weights.json")
            utils.save_dict_to_json(val_metrics, best_json_path)

        # Save latest val metrics in a json file in the model directory
        last_json_path = os.path.join(model_dir,
                                      "metrics_val_last_weights.json")
        utils.save_dict_to_json(val_metrics, last_json_path)
        ema.resume(model)
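
utils.EMA is used above but not shown. A hedged sketch of an exponential-moving-average helper with the same assign/resume interface; the update rule and anything beyond assign/resume are assumptions:

class EMA:
    def __init__(self, model, decay):
        self.decay = decay
        self.shadow = {n: p.detach().clone() for n, p in model.named_parameters()}
        self.backup = {}

    def update(self, model):
        # Blend current parameters into the shadow copy after each step.
        for n, p in model.named_parameters():
            self.shadow[n].mul_(self.decay).add_(p.detach(), alpha=1 - self.decay)

    def assign(self, model):
        # Swap the EMA weights in for evaluation, keeping a backup.
        self.backup = {n: p.detach().clone() for n, p in model.named_parameters()}
        for n, p in model.named_parameters():
            p.data.copy_(self.shadow[n])

    def resume(self, model):
        # Restore the original training weights after evaluation.
        for n, p in model.named_parameters():
            p.data.copy_(self.backup[n])
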
Example #57
0
def train_and_evaluate(model, params, dataloader, optimizer, scheduler,
                       loss_fn, metrics, model_dir, log_dir, threshold,
                       cuda_present):
    """Train the model and evaluate every epoch.
    Args:
        model: (torch.nn.Module) the neural network
        train_dataloader: (DataLoader) a torch.utils.data.DataLoader object that fetches training data
        val_dataloader: (DataLoader) a torch.utils.data.DataLoader object that fetches validation data
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        metrics: (dict) a dictionary of functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        model_dir: (string) directory containing config, weights and log
        restore_file: (string) optional- name of file to restore from (without its extension .pth.tar)
    """

    best_val_acc = 0
    for epoch in range(params.num_epochs):
        scheduler.step()
        t0 = time.time()
        '''Do the following for every epoch'''
        # Run one epoch
        logging.info(
            "Epoch {}/{} , : learning rate =  {}  : threshold: {}".format(
                epoch + 1, params.num_epochs, scheduler.get_lr(), threshold))

        #train_image_dict = dataloader.load_data("train", params)
        train_image_dict = dataloader.Create_Img_dict("train", params)
        train_labels_dict = dataloader.load_labels("train", params)
        train_img_count = len(train_image_dict)
        train_data_generator = dataloader.data_iterator(
            params, "train", train_image_dict, train_labels_dict)

        # compute number of batches in one epoch (one full pass over the training set)
        train(model, optimizer, loss_fn, train_data_generator, metrics, params,
              train_img_count, threshold, cuda_present)

        # Evaluate for one epoch on validation set
        val_image_dict = dataloader.load_data("val", params)
        val_labels_dict = dataloader.load_labels("val", params)
        val_img_count = len(val_image_dict)
        val_data_generator = dataloader.data_iterator(params, "val",
                                                      val_image_dict,
                                                      val_labels_dict)
        (val_metrics, threshold) = evaluate.evaluate(model, loss_fn,
                                                     val_data_generator,
                                                     params, val_img_count,
                                                     threshold, cuda_present)

        val_acc = val_metrics['accuracy']
        is_best = val_acc > best_val_acc

        chk_file_name = 'train_DN73_3ch_BBG_nodataaug_Adam__' + 'epoch_' + str(
            epoch) + '__'
        best_file_name = chk_file_name + '.pth.tar'
        lr = params.learning_rate
        # Save weights every 3rd epoch
        #if ((epoch+1) % 3 == 0):
        if (is_best):
            best_val_acc = val_acc
            utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'optim_dict': optimizer.state_dict()
                }, is_best, log_dir, best_file_name)
        t1 = time.time()
        logging.info("Time taken for this epoch = {}".format(t1 - t0))

    # Save weights in the end
    chk_file_name += '__' + 'lr_' + str(lr) + '__' + '.pth.tar'
    utils.save_checkpoint(
        {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optim_dict': optimizer.state_dict()
        }, is_best, log_dir, chk_file_name)
Example #58
0
        for j in range(n_sample):
            rank_order_dist[i, j] = float('inf')

    for i in range(n_sample):
        print(i)
        for j in range(i + 1, n_sample):
            index_i = numpy.argwhere(nn_list[i] == j)[0][0]
            index_j = numpy.argwhere(nn_list[j] == i)[0][0]
            if index_i <= k or index_j <= k:
                distance_i = 0
                distance_j = 0
                for r in range(min(index_i, k) + 1):
                    if numpy.argwhere(nn_list[j] == nn_list[i, r])[0][0] > k:
                        distance_i += 1
                for r in range(min(index_j, k) + 1):
                    if numpy.argwhere(nn_list[i] == nn_list[j, r])[0][0] > k:
                        distance_j += 1
                # symmetrize the distance
                rank_order_dist[i, j] = rank_order_dist[
                    j, i] = (distance_i + distance_j) / min(index_i, index_j)

    return rank_order_dist


if __name__ == '__main__':
    start_time = time()
    result = cluster(0.1, 200)
    time_diff = time() - start_time
    print('time:{}'.format(time_diff))
    evaluate(labels, result)
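
The fragment above assumes a precomputed nn_list in which row i holds all sample indices sorted by ascending distance from sample i (so nn_list[i, 0] is i itself). A minimal sketch of building such a list with NumPy on toy data:

import numpy

features = numpy.random.rand(6, 2)  # toy samples
# Pairwise Euclidean distances, then argsort each row by ascending distance.
dists = numpy.linalg.norm(features[:, None, :] - features[None, :, :], axis=-1)
nn_list = numpy.argsort(dists, axis=1)
print(nn_list)
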
Example #59
0
        for epoch in range(num_epoch):
            print("epoch:", epoch)
            train_bpr_loss = []
            train_loss = []
            train_data = generate_train_data(train_dict, num_item, num_neg_sample)
            train_batch = generate_train_batch(train_data, batch_size)
            for batch in train_batch:
                bpr_loss, loss, _ = sess.run([model.bpr_loss, model.loss, model.train_op], feed_dict=get_feed_dict(model, batch))
                train_bpr_loss.append(bpr_loss)
                train_loss.append(loss)
            train_bpr_loss = sum(train_bpr_loss) / len(train_data)
            train_loss = sum(train_loss) / len(train_data)
            print("train bpr loss:", train_bpr_loss, "train loss:", train_loss)

            r_hat = sess.run(model.r_hat_ui, feed_dict={model.u: validate_data[:, 0], model.i: validate_data[:, 1]})
            rank_list = np.reshape(r_hat, [-1, 100]).argsort()[:, ::-1].tolist()
            validate_hr, validate_ndcg = evaluate(rank_list, 0, 10)
            print("validate hit ratio:", validate_hr, "validate ndcg:", validate_ndcg)
            result.append([epoch, train_loss, validate_hr, validate_ndcg])

            r_hat = sess.run(model.r_hat_ui, feed_dict={model.u: test_data[:, 0], model.i: test_data[:, 1]})
            rank_list = np.reshape(r_hat, [-1, 100]).argsort()[:, ::-1].tolist()
            test_hr, test_ndcg = evaluate(rank_list, 0, 10)
            print("test hit ratio:", test_hr, "test ndcg:", test_ndcg)

    print("over!")
    with open('BPR_{dataset}_{num_factor}_{reg_rate}.csv'.format(dataset=dataset, num_factor=num_factor, reg_rate=reg_rate), 'w', newline='') as f:
        writer = csv.writer(f)
        for line in result:
            writer.writerow(line)
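
The evaluate(rank_list, 0, 10) call above is not defined in this fragment. A hedged sketch of a leave-one-out HR@k / NDCG@k computation consistent with that call, assuming the held-out positive item sits at position `target` among the 100 candidates:

import math

def evaluate_rank_list(rank_list, target, k):
    hits, ndcgs = [], []
    for ranks in rank_list:
        top_k = ranks[:k]
        if target in top_k:
            hits.append(1.0)
            # Single relevant item: DCG = 1 / log2(rank + 2), IDCG = 1.
            ndcgs.append(math.log(2) / math.log(top_k.index(target) + 2))
        else:
            hits.append(0.0)
            ndcgs.append(0.0)
    return sum(hits) / len(hits), sum(ndcgs) / len(ndcgs)

print(evaluate_rank_list([[0, 3, 5], [7, 0, 2], [4, 9, 8]], target=0, k=3))
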
Example #60
0
import os
from config import workspace_path
from train import train
from config import Model_Config
from prepare import prepare
from predict import predict
from evaluate import evaluate

print(os.getcwd())
os.chdir(workspace_path)
os.makedirs('log', exist_ok=True)
os.makedirs('hdf', exist_ok=True)
print(os.getcwd())
print(os.listdir('./'))

if __name__ == '__main__':
    # prepare
    X_train_processed, X_val_processed, Y_train_list, Y_val_list, vocabulary_size, label_distribute_dict_list = prepare(
    )
    # train
    model, history = train(X_train_processed, X_val_processed, Y_train_list,
                           Y_val_list, vocabulary_size,
                           label_distribute_dict_list)
    # predict
    preds = predict(X_val_processed, Model_Config.model_saved_filepath)
    # evaluate
    validate_df, df_pcf = evaluate(preds)
    # print precision, recall, and f1 values
    print(df_pcf)