def main():
    argv = sys.argv
    if len(argv) == 3:
        model_path, data_file = argv[1:]
        evaluator = Evaluator(model_path, data_file)
        evaluator.evaluate()
    else:
        print('Usage: python evaluate.py <model_path> <data_file>')
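
Every snippet on this page assumes its own project-specific Evaluator. As a point of reference only, a minimal stand-in matching the interface this first example expects might look like this (a hypothetical sketch, not any project's actual class):

class Evaluator:
    """Hypothetical stand-in: loads a model and scores a data file."""

    def __init__(self, model_path, data_file):
        self.model_path = model_path
        self.data_file = data_file

    def evaluate(self):
        # A real implementation would load the model, run it over the
        # data file and report metrics; this stub only echoes its inputs.
        print('evaluating', self.model_path, 'on', self.data_file)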
Example No. 2
def main(argv):

    # initialize evaluator and evaluate
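    # FLAGS is presumably absl.flags.FLAGS (or tf.flags), parsed before
    # main(argv) is invoked.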
    evaluator = Evaluator(
        FLAGS.experiment_path,
        FLAGS.src_test_path,
        FLAGS.tgt_test_path,
        FLAGS.save_as_pretrained,
    )
    evaluator.evaluate()
Example No. 3
 def test_general(self):
     ev = Evaluator()
     possible_keywords = [
         'family', 'food', 'outdoor', 'rest', 'indoor', 'sports', 'science',
         'culture', 'history'
     ]
     dg = DataGenerator(possible_keywords)
     gen_query = dg.generate(1)[0]
     gen_data = dg.generate(10)
     cf1 = Type1(euclidean_distance,
                 combined_cosine_similarity,
                 0.33,
                 0.33,
                 0.33,
                 disable_thresholds=True)
     cf2 = Type2(euclidean_distance,
                 combined_cosine_similarity,
                 0.33,
                 0.33,
                 0.33,
                 disable_thresholds=True)
     cf3 = Type3(euclidean_distance,
                 combined_cosine_similarity,
                 0.33,
                 0.33,
                 0.33,
                 disable_thresholds=True)
     ns1 = NaiveSolver(gen_query,
                       gen_data,
                       cf1,
                       result_length=10,
                       max_subset_size=6)
     ns2 = NaiveSolver(gen_query,
                       gen_data,
                       cf2,
                       result_length=10,
                       max_subset_size=6)
     ns3 = NaiveSolver(gen_query,
                       gen_data,
                       cf3,
                       result_length=10,
                       max_subset_size=6)
     ev.add_solver(ns1)
     ev.add_solver(ns2)
     ev.add_solver(ns3)
     ev.evaluate()
     results = ev.get_results()
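     # One entry per registered solver; entry[0] is the ranked solution list
     # of length result_length, entry[1] is left uninspected by this test.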
     self.assertEqual(len(results), 3)
     self.assertEqual(len(results[0]), 2)
     self.assertEqual(len(results[1]), 2)
     self.assertEqual(len(results[2]), 2)
     self.assertEqual(len(results[0][0]), 10)
     self.assertEqual(len(results[1][0]), 10)
     self.assertEqual(len(results[2][0]), 10)
Example No. 4
def score_embedding(embedding, groups):
    evaluator = Evaluator(groups)
    evaluator.evaluate(embedding)
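    # OPP and accuracy presumably follow the word-embedding outlier-detection
    # protocol (Outlier Position Percentage, and the share of test cases where
    # the outlier is ranked last).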
    print("   RESULTS")
    print("==============")
    print("OPP score: %f" % evaluator.opp)
    print("Accuracy: %f" % evaluator.accuracy)
    print("---------------------------------")
    print("Total number of test groups: %d" % evaluator.num_total_groups)
    print("Number of filtered test groups: %d (%f%%)" % (evaluator.num_filtered_groups, evaluator.percent_filtered_groups))
    print("Total number of non-OOV test cases: %d" % evaluator.num_cases)
    print("Number of filtered cluster entities: %d/%d (mean per %% cluster: %f%%)" % (evaluator.num_filtered_cluster_items, evaluator.num_total_cluster_items, evaluator.percent_filtered_cluster_items))
    print("Number of filtered outlier entities: %d/%d (mean per %% cluster: %f%%)" % (evaluator.num_filtered_outliers, evaluator.num_total_outliers, evaluator.percent_filtered_outliers))
Example No. 5
    def evaluate(self):
        logger.info('Begin evaluation of Impulso')
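        # Prefer an experiment-specific Evaluator when an experiment_id is
        # given; otherwise fall back to the project default.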
        if self.args.experiment_id:
            module = importlib.import_module(
                f'experiments.{self.args.experiment_id}.src.evaluator')
            Evaluator = getattr(module, 'Evaluator')
        else:
            from src.evaluator import Evaluator

        evaluator = Evaluator(self.args.exec_type, self.hparams)
        evaluator.load_data()
        evaluator.evaluate()
        logger.info('End evaluation of Impulso')
Example No. 6
def run(args) -> None:
    """
    This function parses the arguments provided and runs the experiment
    :param args: the arguments provided by the user.
    :return: None
    """
    selector = GeneratorSelector()
    generators = selector.getAllGenerators()
    benchmark = Benchmark(benchmarkType=args.benchmark, verbose=args.verbose)
    evaluator = Evaluator(benchmark=benchmark,
                          generators=generators,
                          timeLimit=benchmark.timeLimit,
                          verbose=args.verbose)
    evaluator.evaluate()
    sys.exit(0)
Example No. 7
def search():
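    # Presumably a Flask view: request.form, app.root_path and the JSON
    # response all point that way; note that GET requests just return False.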
    if request.method == 'POST':
        screen_name = request.form['screen_name']
        evaluator = Evaluator(app.root_path)
        status, result = evaluator.evaluate(screen_name)
        if status != 200:
            raise Err(result, status_code=status)
        return json.dumps(result)
    else:
        return False
Example No. 8
                    help="the number of orders to parse", type=int, default=2400)
parser.add_argument("--process_num", dest="NUM_PROCESSES",
                    help="the number of processes", type=int, default=16)
parser.add_argument("--weight_amount", dest="WEIGHT_AMOUNT",
                    help="the weight of the amountCost in the objective function", type=float, default=15)
parser.add_argument("--weight_time", dest="WEIGHT_TIME",
                    help="the weight of the timeCost in the objective function", type=float, default=1)
parser.add_argument("--hub_cost_const", dest="HUB_BUILT_COST_CONST",
                    help="the constant part of cost of building a hub", type=int, default=3000)
parser.add_argument("--hub_cost_vary", dest="HUB_BUILT_COST_VARY",
                    help="the variable part cost of building a hub", type=float, default=2.0)
parser.add_argument("--hub_ratio", dest="HUB_UNIT_COST_RATIO",
                    help="the cutoff of unit price of a hub", type=float, default=0.7)
parser.add_argument("--hub_capacity", dest="HUB_CAPACITY",
                    help="the capacity of the hub(in prob3)", type=int, default=500)

args = parser.parse_args()

arg_dict = edict(vars(args))
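# merge_a_into_b presumably copies the EasyDict-wrapped args into the global cfg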
merge_a_into_b(arg_dict, cfg)
print("Using the config:")
print(cfg)

solver = Solver()
evaluator = Evaluator()

solver.solve(problem_id=args.PROBLEM_ID)
# Disable tune mode but plot the distribution; see the outputs for more detail
evaluator.evaluate(prob_id=args.PROBLEM_ID, tune=False, plot=True)

Example No. 9
rw.results_to_file(results_file_sqlm_prf, results_sqlm_prf, 'sqlm_prf')
rw.results_to_file(results_file_bm25, results_bm25, 'bm25')
rw.results_to_file(results_file_bm25_stop, results_bm25_stop, 'bm25_stop')
rw.results_to_file(results_file_bm25_stem, results_bm25_stem, 'bm25_stem')

eval_tfidf = Evaluator(results_file_tfidf, 'tfidf')
eval_tfidf_stop = Evaluator(results_file_tfidf_stop, 'tfidf_stop')
eval_sqlm = Evaluator(results_file_sqlm, 'sqlm')
eval_sqlm_stop = Evaluator(results_file_sqlm_stop, 'sqlm_stop')
eval_sqlm_prf = Evaluator(results_file_sqlm_prf, 'sqlm_prf')
eval_bm25 = Evaluator(results_file_bm25, 'bm25')
eval_bm25_stop = Evaluator(results_file_bm25_stop, 'bm25_stop')
eval_lucene = Evaluator(results_file_lucene, 'lucene')

print('Evaluating results for tf.idf...', end='')
eval_tfidf.evaluate()
eval_tfidf.eval_to_file()
print('Done')

print('Evaluating results for tf.idf with stopping...', end='')
eval_tfidf_stop.evaluate()
eval_tfidf_stop.eval_to_file()
print('Done')

print('Evaluating results for Smoothed Query Likelihood...', end='')
eval_sqlm.evaluate()
eval_sqlm.eval_to_file()
print('Done')

print('Evaluating results for Smoothed Query Likelihood with stopping...',
      end='')
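
The evaluate / eval_to_file / print sequence repeats for every Evaluator built above; a loop over the instances condenses the pattern (a sketch using only variables already defined, with illustrative labels):

for label, ev in [('tf.idf', eval_tfidf),
                  ('tf.idf with stopping', eval_tfidf_stop),
                  ('Smoothed Query Likelihood', eval_sqlm),
                  ('Smoothed Query Likelihood with stopping', eval_sqlm_stop),
                  ('Smoothed Query Likelihood with PRF', eval_sqlm_prf),
                  ('BM25', eval_bm25),
                  ('BM25 with stopping', eval_bm25_stop),
                  ('Lucene', eval_lucene)]:
    print('Evaluating results for {}...'.format(label), end='')
    ev.evaluate()
    ev.eval_to_file()
    print('Done')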
Example No. 10
def main():
    if not os.path.exists(config.result_dir):
        os.makedirs(config.result_dir)
    if not os.path.exists(config.train_log_dir):
        os.makedirs(config.train_log_dir)
    if not os.path.exists(config.valid_log_dir):
        os.makedirs(config.valid_log_dir)

    print('preparing data...')
    config.word_2_id, config.id_2_word = read_dict(config.word_dict)
    config.attr_2_id, config.id_2_attr = read_dict(config.attr_dict)
    config.vocab_size = min(config.vocab_size, len(config.word_2_id))
    config.oov_vocab_size = len(config.word_2_id) - config.vocab_size
    config.attr_size = len(config.attr_2_id)

    embedding_matrix = None
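    # In training mode, load pretrained GloVe vectors; otherwise only infer
    # the embedding size from the first line of the GloVe file.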
    if args.do_train:
        if os.path.exists(config.glove_file):
            print('loading embedding matrix from file: {}'.format(config.glove_file))
            embedding_matrix, config.word_em_size = load_glove_embedding(config.glove_file, list(config.word_2_id.keys()))
            print('shape of embedding matrix: {}'.format(embedding_matrix.shape))
    else:
        if os.path.exists(config.glove_file):
            with open(config.glove_file, 'r', encoding='utf-8') as fin:
                line = fin.readline()
                config.word_em_size = len(line.strip().split()) - 1

    data_reader = DataReader(config)
    evaluator = Evaluator('description')

    print('building model...')
    model = get_model(config, embedding_matrix)
    saver = tf.train.Saver(max_to_keep=10)

    if args.do_train:
        print('loading data...')
        train_data = data_reader.read_train_data()
        valid_data = data_reader.read_valid_data_small()

        print_title('Trainable Variables')
        for v in tf.trainable_variables():
            print(v)

        print_title('Gradients')
        for g in model.gradients:
            print(g)

        with tf.Session(config=sess_config) as sess:
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(config.result_dir)
            if model_file is not None:
                print('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)
            else:
                print('initializing from scratch...')
                tf.global_variables_initializer().run()

            train_writer = tf.summary.FileWriter(config.train_log_dir, sess.graph)
            valid_writer = tf.summary.FileWriter(config.valid_log_dir, sess.graph)

            run_train(sess, model, train_data, valid_data, saver, evaluator, train_writer, valid_writer, verbose=True)

    if args.do_eval:
        print('loading data...')
        valid_data = data_reader.read_valid_data()

        with tf.Session(config=sess_config) as sess:
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(config.result_dir)
            if model_file is not None:
                print('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)

                predicted_ids, alignment_history, valid_loss, valid_accu = run_evaluate(sess, model, valid_data, verbose=True)
                print('average valid loss: {:>.4f}, average valid accuracy: {:>.4f}'.format(valid_loss, valid_accu))

                print_title('Saving Result')
                save_result(predicted_ids, alignment_history, config.id_2_word, config.valid_data, config.valid_result)
                evaluator.evaluate(config.valid_data, config.valid_result, config.to_lower)
            else:
                print('model not found!')

    if args.do_test:
        print('loading data...')
        test_data = data_reader.read_test_data()

        with tf.Session(config=sess_config) as sess:
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(config.result_dir)
            if model_file is not None:
                print('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)

                predicted_ids, alignment_history = run_test(sess, model, test_data, verbose=True)

                print_title('Saving Result')
                save_result(predicted_ids, alignment_history, config.id_2_word, config.test_data, config.test_result)
                evaluator.evaluate(config.test_data, config.test_result, config.to_lower)
            else:
                print('model not found!')
Example No. 11
def main(argv):
    start_time = time.time()

    # Evaluator, instantiate it first for logging purposes
    ev = Evaluator()

    query: KeywordCoordinate = load_pickle('query.pickle')
    print('Query:', query)
    data: dataset_type = load_pickle('dataset.pickle')
    # print('Data:', dataset_comprehension(data))

    # Let's filter out by user radius
    # dataAux = sorted(data, key=lambda x: geographic_distance(x.coordinates, query.coordinates))
    # distances = [geographic_distance(x.coordinates, query.coordinates) >= RADIUS for x in dataAux]
    # print('------ Distances: ', distances)

    # Load precalculated values and models
    precalculated_inter_dataset_distances = load_pickle(
        'precalculated_inter_dataset_distances.pickle')
    precalculated_query_dataset_distances = load_pickle(
        'precalculated_query_dataset_distances.pickle')
    precalculated_query_dataset_keyword_similarities = load_pickle(
        'precalculated_query_dataset_keyword_similarities.pickle')

    # **** ONLY FOR word2vec model executions
    precalculated_query_dataset_keyword_similarities_word2vec = load_pickle(
        'precalculated_query_dataset_keyword_similarities_word2vec.pickle')
    word2vec_model = load_word2vec_model('word2vec_model.pickle')
    # ****

    # Define the CostFunctions. For all possible parameters refer to the documentation.
    cf1 = Type1(euclidean_distance, combined_cosine_similarity, 0.2, 0.1, 0.7)
    cf2 = Type2(euclidean_distance,
                word2vec_cosine_similarity,
                0.2,
                0.1,
                0.7,
                model=word2vec_model)
    cf3 = Type1(
        euclidean_distance,
        combined_cosine_similarity,
        0.2,
        0.1,
        0.7,
        precalculated_inter_dataset_dict=precalculated_inter_dataset_distances,
        precalculated_query_dataset_dict=precalculated_query_dataset_distances,
        precalculated_keyword_similarity_dict=
        precalculated_query_dataset_keyword_similarities)
    cf4 = Type2(
        euclidean_distance,
        word2vec_cosine_similarity,
        0,
        0,
        1.0,
        precalculated_inter_dataset_dict=precalculated_inter_dataset_distances,
        precalculated_query_dataset_dict=precalculated_query_dataset_distances,
        precalculated_keyword_similarity_dict=
        precalculated_query_dataset_keyword_similarities_word2vec,
        model=word2vec_model)
    cf5 = Type3(
        euclidean_distance,
        word2vec_cosine_similarity,
        0.1,
        0.1,
        0.8,
        precalculated_inter_dataset_dict=precalculated_inter_dataset_distances,
        precalculated_query_dataset_dict=precalculated_query_dataset_distances,
        precalculated_keyword_similarity_dict=
        precalculated_query_dataset_keyword_similarities_word2vec,
        model=word2vec_model)
    cf6 = Type1(
        euclidean_distance,
        word2vec_cosine_similarity,
        0.2,
        0.1,
        0.7,
        precalculated_inter_dataset_dict=precalculated_inter_dataset_distances,
        precalculated_query_dataset_dict=precalculated_query_dataset_distances,
        precalculated_keyword_similarity_dict=
        precalculated_query_dataset_keyword_similarities,
        model=word2vec_model)

    map_name = argv[0]
    # map_name = 'London_mini'
    # Choose which Solvers to use. For all possible parameters refer to the documentation.
    max_number_of_processes = mp.cpu_count()
    ns1 = NaiveSolver(
        query,
        data,
        cf2,
        result_length=5,
        max_subset_size=3,
        max_number_of_concurrent_processes=max_number_of_processes,
        _map=map_name)
    # ns2 = NaiveSolver(query, data, cf5, result_length=5, max_subset_size=3,
    # max_number_of_concurrent_processes=max_number_of_processes, _map = map_name)
    # ns3 = NaiveSolver(query, data, cf3, result_length=5, max_subset_size=6,
    #                   max_number_of_concurrent_processes=max_number_of_processes)
    # ns4 = NaiveSolver(query, data, cf6, result_length=5, max_subset_size=3,
    # max_number_of_concurrent_processes=max_number_of_processes, _map = map_name)

    # Add Solvers to Evaluator
    ev.add_solver(ns1)
    # ev.add_solver(ns2)
    # ev.add_solver(ns4)

    # Only for debug: calculate and print the physical distance between each
    # item in the dataset and the query location
    # distances = [geographic_distance(x.coordinates, query.coordinates) for x in data]
    # print('------ Distances: ', distances)

    # Run Evaluator and fetch results
    ev.evaluate()
    results = ev.get_results()
    timings = ev.get_timings()

    write_csv(map_name, results, timings)

    print('*** Solution -', solution_list_comprehension(results))
    # print('*** Timing -', timing_list_comprehension(timings))

    initialLat = []
    initialLon = []

    keywords = []

    gmap = gmplot.GoogleMapPlotter(query.coordinates.x, query.coordinates.y,
                                   14)

    colors = ['red', 'blue', 'green', 'purple', 'orange']

    # Third dimension is the order of solution (Best: 0, Second best: 1...)
    for i in range(5):
        lats = []
        lons = []
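        # results[0][0] is the ranked solution list of the only solver; each
        # ranked entry is presumably a (cost, subset) pair, so [i][1] is the
        # subset of KeywordCoordinates to plot.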
        for kwc in results[0][0][i][1]:
            lats.append(kwc.coordinates.x)
            lons.append(kwc.coordinates.y)
            keywords.append(kwc.keywords)
        for j in range(len(lats)):
            gmap.marker(lats[j], lons[j], color=colors[i])
        gmap.polygon(lats, lons, color='cornflowerblue', edge_width=7)

        # initialLat.append(query.coordinates.x)
        # initialLon.append(query.coordinates.y)

        # gmap.scatter(initialLat, initialLon, '#00FF00', size = 70, marker = False)
        # gmap.scatter(lats, lons, '#FF0000',size = 50, marker = False )

        # gmap.plot(lats, lons, 'cornflowerblue', edge_width = 3.0)
        # gmap.polygon(lats, lons, color='cornflowerblue', edge_width=10)

        # gmap.scatter(lats, lons, color='#3B0B39', size=40, marker=False)

        # Your Google API key
        # gmap.apikey = "API_Key"
        # save it to html
    # gmap.scatter(lats, lons, '#FF0000', size=40, marker=True)
    gmap.marker(query.coordinates.x,
                query.coordinates.y,
                color='cornflowerblue',
                title='Query point')
    gmap.draw(r"graphic_results.html")

    print("--- %s seconds ---" % (time.time() - start_time))
Example No. 13
class GeneticEngine:
    def __init__(self, data):
        self._evaluator = Evaluator(data)
        self._item_count = self._evaluator.generator.rule_set_len
        self._best_ind = None
        self._best_fit_val = 0

        # To ensure reproducibility, the RNG seed is set prior to the items
        # dict initialization. It is also seeded in main().
        random.seed(64)

        # # Create the item dictionary: item name is an integer, and value is
        # # a weight.
        # items = {}
        # # Create random items and store them in the items' dictionary. (18 rule * 9)
        # for i in range(NBR_ITEMS):
        #     items[i] = i;
        creator.create("FitnessMax", base.Fitness, weights=(1.0, ))
        creator.create("Individual", set, fitness=creator.FitnessMax)

        self.toolbox = base.Toolbox()

        # Attribute generator
        #       define 'attr_item' to be an attribute ('gene')
        #       which corresponds to integers sampled uniformly
        #       from the range [1,10] (i.e. 1 to 10 with equal probability)
        self.toolbox.register("attr_item", random.randrange, self._item_count)

        # Structure initializers
        #       define 'individual' to be an individual
        #       consisting of 10 'attr_item' elements ('genes')
        self.toolbox.register("individual", tools.initRepeat,
                              creator.Individual, self.toolbox.attr_item,
                              IND_INIT_SIZE)

        # define the population to be a list of individuals
        self.toolbox.register("population", tools.initRepeat, list,
                              self.toolbox.individual)

        self.toolbox.register("evaluate", self.eval_ind)
        self.toolbox.register("mate", self.cx_ind)
        self.toolbox.register("mutate", self.mutate_ind)
        self.toolbox.register("select", tools.selNSGA2)

    @property
    def evaluator(self):
        return self._evaluator

    def eval_ind(self, ind):
        """
        calculate the fitness of the individual

        :param ind: the individual Chromosome object to be evaluated
        :return: the fitness value
        """
        print("\n:::: [genetic] individual", ind, "::::")
        start = dt.now()
        fit_val = self._evaluator.evaluate(ind)
        if fit_val > self._best_fit_val:
            self._best_ind = ind
            self._best_fit_val = fit_val
        print(":::: [genetic] Evaluate individual. fitness value", fit_val,
              "Duration",
              dt.now() - start, "::::\n")
        # FitnessMax has a single weight, so the fitness is a 1-tuple
        return fit_val,

    def mutate_ind(self, ind, mu=0, sigma=4, chance_mutation=0.4):
        """
        Mutate the individual by changing its Chromosome composition: with
        probability chance_mutation a random gene is removed, otherwise a
        random gene is added (mu and sigma are currently unused).

        :param ind: the individual Chromosome object to mutate
        :param chance_mutation: probability of removing rather than adding a gene
        :return: the mutated individual
        """
        if random.random() < chance_mutation:
            if len(ind) > 0:  # We cannot pop from an empty set
                ind.remove(random.choice(sorted(tuple(ind))))
        else:
            ind.add(random.randrange(self._item_count))
        return ind

    def cx_ind(self, ind1, ind2, chance_crossover=0.7):
        """Apply a crossover operation on input sets. The first child is the
        intersection of the two sets, the second child is the difference of the
        two sets.
        """
        if random.random() < chance_crossover:
            temp = set(ind1)  # Used in order to keep type
            ind1 &= ind2  # Intersection (inplace)
            ind2 ^= temp  # Symmetric Difference (inplace)
        return ind1, ind2

    def verify_ind(self, ind):
        """
        Verify the validity of the individual Chromosome

        :param ind: the individual Chromosome to be verified
        :return: True if the Chromosome is valid, False otherwise
        """
        # TODO: would it be better to make this a method of Chromosome?
        return True

    def best_ind(self):
        """
        Get the best individual after the GA calculation

        :return: the best individual Chromosome
        """
        random.seed(64)
        NGEN = 50
        MU = 50
        LAMBDA = 100
        CXPB = 0.7
        MUTPB = 0.2

        pop = self.toolbox.population(n=MU)
        hof = tools.ParetoFront()
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("avg", numpy.mean, axis=0)
        stats.register("std", numpy.std, axis=0)
        stats.register("min", numpy.min, axis=0)
        stats.register("max", numpy.max, axis=0)
        try:
            algorithms.eaMuPlusLambda(pop,
                                      self.toolbox,
                                      MU,
                                      LAMBDA,
                                      CXPB,
                                      MUTPB,
                                      NGEN,
                                      stats,
                                      halloffame=hof)
        except Exception as err:
            print(err)
            return self._best_ind

        print("The best individual is :", hof[-1])
        print(len(pop))
        print(len(hof))
        # print("The best fitness is :", eval_ind(self, hof[-1]))
        return hof[-1]
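
Driving the class is then straightforward (a hypothetical sketch: data stands for whatever rule dataset the project's Evaluator expects, and module-level DEAP constants such as IND_INIT_SIZE must already be defined):

engine = GeneticEngine(data)
best = engine.best_ind()
print("Best rule subset:", sorted(best))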
Example No. 14
def main():
    os.makedirs(config.temp_dir, exist_ok=True)
    os.makedirs(config.result_dir, exist_ok=True)
    os.makedirs(config.train_log_dir, exist_ok=True)

    logger.setLevel(logging.INFO)
    init_logger(logging.INFO, 'temp.log.txt', 'w')

    logger.info('preparing data...')
    config.word_2_id, config.id_2_word = read_json_dict(config.vocab_dict)
    config.vocab_size = min(config.vocab_size, len(config.word_2_id))
    config.oov_vocab_size = min(config.oov_vocab_size,
                                len(config.word_2_id) - config.vocab_size)

    embedding_matrix = None
    if args.do_train:
        if os.path.exists(config.glove_file):
            logger.info('loading embedding matrix from file: {}'.format(
                config.glove_file))
            embedding_matrix, config.word_em_size = load_glove_embedding(
                config.glove_file, list(config.word_2_id.keys()))
            logger.info('shape of embedding matrix: {}'.format(
                embedding_matrix.shape))
    else:
        if os.path.exists(config.glove_file):
            with open(config.glove_file, 'r', encoding='utf-8') as fin:
                line = fin.readline()
                config.word_em_size = len(line.strip().split()) - 1

    data_reader = DataReader(config)
    evaluator = Evaluator('tgt')

    logger.info('building model...')
    model = get_model(config, embedding_matrix)
    saver = tf.train.Saver(max_to_keep=10)

    if args.do_train:
        logger.info('loading data...')
        train_data = data_reader.read_train_data()
        valid_data = data_reader.read_valid_data()

        logger.info(log_title('Trainable Variables'))
        for v in tf.trainable_variables():
            logger.info(v)

        logger.info(log_title('Gradients'))
        for g in model.gradients:
            logger.info(g)

        with tf.Session(config=sess_config) as sess:
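            # Resume from an explicit --model_file if given, otherwise from
            # the latest checkpoint; initialize from scratch only if neither
            # exists.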
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(
                    os.path.join(config.result_dir, config.current_model))
            if model_file is not None:
                logger.info('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)
            else:
                logger.info('initializing from scratch...')
                tf.global_variables_initializer().run()

            train_writer = tf.summary.FileWriter(config.train_log_dir,
                                                 sess.graph)

            valid_log_history = run_train(sess, model, train_data, valid_data,
                                          saver, evaluator, train_writer)
            save_json(
                valid_log_history,
                os.path.join(config.result_dir, config.current_model,
                             'valid_log_history.json'))

    if args.do_eval:
        logger.info('loading data...')
        valid_data = data_reader.read_valid_data()

        with tf.Session(config=sess_config) as sess:
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(
                    os.path.join(config.result_dir, config.current_model))
            if model_file is not None:
                logger.info('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)

                predicted_ids, valid_loss, valid_accu = run_evaluate(
                    sess, model, valid_data)
                logger.info(
                    'average valid loss: {:>.4f}, average valid accuracy: {:>.4f}'
                    .format(valid_loss, valid_accu))

                logger.info(log_title('Saving Result'))
                save_outputs(predicted_ids, config.id_2_word,
                             config.valid_data, config.valid_outputs)
                results = evaluator.evaluate(config.valid_data,
                                             config.valid_outputs,
                                             config.to_lower)
                save_json(results, config.valid_results)
            else:
                logger.info('model not found!')

    if args.do_test:
        logger.info('loading data...')
        test_data = data_reader.read_test_data()

        with tf.Session(config=sess_config) as sess:
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(
                    os.path.join(config.result_dir, config.current_model))
            if model_file is not None:
                logger.info('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)

                predicted_ids = run_test(sess, model, test_data)

                logger.info(log_title('Saving Result'))
                save_outputs(predicted_ids, config.id_2_word, config.test_data,
                             config.test_outputs)
                results = evaluator.evaluate(config.test_data,
                                             config.test_outputs,
                                             config.to_lower)
                save_json(results, config.test_results)
            else:
                logger.info('model not found!')
Example No. 15
def main():
    os.makedirs(config.temp_dir, exist_ok=True)
    os.makedirs(config.result_dir, exist_ok=True)
    os.makedirs(config.train_log_dir, exist_ok=True)

    logger.setLevel(logging.INFO)
    init_logger(logging.INFO)

    logger.info('loading dict...')
    config.src_2_id, config.id_2_src = read_json_dict(config.src_vocab_dict)
    config.src_vocab_size = min(config.src_vocab_size, len(config.src_2_id))
    config.tgt_2_id, config.id_2_tgt = read_json_dict(config.tgt_vocab_dict)
    config.tgt_vocab_size = min(config.tgt_vocab_size, len(config.tgt_2_id))

    data_reader = DataReader(config)
    evaluator = Evaluator('tgt')

    logger.info('building model...')
    model = get_model(config)
    saver = tf.train.Saver(max_to_keep=10)

    if args.do_train:
        logger.info('loading data...')
        train_data = data_reader.load_train_data()
        valid_data = data_reader.load_valid_data()

        logger.info(log_title('Trainable Variables'))
        for v in tf.trainable_variables():
            logger.info(v)

        logger.info(log_title('Gradients'))
        for g in model.gradients:
            logger.info(g)

        with tf.Session(config=sess_config) as sess:
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(
                    os.path.join(config.result_dir, config.current_model))
            if model_file is not None:
                logger.info('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)
            else:
                logger.info('initializing from scratch...')
                tf.global_variables_initializer().run()

            train_writer = tf.summary.FileWriter(config.train_log_dir,
                                                 sess.graph)
            valid_log_history = run_train(sess, model, train_data, valid_data,
                                          saver, evaluator, train_writer)
            save_json(
                valid_log_history,
                os.path.join(config.result_dir, config.current_model,
                             'valid_log_history.json'))

    if args.do_eval:
        logger.info('loading data...')
        valid_data = data_reader.load_valid_data()

        with tf.Session(config=sess_config) as sess:
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(
                    os.path.join(config.result_dir, config.current_model))
            if model_file is not None:
                logger.info('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)

                predicted_ids, valid_loss, valid_accu = run_evaluate(
                    sess, model, valid_data)
                logger.info(
                    'average valid loss: {:>.4f}, average valid accuracy: {:>.4f}'
                    .format(valid_loss, valid_accu))

                logger.info(log_title('Saving Result'))
                save_outputs(predicted_ids, config.id_2_tgt, config.valid_data,
                             config.valid_outputs)
                results = evaluator.evaluate(config.valid_data,
                                             config.valid_outputs,
                                             config.to_lower)
                save_json(results, config.valid_results)
            else:
                logger.info('model not found!')

    if args.do_test:
        logger.info('loading data...')
        test_data = data_reader.load_test_data()

        with tf.Session(config=sess_config) as sess:
            model_file = args.model_file
            if model_file is None:
                model_file = tf.train.latest_checkpoint(
                    os.path.join(config.result_dir, config.current_model))
            if model_file is not None:
                logger.info('loading model from {}...'.format(model_file))
                saver.restore(sess, model_file)

                predicted_ids = run_test(sess, model, test_data)

                logger.info(log_title('Saving Result'))
                save_outputs(predicted_ids, config.id_2_tgt, config.test_data,
                             config.test_outputs)
                results = evaluator.evaluate(config.test_data,
                                             config.test_outputs,
                                             config.to_lower)
                save_json(results, config.test_results)
            else:
                logger.info('model not found!')