Code Example #1
File: train.py Project: ancue/relation_extraction
def test(sess, m_val):
    m_val.restore(sess)
    fetches = [m_val.accuracy, m_val.label]
    accuracy, predictions = sess.run(fetches)
    print('accuracy: %.4f' % accuracy)

    utils.write_results(predictions, FLAGS.relation_file, FLAGS.results_file)
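
A minimal sketch of what a write_results helper with this signature could look like, assuming the relation file lists one relation name per line with the line number as the label index (the file formats here are assumptions for illustration, not taken from the project):

def write_results(predictions, relation_file, results_file):
    # Assumed format: one relation name per line; line number == label index.
    with open(relation_file) as f:
        id2relation = {i: line.strip() for i, line in enumerate(f)}
    # Write one "index<TAB>relation" row per prediction.
    with open(results_file, 'w') as f:
        for idx, pred in enumerate(predictions):
            f.write('%d\t%s\n' % (idx, id2relation[int(pred)]))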
Code Example #2
def main():

    # Run model.
    model, results = experiments.tspec()

    # Plot training curves.
    visualize.training(results)

    # Save model.
    torch.save(
        model,
        os.path.join(PKG_PATH, 'models/best_tspec_model_{}.pt'.format(TSTAMP)))

    # Save results.
    utils.write_results(
        results,
        os.path.join(PKG_PATH,
                     'models/best_tspec_results_{}.pkl'.format(TSTAMP)))

    # Visualizations using non-shuffled data.
    train_data = utils.Data(train=True, augmentation=True)
    valid_data = utils.Data(train=False, augmentation=False)

    visualize.spectra(train_data, log=False, name='spectra_train')
    visualize.spectra(valid_data, log=False, name='spectra_valid')
    visualize.timeseries(train_data, name='timeseries_train')
    visualize.timeseries(valid_data, name='timeseries_valid')
    visualize.pca(train_data)
    visualize.tsne(train_data)
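
Since this example saves results to a .pkl path, a pickle-backed writer is one plausible shape for utils.write_results here; the sketch below is an assumption for illustration, not the project's implementation:

import os
import pickle

def write_results(results, path):
    # Create the target directory if needed, then pickle the results object.
    os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
    with open(path, 'wb') as f:
        pickle.dump(results, f)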
Code Example #3
File: depict_keras.py Project: vonalan/DC
def main():
    print(alpha)

    xtrain = np.loadtxt(FLAGS.path_to_xtrain)
    xtest = np.loadtxt(FLAGS.path_to_xtest)
    xrand = np.loadtxt(FLAGS.path_to_xrand)
    print(xtrain.shape, xtest.shape, xrand.shape)

    FLAGS.depict_input_dim = 162
    FLAGS.rbfnn_num_center = 120
    for i in range(7, 16 + 1):
        k = 1 << i
        FLAGS.depict_output_dim = k
        FLAGS.rbfnn_input_dim = k
        pprint.pprint(FLAGS)

        depict_input_shape = (162, )
        base_model, train_model = build(depict_input_shape)

        for j in range(20):  # inner iteration; distinct from the outer loop variable i
            train_model.compile(optimizer=Adam(lr=1e-4), loss=depict_loss)
            train_model.fit(xtrain, xtrain, epochs=1, validation_split=0.2)
            ys_train = base_model.predict(xtrain)
            ys_test = base_model.predict(xtest)
            metrics = classifier.run_with_soft_assignment(
                ys_train, ys_test, FLAGS)

            # metrics = classifier.run(ys_train, ys_test, FLAGS)

            print('num_cluster: %d, iteration: %d, alpha: %f' % (k, j, alpha))
            pprint.pprint(metrics)
            utils.write_results(FLAGS,
                                metrics,
                                j,
                                postfix='alpha_%.12f' % (alpha))
Code Example #4
def main(test_mode=False):

    log_fname = "logs/train.log"
    if os.path.isfile(log_fname):
        os.remove(log_fname)
    log_hdl = logging.FileHandler(log_fname)
    log_hdl.setFormatter(logging.Formatter('%(message)s'))
    LOGGER.addHandler(log_hdl)

    data = utils.load_data_2d(test_mode=test_mode,
                              valid_pct=0.1,
                              cropping=False)

    # way to map between string labels and int labels
    y_map = utils.get_y_map(data)
    data['y']['train'] = utils.convert_y(data['y']['train'], y_map)
    data['y']['valid'] = utils.convert_y(data['y']['valid'], y_map)

    # run experiments
    y_test, model, performance, optimizer = exp.resnet(data)
    y_test = utils.convert_y(y_test, y_map)
    utils.write_results('results/resnet.csv', y_test)

    import IPython
    IPython.embed()
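
The results here go to results/resnet.csv, so a minimal CSV writer along these lines would fit the call (hypothetical; the project's utils.write_results may differ):

import csv

def write_results(csv_path, y_test):
    # One row per prediction, with a running id column.
    with open(csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['id', 'label'])
        for i, label in enumerate(y_test):
            writer.writerow([i, label])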
Code Example #5
def main(args):
    expectations, input_files, columns = pre_process_args(args)
    successful_dest_folder = args.successful_dest_folder
    failed_dest_folder = args.failed_dest_folder

    files_handler = FilesHandler(input_files=input_files)
    files_path = files_handler.get_files_path()

    validator = FileValidator(columns=columns, expectations=expectations)

    logging.info("Starting the validation files process.. ")
    p = Pool(processes=cpu_count())
    r = list(
        tqdm(p.imap(validator.validate, files_path), total=len(files_path)))
    p.close()
    p.join()

    sucessful_expectations, failed_expectations = parse_results(r)
    write_results(sucessful_expectations, success=True)
    write_results(failed_expectations, success=False)

    if successful_dest_folder is not None:
        for expectation in sucessful_expectations:
            file_name = expectation["file_path"].split("/")[-1]
            files_handler.move_file(file_name, successful_dest_folder)

    if failed_dest_folder is not None:
        for expectation in failed_expectations:
            file_name = expectation["file_path"].split("/")[-1]
            files_handler.move_file(file_name, failed_dest_folder)

    if files_handler.is_from_storage:
        files_handler.clean_temp_folder()
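
The Pool/imap/tqdm combination above is a common recipe for parallelizing per-file work while showing a progress bar. The same pattern stripped to its essentials (validate_file and the paths are stand-ins for FileValidator.validate and the real inputs):

from multiprocessing import Pool, cpu_count
from tqdm import tqdm

def validate_file(path):
    # Stand-in worker: pretend every file passes validation.
    return {'file_path': path, 'ok': True}

if __name__ == '__main__':
    paths = ['a.csv', 'b.csv', 'c.csv']
    with Pool(processes=cpu_count()) as pool:
        results = list(tqdm(pool.imap(validate_file, paths), total=len(paths)))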
Code Example #6
File: modify_results.py Project: jvrplmlmn/eyegrade
def modify_results(results_filename, exam_config_filename,
                   output_filename, invalidate, set_correct):
    config = utils.read_config()
    results = utils.read_results(results_filename)
    exam_data = utils.ExamConfig(exam_config_filename)
    for result in results:
        modify(result, exam_data, invalidate, set_correct)
    utils.write_results(results, output_filename, config['csv-dialect'])
Code Example #7
File: explore.py Project: w-bonelli/vessel-detector
def detect_contours(binary: np.ndarray, color: np.ndarray,
                    options: VesselDetectorOptions, stem: str,
                    ext: str) -> np.ndarray:
    print(f"Finding contours in {stem}")
    contours, results = find_contours(binary.copy(), color.copy(), options)
    cv2.imwrite(f"{stem}.contours.{ext}", contours)
    write_results(results, options, f"{stem}.contours")
    return contours
Code Example #8
def train_test(target_shape, dataset):
    model = DFGAN(batch_size=64, target_shape=target_shape, tag=tag, n_param=1.)
    trainer = SRGANTrainer(model=model, dataset=dataset,
                           num_train_steps=100000, lr=0.0001)
    trainer.train_model(None)
    model.batch_size = 100
    fid, i_s = trainer.test_gan_all()
    write_results(fid, i_s, model, dataset, tag)
Code Example #9
def run_experiments_with_cross_validation(
        data,
        dataset_name,
        experiments_configs: List,
        n_sample: int,
        n_iter: int = 20,
        kf_splits: int = 5,
        initialization_method: Callable = random_sample_init,
):
    kf = KFold(n_splits=kf_splits, shuffle=True)

    # derive the iteration count from the fold size (overrides the n_iter argument)
    n_iter = (((len(data) // kf_splits) * (kf_splits - 1)) // n_sample) - 1

    results = []
    samples_data = []
    random_samples_dic = dict()

    for (k, (train_index, test_index)), config in product(enumerate(kf.split(data)), experiments_configs):
        representations = np.array([np.array(sent.tolist()) for sent in data[config['representation']].tolist()])
        labels = np.array([np.array([label]) for label in data.Label.values])
        sentences = np.array([sent for sent in data.sentence.tolist()])

        if config['model_type'] in ORIGINAL_REPRESENTATION_MODELS:
            sentences = representations

        train_representations, test_representations = representations[train_index], representations[test_index]
        train_labels, test_labels = labels[train_index], labels[test_index]
        train_sentences, test_sentences = sentences[train_index], sentences[test_index]

        random_samples_dic[k] = random_samples_dic.get(k, np.random.randint(len(train_index),
                                                                            size=n_sample))  # generate n_sample random indexes from train_index.
        random_init_sample = random_samples_dic.get(k)
        learner = ActiveLearner(
            initialization_method=initialization_method,
            n_samples=n_sample
        )

        res, chosen_samples = run_experiment(deepcopy(learner),
                             config['model_type'],
                             np.copy(train_representations),
                             np.copy(train_sentences),
                             np.copy(test_sentences),
                             np.copy(train_labels),
                             np.copy(test_labels),
                             n_iter,
                             config['sample_method'],
                             random_init_sample,
                             dataset_name)

        res['k_fold'] = [k] * n_iter
        chosen_samples['sample_method'] = res['sample_method'] = [config['sample_method'].__name__] * n_iter
        chosen_samples['representation'] = res['representation'] = [config['representation']] * n_iter
        chosen_samples['model_type'] = res['model_type'] = [config['model_type']] * n_iter
        results.append(res)
        samples_data.append(chosen_samples)

    write_results(results, dataset_name, samples_data)
Code Example #10
async def main(endpoints):
    results = {}
    coroutines = [
        asyncio.create_task(check_open_port(endpoint))
        for endpoint in endpoints
    ]
    completed, pending = await asyncio.wait(coroutines)
    for c in completed:
        results[c.result()[0]] = c.result()[1]
    write_results(results, 'asyncio')
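
asyncio.wait returns (done, pending) sets of tasks, and each task's result here is an (endpoint, status) pair. The same fan-out pattern in isolation, with a stand-in port check:

import asyncio

async def check_open_port(endpoint):
    # Stand-in: report every endpoint as reachable.
    return endpoint, True

async def scan(endpoints):
    tasks = [asyncio.create_task(check_open_port(e)) for e in endpoints]
    done, _pending = await asyncio.wait(tasks)
    return {t.result()[0]: t.result()[1] for t in done}

results = asyncio.run(scan(['host-a:80', 'host-b:443']))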
Code Example #11
File: train_test_C3D.py Project: sjenni/temporal-ssl
def main(_):
    transforms = FLAGS.transforms.split(',')

    method_name = 'UCF_C3D_{}'.format(''.join(transforms))
    tag = '{}_{}'.format(method_name, FLAGS.tag)
    net_scope = 'features'
    preprocessor_ssl = PreprocessorTransform(seq_length=16, n_speeds=FLAGS.n_speed, crop_size=(112, 112),
                                             resize_shape=(128, 171), transforms=transforms)
    preprocessor = Preprocessor(seq_length=16, skip=FLAGS.frame_skip,
                                crop_size=(112, 112), resize_shape=(128, 171), num_test_seq=32)

    # Initialize the data generator
    dataset_train = UCF101('train_0')

    # Define the network and training
    model = SLC3D(scope=net_scope, tag=tag, net_args={'version': FLAGS.net_version})
    trainer = VideoSSLTrainer(model=model, data_generator=dataset_train, pre_processor=preprocessor_ssl,
                              num_epochs=FLAGS.n_eps_pre, batch_size=FLAGS.batch_size, tag='pre',
                              init_lr=FLAGS.pre_lr, momentum=FLAGS.momentum, wd=FLAGS.wd, skip_pred=FLAGS.skip_pred,
                              num_gpus=FLAGS.num_gpus, train_scopes=net_scope)
    trainer.train_model()
    ckpt = wait_for_new_checkpoint(trainer.get_save_dir(), last_checkpoint=None)

    for i in range(0, 3):
        # Transfer UCF
        transfer_dataset = UCF101('train_{}'.format(i))
        ftuner = VideoBaseTrainer(model=model, data_generator=transfer_dataset, pre_processor=preprocessor,
                                  num_epochs=FLAGS.n_eps_ftune, batch_size=FLAGS.batch_size_ftune,
                                  init_lr=FLAGS.ftune_lr, momentum=FLAGS.momentum, wd=FLAGS.wd,
                                  num_gpus=FLAGS.num_gpus, train_scopes=net_scope, tag='ftune_split{}'.format(i),
                                  exclude_scopes=['global_step', '{}/fc_3'.format(net_scope)])
        ftuner.train_model(ckpt)

        # Evaluate
        dataset_test = UCF101('test_{}'.format(i))
        tester = VideoBaseTester(model, dataset_test, FLAGS.batch_size, preprocessor)
        results = tester.test_classifier_multi_crop(ftuner.get_save_dir())
        write_results(results[0], '{}_ftune_split{}_{}'.format(tag, i, transfer_dataset.name), FLAGS)

        # Finetuning HMDB
        transfer_dataset = HMDB51('train_{}'.format(i))
        ftuner = VideoBaseTrainer(model=model, data_generator=transfer_dataset, pre_processor=preprocessor,
                                  num_epochs=FLAGS.n_eps_ftune, batch_size=FLAGS.batch_size_ftune,
                                  init_lr=FLAGS.ftune_lr, momentum=FLAGS.momentum, wd=FLAGS.wd,
                                  num_gpus=FLAGS.num_gpus, train_scopes=net_scope, tag='ftune_split{}'.format(i),
                                  exclude_scopes=['global_step', '{}/fc_3'.format(net_scope)])
        ftuner.train_model(ckpt)

        # Evaluate
        dataset_test = HMDB51('test_{}'.format(i))
        tester = VideoBaseTester(model, dataset_test, FLAGS.batch_size, preprocessor)
        results = tester.test_classifier_multi_crop(ftuner.get_save_dir())
        write_results(results[0], '{}_ftune_split{}_{}'.format(tag, i, transfer_dataset.name), FLAGS)
Code Example #12
    def predict_fine(self, testing_data, results_file):
        x_test, y_test = testing_data

        p = self.prediction_params

        yh_s = self.full_classifier.predict(x_test, batch_size=p['batch_size'])

        single_classifier_error = utils.get_error(y_test, yh_s)
        logger.info('Single Classifier Error: '+str(single_classifier_error))

        results_dict = {'Single Classifier Error': single_classifier_error}
        utils.write_results(results_file, results_dict=results_dict)

        return yh_s
Code Example #13
def execute_bfs(grid: np.ndarray, goal: str, max_l: int, puzzle_number: int,
                heuristic_algorithm: str):
    """
    Wrapper function to run bfs
    :param grid: numpy 2D array representation of the input board.
    :param goal: goal grid string
    :param max_l: maximum search path length
    :param puzzle_number: line number of the puzzle
    :param heuristic_algorithm: Heuristic algorithm to be used for this run
    :return: void
    """
    print(
        "Executing BFS Algorithm with heuristic {} and max search length of {} on the grid\n{}"
        .format(heuristic_algorithm, max_l, grid))
    # Initialize necessary data structures
    """
    float: h(n)
    int: pegs
    Node: board state
    """
    open_list: List[Tuple[float, int, Node]] = []
    open_set = set()  # path needed
    closed_set = set()  # nodes already visited
    search_path: List[str] = []

    # initialize root node information
    s_grid = grid_to_string(grid)
    path = ['{}   {}'.format(0, s_grid)]  # add the initial board state to the path
    num_black_tokens = s_grid.count('1')
    hn = get_heuristic(heuristic_algorithm, num_black_tokens, 0, set(), '')
    root_node = Node(grid, s_grid, 1, path, hn, num_black_tokens, set())

    heappush(open_list,
             (root_node.get_hn(), get_white_token_score(s_grid), root_node))
    open_set.add(s_grid)

    start_time = time.time()
    solution_path = bfs(open_list, open_set, closed_set, search_path, goal,
                        max_l, heuristic_algorithm,
                        start_time + TIME_TO_SOLVE_PUZZLE_SECONDS)
    end_time = time.time()
    write_results(puzzle_number, BEST_FIRST_ALGORITHM, heuristic_algorithm,
                  solution_path, search_path)
    gather_performance(puzzle_number, np.size(grid, 0), solution_path,
                       len(search_path), start_time, end_time,
                       BEST_FIRST_ALGORITHM, heuristic_algorithm)
    print('Found no solution' if solution_path == constant.NO_SOLUTION else
          'Found solution in {} moves'.format(len(solution_path) - 1))
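
The open list is a plain Python list kept in heap order: pushing (h(n), white-token score, node) tuples means heappop always yields the lowest heuristic value, with the second element breaking ties. In miniature:

from heapq import heappush, heappop

open_list = []
heappush(open_list, (2.0, 5, 'board_a'))   # (h(n), tie-breaker, state)
heappush(open_list, (1.0, 3, 'board_b'))
heappush(open_list, (1.0, 1, 'board_c'))
hn, tie, state = heappop(open_list)        # -> (1.0, 1, 'board_c')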
Code Example #14
def generate_csv(timer, mode, write_hits, write_groups):
    for i in range(write_groups):
        response_list = []
        hits = 0
        current = datetime.datetime.now()
        while hits <= write_hits:
            client = init_client()
            in_current = datetime.datetime.now()
            hits += 1
            response_list.append(generate_stats(client, in_current))
            print('Sleeping from ' + get_current_hour_minutes_seconds(in_current) + ' for ' + str(timer * 60) + ' seconds')
            if mode == 'dev':
                print('deving...')
            else:
                time.sleep(timer * 60)
        print('Writing results ' + str(i + 1))
        write_results(PATH, response_list, current)
Code Example #15
def write_results(run_data):
    """Write all of the recorded results from the experiments"""
    if run_data["options"].verbosity > 0:
        print()
        print(f"Writing confusion matrices to {run_data['options'].result_file}...")
    utils.write_results(
        run_data["options"].result_file,
        torch.round(
            run_data["sim_confusion_matrices"] / run_data["options"].num_sims
        ).long()
    )
    if (scmb := run_data.get('sim_confusion_matrices_bd')) is not None:
        utils.write_results(
            f"bd_{run_data['options'].result_file}",
            torch.round(
                scmb / run_data["options"].num_sims
            ).long()
        )
Code Example #16
    def predict_coarse(self, testing_data, results_file, fine2coarse):
        x_test, y_test = testing_data

        p = self.prediction_params

        yh_s = self.full_classifier.predict(x_test, batch_size=p['batch_size'])

        single_classifier_error = utils.get_error(y_test, yh_s)
        logger.info('Single Classifier Error: ' + str(single_classifier_error))

        yh_c = np.dot(yh_s, fine2coarse)
        y_test_c = np.dot(y_test, fine2coarse)
        coarse_classifier_error = utils.get_error(y_test_c, yh_c)

        logger.info('Coarse Classifier Error: ' + str(coarse_classifier_error))
        results_dict = {'Single Classifier Error': single_classifier_error,
                        'Coarse Classifier Error': coarse_classifier_error}
        utils.write_results(results_file, results_dict=results_dict)
Code Example #17
def main(test_mode=False):

    log_fname = "logs/train13.log"
    if os.path.isfile(log_fname):
        os.remove(log_fname)
    log_hdl = logging.FileHandler(log_fname)
    log_hdl.setFormatter(logging.Formatter('%(message)s'))
    LOGGER.addHandler(log_hdl)

    data = utils.load_data(test_mode=test_mode, cropping=True)

    # way to map between string labels and int labels
    y_map = utils.get_y_map(data)
    data['y']['train'] = utils.convert_y(data['y']['train'], y_map)
    data['y']['valid'] = utils.convert_y(data['y']['valid'], y_map)

    # run experiments
    lr_pred, lr_model = exp.lr_baseline(data)
    lr_y_test = utils.convert_y(lr_pred['test'], y_map)
    utils.write_results('results/lr_baseline.csv', lr_y_test)
Code Example #18
File: methods.py Project: w-bonelli/vessel-detector
def original(options: VesselDetectorOptions):
    output_prefix = join(options.output_directory, f"{options.input_stem}")
    output_ext = 'png'

    # read the image
    if options.input_file.endswith('.czi'):
        image = czifile.imread(options.input_file)
        image.shape = (image.shape[2], image.shape[3], image.shape[4]
                       )  # drop the first two axes
        output_ext = 'jpg'
    else:
        image = cv2.imread(options.input_file)

    # make backup image
    image_copy = image.copy()
    cv2.imwrite(f"{output_prefix}.original.{output_ext}", image_copy)

    # threshold and clustering
    colorspace = 'lab'
    channels = 'all'
    clusters = 2
    thresh = clustering_grayscale(image_copy, colorspace, channels, clusters) if image.shape[2] == 1 \
        else clustering_color(image_copy, colorspace, channels, clusters)
    cv2.imwrite(f"{output_prefix}.segmented.{output_ext}", thresh)

    # watershed segmentation
    min_distance_value = 5
    labels = apply_watershed(image_copy, thresh, min_distance_value)

    # find vessel contours
    if options.min_radius is not None:
        (avg_curv, label_trait,
         results) = find_vessels(image_copy, labels, image.shape[2] == 1,
                                 options.min_radius)
    else:
        (avg_curv, label_trait,
         results) = find_vessels(image_copy, labels, image.shape[2] == 1)
    write_results(results, options, f"{output_prefix}")
    if label_trait is not None:
        cv2.imwrite(f"{output_prefix}.contours.{output_ext}", label_trait)
Code Example #19
def evaluate(tokens, instances, labels, write_result=False):
    '''Evaluate and print results'''
    prediction, target = sess.run(
        [model.pred, model.output_y],
        feed_dict={
            model.input_x: np.asarray(instances),
            model.output_y: np.asarray(labels),
            model.dropout: 1.0
        })
    prec, recall, f1sc = f1score(2, prediction, target)
    if write_result:
        print("Found MAX")
        print("--Tokenwise P:{:.5f}".format(prec),
              "R:{:.5f}".format(recall), "F1:{:.5f}".format(f1sc))
        prec, recall, f1sc = phrasalf1score(args, tokens, prediction,
                                            target)
        print("--Phrasal P:{:.5f}".format(prec),
              "R:{:.5f}".format(recall), "F1:{:.5f}".format(f1sc))
        write_results(tokens, prediction, target,
                      "runs/res_{:.5f}".format(f1sc) + ".txt")
    return f1sc
Code Example #20
File: vanilla_cnn.py Project: rarriaza/ATPRO_HCNN
    def predict(self, testing_data, results_file, fine2coarse):
        x_test, y_test = testing_data

        self.load_best_full_model()

        p = self.prediction_params

        yh_s = self.full_model.predict(x_test, batch_size=p['batch_size'])

        single_classifier_error = utils.get_error(y_test, yh_s)
        logger.info('Single Classifier Error: ' + str(single_classifier_error))

        results_dict = {'Single Classifier Error': single_classifier_error}
        utils.write_results(results_file, results_dict=results_dict)

        np.save(self.model_directory + "/fine_predictions.npy", yh_s)
        # np.save(self.model_directory + "/coarse_predictions.npy", ych_s)
        np.save(self.model_directory + "/fine_labels.npy", y_test)
        # np.save(self.model_directory + "/coarse_labels.npy", yc_test)

        return yh_s
Code Example #21
def run(args):

    start = time.time()
    if args.local_file is not None:
        file_path = download_file_from_storage(args.input_file)
    else:
        file_path = args.input_file

    index = args.index_column
    date_column = args.date_column
    y_column = args.y_column

    data = load_parse_file(file_path=file_path)
    dataframes = get_frames_by_id(dataframe=data, index_col=index)

    # save dataframes to Google Cloud Storage
    with open("dataframes.dill", "wb") as dill_file:
        dill.dump(dataframes, dill_file)
    if args.local_file is None:
        save_in_gcs("dataframes.dill", args.output_path)

    prophet_config_file_name = args.prophet_options
    if prophet_config_file_name is not None:
        with open(prophet_config_file_name, 'r') as f:
            config = json.load(f)
    else:
        config = {'prophet_obj_kwds': {}, 'predict_kwds': {}, 'fit_kwds': {}}

    p = Pool(cpu_count())
    partial_func = partial(run_prophet,
                           date_column=date_column,
                           y_column=y_column,
                           index_column=index,
                           type_=args.type,
                           start_date=args.start_date,
                           end_date=args.end_date,
                           prophet_obj_kwds=config['prophet_obj_kwds'],
                           predict_kwds=config['predict_kwds'],
                           fit_kwds=config['fit_kwds'])

    predictions = p.map(partial_func, dataframes)
    results_path = write_results(predictions, file_name=args.output_name)
    if args.local_file is None:
        save_in_gcs(results_path, args.output_path)
    print("Done in {0} minutes".format((time.time() - start) / 60))
Code Example #22
def main(n=None):
    
    if n:
        n = int(n)
    
    # Load word2vec models.
    model_source = "vectors/source_vectors.bin"
    model_target = "vectors/target_vectors.bin"
    model_source = KeyedVectors.load_word2vec_format(model_source, binary=True)
    model_target = KeyedVectors.load_word2vec_format(model_target, binary=True)
    
    # Load translation matrix from file.
    tm = np.loadtxt("data/transformation_matrix.csv", delimiter=",")
    
    # Pairs: a CSV file containing source and target sentence pairs.
    pairsfile = "data/sample_pairs.csv"
    if n:
        # Shorten data to n pairs to make testing faster.
        pairs = pairs_from_df(pairsfile)
        pairs = shuffle_and_slice(pairs, n)
    else:
        pairs = pairs_from_df(pairsfile)
            
    pair_objects = list()
    for p in pairs:
        # define pair objects
        pair = SentencePair(p[0], p[1])
        pair_objects.append(pair)
    
    pairs = calculate_sentence_vectors(model_source, model_target, tm, pair_objects)
    scores = compare_sentence_vectors(pairs)
    pairs = add_scores_to_pairs(pairs, scores)
    
    average = np.mean(scores)
    print("Scores average: ", average)
    
    write_results(pairs)
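
The translation matrix tm maps source-embedding vectors into the target space; for a single vector this is just a matrix-vector product. A toy-shape sketch (the identity matrix stands in for the learned transformation):

import numpy as np

dim = 300
tm = np.eye(dim)                  # stand-in for the learned transformation matrix
source_vec = np.random.rand(dim)
mapped_vec = tm @ source_vec      # now comparable to target-language vectors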
Code Example #23
"Designing Poly-Time Algorithms" lecture
"""


def better_enum_max_subarray(ls):
    maxSum = newSum = 0
    low = high = 0
    for i in range(len(ls)):
        newSum = 0
        for j in range(i, len(ls)):
            newSum = newSum + ls[j]
            if newSum > maxSum:
                maxSum = newSum
                low = i
                high = j
    return low, high, maxSum


if __name__ == '__main__':
    problems = load_problems()
    for problem in problems:
        results = better_enum_max_subarray(ls=problem)
        write_results(
            filename='MSS_Results.txt',
            original_array=problem,
            max_subarray=problem[results[0]:results[1] + 1],
            max_sum=results[2],
        )
Code Example #24
def divide_and_conquer_find_max_subarray(array, low, high):
    # find_max_crossing_subarray, load_problems and write_results are
    # assumed to be defined elsewhere in the project.
    if low == high:
        # base case: a single element
        return low, high, array[low]
    else:
        mid = (low + high) // 2

        # left and right sub-problems
        left_low, left_high, left_sum = divide_and_conquer_find_max_subarray(array, low, mid)

        right_low, right_high, right_sum = divide_and_conquer_find_max_subarray(array, mid + 1, high)

        # crossing sub-problem
        cross_low, cross_high, cross_sum = find_max_crossing_subarray(array, low, mid, high)

        # case 1: max sub-array is in left array
        if left_sum >= right_sum and left_sum >= cross_sum:
            return left_low, left_high, left_sum

        # case 2: max sub-array is in right array
        elif right_sum >= left_sum and right_sum >= cross_sum:
            return right_low, right_high, right_sum

        # case 3: max sub-array is in array crossing midpoint
        else:
            return cross_low, cross_high, cross_sum


if __name__ == "__main__":
    problems = load_problems()
    for problem in problems:
        results = divide_and_conquer_find_max_subarray(array=problem, low=0, high=len(problem) - 1)
        write_results(
            filename="MSS_Results.txt",
            original_array=problem,
            max_subarray=problem[results[0] : results[1] + 1],
            max_sum=results[2],
        )
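
The recursion above relies on find_max_crossing_subarray, which the excerpt does not show. A standard CLRS-style version is sketched here for completeness; it is not the project's actual code:

def find_max_crossing_subarray(array, low, mid, high):
    # Best subarray ending at mid, scanning leftwards.
    left_sum, total, max_left = float('-inf'), 0, mid
    for i in range(mid, low - 1, -1):
        total += array[i]
        if total > left_sum:
            left_sum, max_left = total, i
    # Best subarray starting at mid + 1, scanning rightwards.
    right_sum, total, max_right = float('-inf'), 0, mid + 1
    for j in range(mid + 1, high + 1):
        total += array[j]
        if total > right_sum:
            right_sum, max_right = total, j
    return max_left, max_right, left_sum + right_sum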
Code Example #25
        y0, y1 = get_y0_y1(sess, y_post, f0, f1, shape=yalltr.shape, L=100, verbose=False, task=task)
        if task == 'ihdp':
            y0, y1 = y0 * ys + ym, y1 * ys + ym
        score = evaluator_train.calc_stats(y1, y0)
        scores[i, :] = score
        # out-test score
        y0t, y1t = get_y0_y1(sess, y_post, f0t, f1t, shape=yte.shape, L=100, verbose=False, task=task)
        if task == 'ihdp':
            y0t, y1t = y0t * ys + ym, y1t * ys + ym
        score_test = evaluator_test.calc_stats(y1t[test_filter], y0t[test_filter])
        scores_test[i, :] = score_test
        print('Replication: {}/{}, tr_ite: {:0.3f}, tr_ate: {:0.3f}, tr_score: {:0.3f}' \
              ', te_ite: {:0.3f}, te_ate: {:0.3f}, te_score: {:0.3f}'.format(i + 1, args.reps,
                                                                            score[0], score[1], score[2],
                                                                            score_test[0], score_test[1], score_test[2]))
        sess.close()

print('CEVAE model total scores ' + str(arg_info))
train_means, train_stds = np.mean(scores, axis=0), sem(scores, axis=0)
print('train ITE: {:.3f}+-{:.3f}, train ATE: {:.3f}+-{:.3f}, train SCORE: {:.3f}+-{:.3f}' \
      ''.format(train_means[0], train_stds[0], train_means[1], train_stds[1], train_means[2], train_stds[2]))
test_means, test_stds = np.mean(scores_test, axis=0), sem(scores_test, axis=0)
print('test ITE: {:.3f}+-{:.3f}, test ATE: {:.3f}+-{:.3f}, test SCORE: {:.3f}+-{:.3f}' \
      ''.format(test_means[0], test_stds[0], test_means[1], test_stds[1], test_means[2], test_stds[2]))
# save scores to csv file
results = (train_means[2], train_stds[2], test_means[2], test_stds[2])
try:
    write_results(args, results)
except Exception:
    print(results)
Code Example #26

# 11.9472119646
def k(tweets):
    num_words = float(tweets.map(lambda t: len(t[COLUMNS.index('tweet_text')].split())).sum())
    avg_length = num_words / tweets.count()
    return "Tweet avg num words\t" + str(avg_length)


if __name__ == "__main__":
    task = '1'
    tweets = get_tweets(task, False)

    result_file = open(result_file(task), "w")

    a = a(tweets)
    b = b(tweets)
    c = c(tweets)
    d = d(tweets)
    e = e(tweets)
    f = f(tweets)
    g = g(tweets)
    h = h(tweets)
    i = i(tweets)
    j = j(tweets)
    k = k(tweets)

    results = [a, b, c, d, e, f, g, h, i, j, k]

    write_results(result_file, results)
Code Example #27
from utils import get_tweets, result_file, write_results
from constants import *


def tweets_place(tweets):
    tuples = \
        tweets.filter(lambda t: t[COLUMNS.index('country_code')] == 'US' and t[COLUMNS.index('place_type')] == 'city')\
        .map(lambda t: (t[COLUMNS.index('place_name')], 1))\
        .aggregateByKey(0, (lambda x, y: x + y), (lambda rdd1, rdd2: rdd1 + rdd2))\
        .sortByKey()\
        .sortBy(lambda t: t[1], False)\
        .map(lambda t: '%s\t%s' % (t[0], t[1]))\
        .collect()

    return tuples


if __name__ == "__main__":
    task = '5'
    tweets = get_tweets(task, False)

    result_file = open(result_file(task), "w")

    results = tweets_place(tweets)

    write_results(result_file, results, cols=['place_name', 'num_tweets'])
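
The Spark pipeline above is a count-sort-format chain. For reference, the same logic on plain Python data (Counter replaces aggregateByKey, and most_common replaces the two sorts):

from collections import Counter

place_names = ['Austin', 'Boston', 'Austin', 'Chicago', 'Austin']
counts = Counter(place_names)
rows = ['%s\t%s' % (place, n) for place, n in counts.most_common()]
# -> ['Austin\t3', 'Boston\t1', 'Chicago\t1']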
Code Example #28
File: svae_eval.py Project: nyu-mll/SentEval
def main(arguments):
    parser = argparse.ArgumentParser(description=__doc__,
                    formatter_class=argparse.RawDescriptionHelpFormatter)

    # Logistics
    parser.add_argument("--cuda", help="CUDA id to use", type=int, default=0)
    parser.add_argument("--seed", help="Random seed", type=int, default=19)
    parser.add_argument("--use_pytorch", help="1 to use PyTorch", type=int, default=1)
    parser.add_argument("--out_dir", help="Dir to write preds to", type=str, default='')
    parser.add_argument("--log_file", help="File to log to", type=str)
    parser.add_argument("--load_data", help="0 to read data from scratch", type=int, default=1)

    # Task options
    parser.add_argument("--tasks", help="Tasks to evaluate on, as a comma separated list", type=str)
    parser.add_argument("--max_seq_len", help="Max sequence length", type=int, default=40)

    # Model options
    parser.add_argument("--ckpt_path", help="Path to ckpt to load", type=str,
                        default=PATH_PREFIX + 'ckpts/svae/glue_svae/best.mdl')
    parser.add_argument("--vocab_path", help="Path to vocab to use", type=str,
                        default=PATH_PREFIX + 'processed_data/svae/glue_v2/vocab.json')
    parser.add_argument("--model", help="Word emb dim", type=str, default='vae')
    parser.add_argument("--embedding_size", help="Word emb dim", type=int, default=300)
    parser.add_argument("--word_dropout", help="Word emb dim", type=float, default=0.5)
    parser.add_argument("--hidden_size", help="RNN size", type=int, default=512)
    parser.add_argument("--latent_size", help="Latent vector dim", type=int, default=16)
    parser.add_argument("--num_layers", help="Number of encoder layers", type=int, default=1)
    parser.add_argument("--bidirectional", help="1 for bidirectional", type=bool, default=False)
    parser.add_argument("--rnn_type", help="Type of rnn", type=str, choices=['rnn', 'gru'],
                        default='gru')
    parser.add_argument("--batch_size", help="Batch size to use", type=int, default=64)

    # Classifier options
    parser.add_argument("--cls_batch_size", help="Batch size to use", type=int, default=64)

    args = parser.parse_args(arguments)
    logging.basicConfig(format='%(asctime)s : %(message)s', level=logging.DEBUG)
    if args.log_file:
        fileHandler = logging.FileHandler(args.log_file)
        logging.getLogger().addHandler(fileHandler)
    logging.info(args)

    # define senteval params
    params_senteval = {'task_path': PATH_TO_DATA, 'usepytorch': args.use_pytorch, 'kfold': 10,
            'max_seq_len': args.max_seq_len, 'batch_size': args.batch_size, 'load_data': args.load_data,
            'seed': args.seed}
    params_senteval['classifier'] = {'nhid': 0, 'optim': 'adam', 'batch_size': args.cls_batch_size,
            'tenacity': 5, 'epoch_size': 4, 'cudaEfficient': True}

    # Load InferSent model
    vocab = json.load(open(args.vocab_path, 'r'))
    args.denoise = False
    args.prob_swap, args.prob_drop = 0.0, 0.0
    if args.model == 'vae':
        model = SentenceVAE(args, vocab['w2i'],
                            #sos_idx=w2i['<sos>'], eos_idx=w2i['<eos>'], pad_idx=w2i['<pad>'],
                            #max_sequence_length=args.max_seq_len,
                            embedding_size=args.embedding_size,
                            rnn_type=args.rnn_type, hidden_size=args.hidden_size,
                            word_dropout=args.word_dropout, latent_size=args.latent_size,
                            num_layers=args.num_layers, bidirectional=args.bidirectional)
    elif args.model == 'ae':
        model = SentenceAE(args, vocab['w2i'],
                           embedding_size=args.embedding_size,
                           rnn_type=args.rnn_type, hidden_size=args.hidden_size,
                           word_dropout=args.word_dropout, latent_size=args.latent_size,
                           num_layers=args.num_layers, bidirectional=args.bidirectional)

    model.load_state_dict(torch.load(args.ckpt_path))
    model = model.cuda()
    model.eval()
    params_senteval['model'] = model

    # Do SentEval stuff
    se = senteval.engine.SE(params_senteval, batcher, prepare)
    tasks = get_tasks(args.tasks)
    results = se.eval(tasks)
    if args.out_dir:
        write_results(results, args.out_dir)
    if not args.log_file:
        print(results)
    else:
        logging.info(results)
Code Example #29
File: run.py Project: zycdev/L2R2
def train(args, model, tokenizer):
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    exp_dir = 'H%d_L%d_E%d_B%d_LR%s_WD%s_%s' % (args.max_hyp_num, args.max_seq_len, args.num_train_epochs,
                                                args.train_batch_size, args.learning_rate, args.weight_decay,
                                                datetime.now().strftime('%m%d%H%M'))
    args.output_dir = os.path.join(args.output_dir, exp_dir)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    with open(os.path.join(args.output_dir, 'args.json'), 'w') as f:
        arg_dict = copy.deepcopy(args.__dict__)
        arg_dict['device'] = str(args.device)
        json.dump(arg_dict, f, indent=2)

    os.mkdir(os.path.join(args.output_dir, 'src'))
    for src_file in ['model.py', 'losses.py', 'run.py']:
        dst_file = os.path.join(args.output_dir, 'src', os.path.basename(src_file))
        shutil.copyfile(src_file, dst_file)

    file_handler = logging.FileHandler(os.path.join(args.output_dir, 'log.txt'))
    file_handler.setFormatter(formatter)
    file_handler.setLevel(logging.DEBUG)
    logger.addHandler(file_handler)

    train_dataset = load_dataset(args, tokenizer, mode='train')
    train_sampler = RandomSampler(train_dataset)
    data_loader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size, num_workers=16)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (len(data_loader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(data_loader) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {
            'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            'weight_decay': args.weight_decay
        },
        {
            'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    # optimizer = optim.SGD(optimizer_grouped_parameters, lr=args.learning_rate)
    # scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if not args.no_cuda and args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    logger.info("***** Running training *****")
    logger.info("  Num stories = %d", len(train_dataset))
    logger.info("  Num epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size)
    logger.info("  Total batch size = %d", args.train_batch_size * args.gradient_accumulation_steps)
    logger.info("  Gradient accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)
    logger.info("  Criterion = %s", args.criterion)
    logger.info("  Learning rate = %s", args.learning_rate)

    tb_writer = SummaryWriter(os.path.join('runs/', exp_dir))

    global_step = 0
    best_acc, best_step = 0, 0
    keys = ['list_mle', 'list_net', 'approx_ndcg', 'rank_net', 'hinge', 'lambda']
    losses = dict.fromkeys(keys, 0.0)
    last_losses = losses.copy()
    model.zero_grad()
    epoch_iterator = trange(int(args.num_train_epochs), desc="Epoch")
    set_seed(args)  # Added here for reproducibility (even between python 2 and 3)
    for epoch in epoch_iterator:
        batch_iterator = tqdm(data_loader, desc="Iteration")
        for step, batch in enumerate(batch_iterator):
            model.train()
            batch = tuple(t.to(args.device) if torch.is_tensor(t) else t for t in batch)
            x = {'input_ids': batch[0], 'token_type_ids': batch[1], 'attention_mask': batch[2]}
            # (batch_size, list_len)
            logits = model(**x)
            labels = batch[3]

            _losses = dict()
            _losses['list_mle'] = list_mle(logits, labels)
            _losses['list_net'] = list_net(logits, labels)
            _losses['approx_ndcg'] = approx_ndcg_loss(logits, labels)
            _losses['rank_net'] = rank_net(logits, labels)
            _losses['hinge'] = pairwise_hinge(logits, labels)
            _losses['lambda'] = lambda_loss(logits, labels)

            if args.n_gpu > 1:
                # mean() to average on multi-gpu parallel (not distributed) training
                for k, v in _losses.items():
                    _losses[k] = v.mean()
            if args.gradient_accumulation_steps > 1:
                for k in _losses.keys():
                    _losses[k] /= args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(_losses[args.criterion], optimizer) as scaled_loss:
                    scaled_loss.backward()
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
            else:
                _losses[args.criterion].backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

            for k in losses.keys():
                losses[k] += _losses[k].item()

            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                # scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                # Log losses
                if args.log_period > 0 and global_step % args.log_period == 0:
                    # tb_writer.add_scalar('lr', scheduler.get_lr()[0], global_step)
                    for k in losses:
                        tb_writer.add_scalar('loss/' + k, (losses[k] - last_losses[k]) / args.log_period, global_step)
                    last_losses = losses.copy()

                # Log metrics
                if args.eval_period > 0 and global_step % args.eval_period == 0:
                    metrics, dev_losses = evaluate(args, model, tokenizer,
                                                   prefix='%d-%d' % (epoch, global_step), partition=1)
                    for k, v in metrics.items():
                        tb_writer.add_scalar('metrics_dev/' + k, v, global_step)
                    for k, v in dev_losses.items():
                        tb_writer.add_scalar('loss_dev/' + k, v, global_step)
                    if metrics['accuracy'] > best_acc:
                        best_acc = metrics['accuracy']
                        best_step = global_step
                        logger.info("  Achieve best accuracy: %.2f", best_acc * 100)
                        output_dir = os.path.join(args.output_dir, 'checkpoint-best_acc')
                        if not os.path.exists(output_dir):
                            os.makedirs(output_dir)
                        model_to_save = model.module if hasattr(model, 'module') else model
                        model_to_save.save_pretrained(output_dir)
                        tokenizer.save_pretrained(output_dir)
                        write_results('step: %d' % best_step,
                                      metrics, dev_losses, os.path.join(output_dir, "dev-eval.txt"))
                        shutil.copyfile(os.path.join(args.output_dir, 'raw_dev.pkl'),
                                        os.path.join(output_dir, 'raw_dev.pkl'))
                        shutil.copyfile(os.path.join(args.output_dir, 'dev-pred.lst'),
                                        os.path.join(output_dir, 'dev-pred.lst'))

                # Save model checkpoint
                if args.save_period > 0 and global_step % args.save_period == 0:
                    output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    model_to_save = model.module if hasattr(model, 'module') else model
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    if global_step % args.eval_period == 0:
                        write_results('step: %d' % global_step,
                                      metrics, dev_losses, os.path.join(output_dir, "dev-eval.txt"))

            batch_iterator.set_description('Iteration(loss=%.4f)' % _losses[args.criterion].item())
            if 0 < args.max_steps < global_step:  # stop_train or
                batch_iterator.close()
                break
        if 0 < args.max_steps < global_step:  # stop_train or
            epoch_iterator.close()
            break

    tb_writer.close()

    logger.info(" global_step = %s, average loss = %s", global_step, losses[args.criterion] / global_step)
    logger.info("achieve best accuracy: %.2f at step %s", best_acc * 100, best_step)

    if args.save_period > 0:
        model_to_save = model.module if hasattr(model, 'module') else model  # Take care of parallel training
        model_to_save.save_pretrained(os.path.join(args.output_dir, 'checkpoint-final'))
        tokenizer.save_pretrained(os.path.join(args.output_dir, 'checkpoint-final'))

    # logger.removeHandler(file_handler)

    return global_step, losses[args.criterion] / global_step
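
The loop above combines loss scaling with stepping the optimizer only every gradient_accumulation_steps batches. The accumulation mechanic on its own, reduced to a toy model (a sketch, independent of the L2R2 code):

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
accumulation_steps = 4
for step in range(8):
    x = torch.randn(8, 4)
    loss = model(x).sum() / accumulation_steps  # scale so gradients average out
    loss.backward()                             # gradients accumulate in .grad
    if (step + 1) % accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()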
Code Example #30
File: run.py Project: zycdev/L2R2
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument("--data_dir", default='dataset/alphanli/', type=str, required=True,
                        help="The input data dir.")
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory where the model checkpoints and predictions will be written.")
    parser.add_argument("--model_type", default=None, type=str, required=True,
                        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
                        help="Path to pretrained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS))

    # Other parameters
    parser.add_argument("--config_name", default="", type=str,
                        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument("--tokenizer_name", default="", type=str,
                        help="Pretrained tokenizer name or path if not the same as model_name")
    parser.add_argument("--do_lower_case", action='store_true',
                        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--cache_dir", default="", type=str,
                        help="Where do you want to store the pre-trained models downloaded from s3")

    parser.add_argument('--linear_dropout_prob', type=float, default=0.6)

    parser.add_argument("--max_hyp_num", default=22, type=int,
                        help="The maximum number of hypotheses for a story.")
    parser.add_argument("--tt_max_hyp_num", default=22, type=int,
                        help="The maximum number of hypotheses for a story at training time.")
    parser.add_argument("--max_seq_len", default=128, type=int,
                        help="The maximum total input sequence length after tokenization. Sequences longer "
                             "than this will be truncated, sequences shorter will be padded.")

    parser.add_argument("--do_train", action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval", action='store_true',
                        help="Whether to run eval on the dev set.")

    parser.add_argument("--criterion", default="list_mle", type=str,
                        help="Criterion for optimization selected in "
                             "[list_mle, list_net, approx_ndcg, rank_net, hinge, lambda]")
    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float,  # 0.01
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=3, type=int,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument('--seed', type=int, default=42,
                        help="random seed for initialization")

    parser.add_argument('--log_period', type=int, default=50,
                        help="Log every X updates steps.")
    parser.add_argument('--eval_period', type=int, default=1000,
                        help="Evaluate every X updates steps.")
    parser.add_argument('--save_period', type=int, default=-1,
                        help="Save checkpoint every X updates steps.")
    parser.add_argument("--eval_all_checkpoints", action='store_true',
                        help="Evaluate all checkpoints starting with the same prefix as model_name "
                             "and ending with step number")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--overwrite_output_dir', action='store_true',
                        help="Overwrite the content of the output directory")
    parser.add_argument('--overwrite_cache', action='store_true',
                        help="Overwrite the cached training and evaluation sets")

    parser.add_argument('--fp16', action='store_true',
                        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit")
    parser.add_argument('--fp16_opt_level', type=str, default='O1',
                        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
                             "See details at https://nvidia.github.io/apex/amp.html")

    parser.add_argument('--comment', default=None, type=str, help='The comment to the experiment')
    args = parser.parse_args()

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir) and
            args.do_train and not args.overwrite_output_dir):
        raise ValueError("Output directory ({}) already exists and is not empty. "
                         "Use --overwrite_output_dir to overcome.".format(args.output_dir))

    # Setup CUDA, GPU
    device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    args.n_gpu = torch.cuda.device_count() if not args.no_cuda else 0
    args.device = device

    logger.info("Device: %s, n_gpu: %s, 16-bits training: %s", device, args.n_gpu, args.fp16)

    # Set seed
    set_seed(args)

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]

    logger.info("Training/evaluation parameters: %s", args)

    # Before do anything with models, we want to ensure that we get fp16 execution of torch.einsum if args.fp16 is set.
    # Otherwise it'll default to "promote" mode, and we'll get fp32 operations.
    # Note that running `--fp16_opt_level="O2"` will remove the need for this code, but it is still valid.
    if args.fp16:
        try:
            import apex
            apex.amp.register_half_function(torch, 'einsum')
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")

    # Training
    if args.do_train:
        config = config_class.from_pretrained(
            args.config_name if args.config_name else args.model_name_or_path,
            cache_dir=args.cache_dir if args.cache_dir else None,
        )
        if not hasattr(config, 'linear_dropout_prob'):
            config.linear_dropout_prob = args.linear_dropout_prob
        tokenizer = tokenizer_class.from_pretrained(
            args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
            do_lower_case=args.do_lower_case,
            cache_dir=args.cache_dir if args.cache_dir else None,
        )
        model = model_class.from_pretrained(
            args.model_name_or_path,
            config=config,
            cache_dir=args.cache_dir if args.cache_dir else None,
        )
        logger.info(str(model))

        model.to(args.device)

        train(args, model, tokenizer)

        # Load a trained model and vocabulary that you have fine-tuned
        model = model_class.from_pretrained(os.path.join(args.output_dir, 'checkpoint-best_acc'))
        model.to(args.device)

    # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
    if args.do_eval:
        results = {}
        if args.eval_all_checkpoints:
            checkpoints = list(os.path.dirname(c)
                               for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
        else:
            checkpoints = [os.path.join(args.output_dir, 'checkpoint-best_acc')]

        logging.getLogger("utils").setLevel(logging.INFO)
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split('-')[-1] if 'checkpoint' in checkpoint else ""
            tokenizer = tokenizer_class.from_pretrained(checkpoint, do_lower_case=args.do_lower_case,
                                                        cache_dir=args.cache_dir if args.cache_dir else None)
            model = model_class.from_pretrained(checkpoint, cache_dir=args.cache_dir if args.cache_dir else None)
            model.to(args.device)
            if not args.no_cuda and args.n_gpu > 1:
                model = torch.nn.DataParallel(model)

            # Evaluate
            args.output_dir = checkpoint
            metrics, losses = evaluate(args, model, tokenizer, prefix=global_step, partition=None)
            write_results(args.comment, metrics, losses, os.path.join(args.output_dir, "dev-eval.txt"))
            metrics = dict((k + ('_{}'.format(global_step) if global_step else ''), v) for k, v in metrics.items())
            results.update(metrics)
        logger.info("Results: {}".format(results))
Code Example #31
def main(arguments):
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Logistics
    parser.add_argument("--gpu_id", help="gpu id to use", type=int, default=0)
    parser.add_argument("--seed", help="Random seed", type=int, default=19)
    parser.add_argument("--use_pytorch", help="1 to use PyTorch", type=int, default=0)
    parser.add_argument("--out_dir", help="Dir to write preds to", type=str, default='')
    parser.add_argument("--log_file", help="File to log to", type=str, default='')
    parser.add_argument("--load_data", help="0 to read data from scratch", type=int, default=1)

    # Model options
    parser.add_argument("--batch_size", help="Batch size to use", type=int, default=16)
    parser.add_argument("--model_dir", help="path to model folder")
    parser.add_argument("--prefix1", help="prefix to model 1", default='nli_large_bothskip_parse')
    parser.add_argument("--prefix2", help="prefix to model 2", default='nli_large_bothskip')
    parser.add_argument("--word_vec_file", help="path to pretrained vectors")
    parser.add_argument("--strategy", help="Approach to create sentence embedding last/max/best",
                        choices=["best", "max", "last"], default="best")

    # Task options
    parser.add_argument("--tasks", help="Tasks to evaluate on, as a comma separated list", type=str)
    parser.add_argument("--max_seq_len", help="Max sequence length", type=int, default=40)


    # Classifier options
    parser.add_argument("--cls_batch_size", help="Batch size to use for the classifier", type=int,
                        default=16)

    args = parser.parse_args(arguments)
    logging.basicConfig(format='%(asctime)s : %(message)s', level=logging.DEBUG)
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)
    log_file = os.path.join(args.out_dir, "results.log")
    fileHandler = logging.FileHandler(log_file)
    logging.getLogger().addHandler(fileHandler)
    logging.info(args)
    torch.cuda.set_device(args.gpu_id)

    # Set up SentEval
    params_senteval = {'task_path': PATH_TO_DATA, 'usepytorch': args.use_pytorch, 'kfold': 10,
            'max_seq_len': args.max_seq_len, 'batch_size': args.batch_size, 'load_data': args.load_data,
            'seed': args.seed}
    params_senteval['classifier'] = {'nhid': 0, 'optim': 'adam', 'batch_size': args.cls_batch_size,
            'tenacity': 5, 'epoch_size': 4, 'cudaEfficient': True}

    # Load model
    # import GenSen package
    sys.path.insert(0, args.model_dir)
    from gensen import GenSen, GenSenSingle

    ckpt_dir = os.path.join(args.model_dir, "data", "models")
    gensen_1 = GenSenSingle(model_folder=ckpt_dir, filename_prefix=args.prefix1,
                            pretrained_emb=args.word_vec_file, cuda=bool(args.gpu_id >= 0))
    gensen_2 = GenSenSingle(model_folder=ckpt_dir, filename_prefix=args.prefix2,
                            pretrained_emb=args.word_vec_file, cuda=bool(args.gpu_id >= 0))
    gensen = GenSen(gensen_1, gensen_2)
    global STRATEGY
    STRATEGY = args.strategy
    params_senteval['gensen'] = gensen

    # Do SentEval stuff
    se = senteval.engine.SE(params_senteval, batcher, prepare)
    tasks = get_tasks(args.tasks)
    results = se.eval(tasks)
    write_results(results, args.out_dir)
    logging.info(results)
Code Example #32
File: darknet.py Project: g0josh/yolov3
                print ("unknown blocks - {}".format(block['type']))

            features_per_layer.append(x)

        return detections

"""
Testing with a sample image
"""
if __name__ == '__main__':
    inp = prep_image('images/dog.jpg', 416)
    dnn = Darknet('cfg/yolov3.cfg')
    # print ("Module list = {}".format(dnn.module_list))
    dnn.load_weights('yolov3.weights')
    CUDA = torch.cuda.is_available()
    if CUDA:
        dnn.cuda()
        inp = inp[0].cuda()
    dnn.eval()
    print ("inp = {}\nshape = {}".format(inp, inp.shape))
    with torch.no_grad():
        pred = dnn(inp, CUDA)
    print ("prediction = {}\nshape = {}".format(pred, pred.shape))
    with open("/home/cbarobotics/dev/pred.t", 'wb') as f:
        torch.save(pred, f)
    res = write_results(pred, 0.5, 80)
    print ("res = {}\nshape = {}".format(res, res.shape))

                

Code Example #33
File: depict_dist_4.py Project: vonalan/DC
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    prepare_file_system()

    # FLAGS.eval_step_interval = 1
    # FLAGS.infer_step_interal = 10

    # TODO: OOP
    train_graph = tf.Graph()
    with train_graph.as_default():
        train_filenames, train_iterator, train_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.train_batch_size)
        train_inputs, train_cost, optimizer = build_train_graph(
            train_elements,
            FLAGS.depict_input_dim,
            FLAGS.depict_output_dim,
            func=FLAGS.loss_function)
        train_saver = tf.train.Saver()
        train_merger = tf.summary.merge_all()
        train_initializer = tf.global_variables_initializer()
        # train_parameters = tf.trainable_variables()
    eval_graph = tf.Graph()
    with eval_graph.as_default():
        eval_filenames, eval_iterator, eval_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.eval_batch_size)
        eval_inputs, eval_outputs = build_eval_graph(eval_elements,
                                                     FLAGS.depict_input_dim,
                                                     FLAGS.depict_output_dim)
        eval_saver = tf.train.Saver()
        eval_merger = tf.summary.merge_all()
        eval_initializer = tf.global_variables_initializer()
        # eval_parameters = tf.trainable_variables()
    infer_graph = tf.Graph()
    with infer_graph.as_default():
        infer_filenames, infer_iterator, infer_elements = \
            build_text_line_reader(shuffle=False, batch_size=FLAGS.infer_batch_size)
        infer_inputs, infer_outputs = build_infer_graph(
            infer_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim)
        rbfnn_metrics = build_metrics_graph('rbfnn')
        # kmeans_metrics = build_metrics_graph('kmeans')
        infer_saver = tf.train.Saver()
        infer_merger = tf.summary.merge_all()
        infer_initializer = tf.global_variables_initializer()

    config = tf.ConfigProto(device_count={"GPU": 1})
    train_sess = tf.Session(graph=train_graph, config=config)
    eval_sess = tf.Session(graph=eval_graph, config=config)
    infer_sess = tf.Session(graph=infer_graph, config=config)

    # train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', train_graph)
    # validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation', eval_graph)
    # infer_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/inference', infer_graph)

    train_sess.run(train_initializer)
    # eval_sess.run(eval_initializer)
    # infer_sess.run(infer_initializer)

    import utils

    results = dict()
    for epoch in itertools.count():
        if epoch > FLAGS.how_many_training_epoches:
            break

        train_generator = utils.build_data_generator(
            xtrain, shuffle=True, batch_size=FLAGS.train_batch_size)
        for batch, xs_train in enumerate(train_generator):
            _, training_cost = train_sess.run(
                [optimizer, train_cost], feed_dict={train_inputs: xs_train})
        if epoch % 1 == 0:
            checkpoint_path = train_saver.save(train_sess,
                                               FLAGS.checkpoints_dir +
                                               '/checkpoints',
                                               global_step=epoch)
            # train_saver.save(train_sess, FLAGS.saved_model_dir + '/checkpoints_' + str(FLAGS.depict_output_dim), global_step=epoch)
            infer_saver.restore(infer_sess, checkpoint_path)

            infers_train = []
            infer_generator = utils.build_data_generator(
                xtrain, shuffle=False, batch_size=FLAGS.infer_batch_size)
            for batch, xs_infer in enumerate(infer_generator):
                ys_infer = infer_sess.run(infer_outputs,
                                          feed_dict={infer_inputs: xs_infer})
                infers_train.extend(ys_infer)
            infers_test = []
            infer_generator = utils.build_data_generator(
                xtest, shuffle=False, batch_size=FLAGS.infer_batch_size)
            for batch, xs_infer in enumerate(infer_generator):
                ys_infer = infer_sess.run(infer_outputs,
                                          feed_dict={infer_inputs: xs_infer})
                infers_test.extend(ys_infer)
            print(len(infers_train), len(infers_test))

            metrics = classifier.run(infers_train, infers_test, FLAGS)
            pprint.pprint(metrics)
            results[epoch] = metrics
            utils.write_results(FLAGS, metrics, epoch)
    train_sess.close()
    eval_sess.close()
    infer_sess.close()
    return results