Code example #1
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('--lr', type=float, default=0.1)
    arg('--n_epochs', type=int, default=5)
    arg('--batch-size', type=int, default=32)
    arg('--data_dir', type=str, default='chest_xray')
    arg('--model', type=str, default='chexnet', choices=model_list.keys())
    arg('--root', type=str, default='runs/debug', help='checkpoint root')

    args = parser.parse_args()

    train_loader = generate_trainloaders(data_dir=args.data_dir,
                                         batch_size=args.batch_size)

    root = Path(args.root)
    root.mkdir(exist_ok=True, parents=True)

    model = model_list[args.model]
    loss = CrossEntropyLoss()

    utils.fit(
        init_optimizer=lambda lr: SGD(model.parameters(), lr=args.lr),
        args=args,
        model=model,
        train_loader=train_loader,
        criterion=loss,
        n_epochs=args.n_epochs,
        train_on_gpu=check_gpu(),
        dir_save=args.root,
        lr=args.lr,
        base_model=args.model,
    )
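`utils.fit` here is project-specific training code; the keyword arguments suggest a conventional PyTorch loop. A hypothetical, heavily simplified sketch of a loop that would match this call site (every detail of the body is an assumption, not the project's implementation):

import torch

def fit(init_optimizer, args, model, train_loader, criterion,
        n_epochs, train_on_gpu, dir_save, lr, base_model):
    """Hypothetical minimal training loop matching the call site above."""
    device = torch.device('cuda' if train_on_gpu else 'cpu')
    model = model.to(device)
    optimizer = init_optimizer(lr)
    for epoch in range(n_epochs):
        model.train()
        running_loss = 0.0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # save a checkpoint per epoch under the run directory
        torch.save(model.state_dict(), f"{dir_save}/{base_model}_epoch{epoch}.pt")
        print(f"epoch {epoch}: mean loss {running_loss / max(len(train_loader), 1):.4f}")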
Code example #2
def rollout_mcmc(x,
                 bounds,
                 func_policy,
                 depth_h,
                 _queries,
                 _values,
                 N_q=5,
                 n_sample=10,
                 decay_rate=0.9,
                 ARD_Flag=False,
                 length_scale=None):
    if len(x.shape) == 1:
        x = np.array([x])

    kernel = GPy.kern.RBF(len(bounds), ARD=ARD_Flag, lengthscale=length_scale)
    gp_model = fit(_queries, _values, kernel)
    U = ei(x, bounds, gp_model)
    if depth_h == 0:
        return U
    else:
        queriesori = np.copy(_queries)
        valuesori = np.copy(_values)
        Udelays = np.array([])
        _mu, _sig = gp_model.predict(x)
        for i in range(n_sample):
            _Udelay = 0
            _queriesf = np.copy(queriesori)
            _valuesf = np.copy(valuesori)
            # sample the first step from the GP predictive at x (fitted above)
            y_next = np.array(np.random.normal(_mu, _sig))
            _queriesf = np.concatenate([_queriesf, x])
            _valuesf = np.concatenate([_valuesf, y_next])
            for j in range(depth_h):
                _remain_h = depth_h - j - 1
                gp_model = fit(_queriesf, _valuesf, kernel)
                x_next = func_policy(gp_model, _remain_h, bounds)
                _Udelay += decay_rate * ei(x_next, bounds, gp_model)

                _queriesf = np.concatenate([_queriesf, x_next])
                mu, sig = gp_model.predict(x_next)
                next_y = np.array(np.random.normal(mu, sig))
                _valuesf = np.concatenate([_valuesf, next_y])
            Udelays = np.append(Udelays, _Udelay)
        U += np.mean(Udelays)
        return U
Code example #3
def rollout_utility_archive(x,
                            bounds,
                            func_policy,
                            depth_h,
                            _queries,
                            _values,
                            N_q,
                            n_sample=None,
                            decay_rate=0.9,
                            ARD_Flag=False,
                            length_scale=None):
    #print(depth_h)
    global U
    if len(x.shape) == 1:
        x = np.array([x])
    kernel = GPy.kern.RBF(len(bounds), ARD=ARD_Flag, lengthscale=length_scale)
    gp_model = fit(_queries, _values, kernel)  #todo:memo
    if depth_h == 0:
        U += ei(x, bounds, gp_model)
    else:
        U += ei(x, bounds, gp_model)
        _queries = np.concatenate([_queries, x])
        points, weights = gauss_hermite(x, gp_model, N_q)
        for i in range(N_q):
            val = np.array([[points[0][i]]])
            _values = np.concatenate([_values, val])
            kernel = GPy.kern.RBF(len(bounds),
                                  ARD=ARD_Flag,
                                  lengthscale=length_scale)
            #print("X",_queries)
            #print("Y",_values)
            _gp_model = fit(_queries, _values, kernel)  #todo:memo
            #print(i,"afterfit_afterker")
            x_next = func_policy(_gp_model, depth_h, bounds)
            U = U + weights[i] * decay_rate * rollout_utility_archive(
                x_next,
                bounds,
                func_policy,
                depth_h - 1,
                _queries,
                _values,
                N_q,
                decay_rate=decay_rate,  # keyword, so it is not swallowed by the unused n_sample slot
                ARD_Flag=ARD_Flag,
                length_scale=length_scale)
            _values = np.copy(_values[:-1, :])
        _queries = np.copy(_queries[:-1, :])
    _U = U
    U = 0
    return _U
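The `gauss_hermite(x, gp_model, N_q)` helper used above is project code; judging from how its points and weights are consumed, it presumably wraps standard Gauss-Hermite quadrature over the GP predictive distribution at `x`. A minimal sketch of that textbook construction, assuming the predictive mean and standard deviation are given as scalars:

import numpy as np

def gauss_hermite_points(mu, sigma, n_points=5):
    """Quadrature points/weights approximating E[f(Y)] for Y ~ N(mu, sigma**2)."""
    # Physicists' Gauss-Hermite rule for the integral of exp(-t**2) * f(t) dt
    t, w = np.polynomial.hermite.hermgauss(n_points)
    # Change of variables y = mu + sqrt(2)*sigma*t; the rescaled weights sum to one
    points = mu + np.sqrt(2.0) * sigma * t
    weights = w / np.sqrt(np.pi)
    return points, weights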
Code example #4
File: builder.py  Project: sergiomsantos/protosyn
    def load_fragments():
        from residue import Residue
        import os

        ref = Residue('REF')
        ref.append_atom(Atom( 'N', [0.000,  0.300, 0.000]))
        ref.append_atom(Atom('CA', [1.181, -0.540, 0.000]))
        ref.append_atom(Atom( 'C', [2.440,  0.300, 0.000]))
        
        # load all residues
        this_dir, this_filename = os.path.split(__file__)
        DATA_PATH = os.path.join(this_dir, "static", "residues.pdb")
        
        residue_container = Molecule.LoadFromFile(DATA_PATH)
        for fragment in residue_container.iter_residues():
            fit(ref, ('N','CA','C'), fragment, ('N','CA','C'))
            FragmentProvider.FRAGMENTS[fragment.name] = fragment
Code example #5
File: conditions.py  Project: xDadiKx/BEE2.4
def place_catwalk_connections(instances, point_a, point_b):
    """Place catwalk sections to connect two straight points."""
    diff = point_b - point_a

    # The horizontal unit vector in the direction we are placing catwalks
    direction = diff.copy()
    direction.z = 0
    distance = direction.len() - 128
    direction = direction.norm()

    if diff.z > 0:
        angle = INST_ANGLE[direction.as_tuple()]
        # We need to add stairs
        for stair_pos in range(0, int(diff.z), 128):
            # Move twice the vertical horizontally
            # plus 128 so we don't start in point A
            loc = point_a + (2 * stair_pos + 128) * direction
            # Do the vertical offset
            loc.z += stair_pos
            VMF.create_ent(classname="func_instance", origin=loc.join(" "), angles=angle, file=instances["stair"])
        # This is the location we start flat sections at
        point_a = loc + 128 * direction
        point_a.z += 128
    elif diff.z < 0:
        # We need to add downward stairs
        # They point opposite to normal ones
        utils.con_log("down from", point_a)
        angle = INST_ANGLE[(-direction).as_tuple()]
        for stair_pos in range(0, -int(diff.z), 128):
            utils.con_log(stair_pos)
            # Move twice the vertical horizontally
            loc = point_a + (2 * stair_pos + 256) * direction
            # Do the vertical offset plus additional 128 units
            # to account for the moved instance
            loc.z -= stair_pos + 128
            VMF.create_ent(classname="func_instance", origin=loc.join(" "), angles=angle, file=instances["stair"])
        # Adjust point A to be at the end of the catwalks
        point_a = loc
    # Remove the space the stairs take up from the horiz distance
    distance -= abs(diff.z) * 2

    # Now do straight sections
    utils.con_log("Stretching ", distance, direction)
    angle = INST_ANGLE[direction.as_tuple()]
    loc = point_a + (direction * 128)

    # Figure out the most efficient number of sections
    for segment_len in utils.fit(distance, [512, 256, 128]):
        VMF.create_ent(
            classname="func_instance",
            origin=loc.join(" "),
            angles=angle,
            file=instances["straight_" + str(segment_len)],
        )
        utils.con_log(loc)
        loc += segment_len * direction
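`utils.fit(distance, [512, 256, 128])` above is a BEE2-specific helper; from the loop that consumes its result, it appears to break a distance into a sequence of instance lengths. A hypothetical greedy sketch of that idea (the name `fit_segments` and the largest-first strategy are assumptions, not the project's actual implementation):

def fit_segments(dist, sizes):
    """Greedily decompose `dist` into pieces drawn from `sizes`, largest first."""
    remaining = int(dist)
    segments = []
    for size in sorted(sizes, reverse=True):
        while remaining >= size:
            segments.append(size)
            remaining -= size
    return segments

For example, fit_segments(896, [512, 256, 128]) would yield [512, 256, 128], which the loop then places end to end along `direction`.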
Code example #6
File: gan.py  Project: alexajm/gan-facemask-design
    def fit(self, inputs, outputs, num_epochs=10):
        """Train projector given masked faces as input and transparency masks as outputs.

        # Arguments
        * `inputs` - a batch of faces to train on
        * `outputs` - the ID of every input face
        * `num_epochs` - number of epochs to run, defaults to 10
        """
        self.train()
        return utils.fit(self, inputs, outputs, num_epochs=num_epochs)
Code example #7
    def preprocess(self, training_data_path):
        """Read the training data, build the label vocabulary and word2vec model, and create the input tensors."""
        def tokenizer(iterator):
            for value in iterator:
                yield value

        tokens, labels = read_data(training_data_path)
        model_dir = self.params[constants.PARAM_KEY_MODEL_DIR]

        if self.label_vocab is None:
            logging.info("generating label vocabulary ...")
            self.label_vocab = learn.preprocessing.VocabularyProcessor(
                max_document_length=self.params[
                    constants.PARAM_KEY_MAX_DOCUMENT_LEN],
                tokenizer_fn=tokenizer)
            self.label_vocab.fit(labels)
            logging.info("label vocabulary size = %d",
                         len(self.label_vocab.vocabulary_))

            label_vocab_path = os.path.join(model_dir,
                                            constants.FILENAME_LABEL_VOCAB)
            deeptext.utils.serialization.save(self.label_vocab,
                                              label_vocab_path)
        self.label_ids = self.preprocess_label_transform(labels)
        self.params[constants.PARAM_KEY_LABEL_VOCAB_SIZE] = len(
            self.label_vocab.vocabulary_)

        self.tokens = tokens
        print(constants.TENSOR_NAME_TOKENS)
        print(self.params[constants.PARAM_KEY_MAX_DOCUMENT_LEN])
        print(self.params[constants.PARAM_KEY_EMBEDDING_SIZE])

        self.word2vec_model = fit(
            self.tokens,
            os.path.join(model_dir, constants.FILENAME_WORD2VEC_MODEL))
        self.tensor_tokens = tf.placeholder(
            dtype=tf.float32,
            name=constants.TENSOR_NAME_TOKENS,
            shape=[
                None, self.params[constants.PARAM_KEY_MAX_DOCUMENT_LEN],
                self.params[constants.PARAM_KEY_EMBEDDING_SIZE]
            ])
        self.tensor_labels = tf.placeholder_with_default(
            self.label_ids,
            name=constants.TENSOR_NAME_LABELS,
            shape=[None, self.params[constants.PARAM_KEY_MAX_DOCUMENT_LEN]])

        print('build_model')
        self.build_model(self.tensor_tokens, self.tensor_labels)
Code example #8
File: builder.py  Project: sergiomsantos/protosyn
def grow(chain, *frag_names):
    '''
    grow(chain, *fragment_names)

    Grow a peptide chain by appending fragments to the
    C-terminus of the chain.
    '''
    if not frag_names:
        return chain
    
    if chain.count_residues() == 0:
        fragment = FragmentProvider.get_fragment(frag_names[0])
        chain.append_residue(fragment, is_head=True)
        return grow(chain, *frag_names[1:])
    
    tail = chain.residues[-1]
    for n,frag_name in enumerate(frag_names, start=1):
        
        fragment = FragmentProvider.get_fragment(frag_name)
        
        if n == 1:
            R,T = fit(fragment, ('N','CA','C'), tail, ('N','CA','C'))
            invR = np.linalg.inv(R)

        # calculate new fragment's coordinates (flipped and translated),
        xyz = fragment.get_coordinates()*[1,(-1)**n,1] + [n*3.638,0,0]
        xyz = np.dot(invR, np.transpose(xyz - T)).T
        fragment.set_coordinates(xyz)
        chain.append_residue(fragment)

    # take the tail residue back to its original position
    xyz = tail.get_coordinates()
    xyz = np.dot(invR, np.transpose(xyz - T)).T
    tail.set_coordinates(xyz)

    return chain
Code example #9
            sigma_depth = (2. * sigma) / np.sqrt(I)

            # If initial SNR estimate is larger than 5-sigma, perform the fit:
            if tdepth / sigma_depth > 5:

                print(
                    '\t >> Performing fit for ' + sector +
                    '; expected depth precision FOR ALL SECTORS: ',
                    sigma_depth * 1e6, ' giving SNR:', tdepth / sigma_depth)

                if not os.path.exists(planet):
                    os.mkdir(planet)

                full_path = planet + '/' + sector

                utils.fit(t[sector], f[sector], ferr[sector], sector, period, period_err, t0, t0_err, ecc, omega, GPmodel = 'ExpMatern', outpath = full_path, \
                          method = method, in_transit_length = factor*tdur)
                utils.fit(t[sector], f[sector], ferr[sector], sector, period, period_err, t0, t0_err, ecc, omega, GPmodel = 'QP', outpath = full_path, \
                          method = method, in_transit_length = factor*tdur)

                if fit_catwoman:

                    utils.fit(t[sector], f[sector], ferr[sector], sector, period, period_err, t0, t0_err, ecc, omega, GPmodel = 'ExpMatern', outpath = full_path, \
                              method = method, in_transit_length = factor*tdur, fit_catwoman = fit_catwoman)
                    utils.fit(t[sector], f[sector], ferr[sector], sector, period, period_err, t0, t0_err, ecc, omega, GPmodel = 'QP', outpath = full_path, \
                              method = method, in_transit_length = factor*tdur, fit_catwoman = fit_catwoman)

                good_sectors.append(sector)

            else:
                print(
                    '\t WARNING: ', sector,
Code example #10
def place_catwalk_connections(vmf: VMF, instances, point_a: Vec, point_b: Vec):
    """Place catwalk sections to connect two straight points."""
    diff = point_b - point_a

    # The horizontal unit vector in the direction we are placing catwalks
    direction = diff.copy()
    direction.z = 0
    distance = direction.len() - 128
    direction = direction.norm()

    if diff.z > 0:
        angle = INST_ANGLE[direction.as_tuple()]
        # We need to add stairs
        for stair_pos in range(0, int(diff.z), 128):
            # Move twice the vertical horizontally
            # plus 128 so we don't start in point A
            loc = point_a + (2 * stair_pos + 128) * direction
            # Do the vertical offset
            loc.z += stair_pos
            vmf.create_ent(
                classname='func_instance',
                origin=loc.join(' '),
                angles=angle,
                file=instances['stair'],
            )
        # This is the location we start flat sections at
        point_a = loc + 128 * direction
        point_a.z += 128
    elif diff.z < 0:
        # We need to add downward stairs
        # They point opposite to normal ones
        LOGGER.debug('down from {}', point_a)
        angle = INST_ANGLE[(-direction).as_tuple()]
        for stair_pos in range(0, -int(diff.z), 128):
            LOGGER.debug(stair_pos)
            # Move twice the vertical horizontally
            loc = point_a + (2 * stair_pos + 256) * direction  # type: Vec
            # Do the vertical offset plus additional 128 units
            # to account for the moved instance
            loc.z -= (stair_pos + 128)
            vmf.create_ent(
                classname='func_instance',
                origin=loc.join(' '),
                angles=angle,
                file=instances['stair'],
            )
        # Adjust point A to be at the end of the catwalks
        point_a = loc
    # Remove the space the stairs take up from the horiz distance
    distance -= abs(diff.z) * 2

    # Now do straight sections
    LOGGER.debug('Stretching {} {}', distance, direction)
    angle = INST_ANGLE[direction.as_tuple()]
    loc = point_a + (direction * 128)

    # Figure out the most efficient number of sections
    for segment_len in utils.fit(
            distance,
            [512, 256, 128]
            ):
        vmf.create_ent(
            classname='func_instance',
            origin=loc.join(' '),
            angles=angle,
            file=instances['straight_' + str(segment_len)],
        )
        loc += (segment_len * direction)
Code example #11
File: train.py  Project: rsk2327/BMIN_Project
                              sort_within_batch=True,
                              repeat=False)

validdl = data.BucketIterator(dataset=(validds),
                              batch_size=6,
                              sort_key=lambda x: len(x.text),
                              device=device,
                              sort_within_batch=True,
                              repeat=False)

train_batch_it = BatchGenerator(traindl, 'text', 'Category')
valid_batch_it = BatchGenerator(validdl, 'text', 'Category')

vocab_size = len(txt_field.vocab)

model = SimpleGRU(vocab_size,
                  embedding_dim,
                  n_hidden,
                  n_out,
                  trainds.fields['text'].vocab.vectors,
                  dropout=dropout).to(device)

opt = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), 1e-3)

fit(model=model,
    train_dl=train_batch_it,
    val_dl=valid_batch_it,
    loss_fn=F.nll_loss,
    opt=opt,
    epochs=train_epochs)
Code example #12
def make_straight(
    vmf: VMF,
    origin: Vec,
    normal: Vec,
    dist: int,
    config: Config,
    is_start=False,
) -> None:
    """Make a straight line of instances from one point to another."""
    angles = round(normal, 6).to_angle()
    orient = Matrix.from_angle(angles)

    # The starting brush needs to stick out a bit further, to cover the
    # point_push entity.
    start_off = -96 if is_start else -64

    p1, p2 = Vec.bbox(
        origin + Vec(start_off, -config.trig_radius, -config.trig_radius) @ orient,
        origin + Vec(dist - 64, config.trig_radius, config.trig_radius) @ orient,
    )

    solid = vmf.make_prism(p1, p2, mat='tools/toolstrigger').solid

    motion_trigger(vmf, solid.copy())

    push_trigger(vmf, origin, normal, [solid])

    off = 0
    for seg_dist in utils.fit(dist, config.inst_straight_sizes):
        vmf.create_ent(
            classname='func_instance',
            origin=origin + off * orient.forward(),
            angles=angles,
            file=config.inst_straight[seg_dist],
        )
        off += seg_dist
    # Supports.
    if config.inst_support:
        for off in range(0, int(dist), 128):
            position = origin + off * normal
            placed_support = False
            for supp_dir in [
                orient.up(), orient.left(),
                -orient.left(), -orient.up()
            ]:
                try:
                    tile = tiling.TILES[
                        (position - 128 * supp_dir).as_tuple(),
                        supp_dir.norm().as_tuple()
                    ]
                except KeyError:
                    continue
                # Check all 4 center tiles are present.
                if all(tile[u, v].is_tile for u in (1, 2) for v in (1, 2)):
                    vmf.create_ent(
                        classname='func_instance',
                        origin=position,
                        angles=Matrix.from_basis(x=normal, z=supp_dir).to_angle(),
                        file=config.inst_support,
                    )
                    placed_support = True
            if placed_support and config.inst_support_ring:
                vmf.create_ent(
                    classname='func_instance',
                    origin=position,
                    angles=angles,
                    file=config.inst_support_ring,
                )
Code example #13
model_config_path = os.path.join(args.model_dir, 'model_config.json')
if args.model_config is not None:
    model_config_path = args.model_config
model_config = utils.load_json(model_config_path)
game_step = model_config['game_step']
the_model = model.MinimalModel(**model_config)

eval_config = utils.load_json(args.eval_config)

if args.mode == 'train':
    train_config_path = os.path.join(args.model_dir, 'train_config.json')
    if args.train_config is not None:
        train_config_path = args.train_config
    train_config = utils.load_json(train_config_path)

    train_data = data.RandomDataset(game_step, **train_config['data'])
    valid_datasets = [ ('random_data', data.RandomDataset(game_step, **eval_config)) ]
    if game_step == 1:
        valid_datasets.append( ('minimal_data', data.MinimalDataset()) )

    utils.fit(the_model, train_data, valid_datasets, **train_config['schedule'])

elif args.mode == 'test':
    test_data = data.RandomDataset(game_step, **eval_config)
    accuracy = utils.eval(the_model, test_data)
    print(accuracy)

else:
    assert False, 'invalid mode'
Code example #14
def main():

    parser = argparse.ArgumentParser(description='relaxation terms regression')

    #    parser.add_argument('-p', '--process', type=str,
    #                        choices=["shear", "bulk", "conductivity", "thermal_diffusion", "mass_diffusion"],
    #                        default="shear,bulk,conductivity,thermal_diffusion,mass_diffusion",
    #                        help='Comma-separated names of transport properties whose regression is performed')

    parser.add_argument('-a',
                        '--algorithm',
                        type=str,
                        choices=[
                            'DT', 'RF', 'ET', 'GP', 'KN', 'SVM', 'KR', 'GB',
                            'HGB', 'MLP'
                        ],
                        default='DT',
                        help='regression algorithm')

    args = parser.parse_args()

    #    process   = args.process.split(',')
    #    print("Process: ", colored(process[0], 'green'))

    algorithm = args.algorithm.split(',')
    print("Algorithm: ", colored(algorithm[0], 'blue'))

    src_dir = "."
    print("SRC: ", colored(src_dir, 'yellow'))

    output_dir = src_dir + "/.."
    print("OUTPUT: ", colored(output_dir, 'red'))

    n_jobs = 2

    # Import database
    dataset = np.loadtxt("../data/transposed_reshaped_data.txt")
    #   with open('../data/TCs_air5.txt') as f:
    #       lines = (line for line in f if not line.startswith('#'))
    #       dataset = np.loadtxt(lines, skiprows=1)

    print(dataset.shape)

    #    if (process[0] == "shear"):
    #        x = dataset[:,0:7] # T, P, x_N2, x_O2, x_NO, x_N, x_O
    #        y = dataset[:,7:8] # shear viscosity
    #    elif (process[0] == "bulk"):
    #        x = dataset[:,0:7] # T, P, x_N2, x_O2, x_NO, x_N, x_O
    #        y = dataset[:,8:9] # bulk viscosity
    #    elif (process[0] == "conductivity"):
    #        x = dataset[:,0:7] # T, P, x_N2, x_O2, x_NO, x_N, x_O
    #        y = dataset[:,9:10]# thermal conductivity
    #    elif (process[0] == "thermal_diffusion"):
    #        x = dataset[:,0:7] # T, P, x_N2, x_O2, x_NO, x_N, x_O
    #        y = dataset[:,10:] # thermal diffusion, D_Ti
    #    elif (process[0] == "mass_diffusion"):
    #        x = dataset[:,0:7] # T, P, x_N2, x_O2, x_NO, x_N, x_O
    #        y = dataset[:,:]   # mass diffusion TODO

    x = dataset[:, 0:50]  # ni_n[47], na_n[1], V, T
    y = dataset[:, 50:]  # RD_mol[47], RD_at[1]

    print(x.shape)
    print(y.shape)

    print("### Phase 1: PRE_PROCESSING ###")
    ########################################

    # 1.0) create directory tree
    model, scaler, figure = utils.mk_tree(algorithm[0], output_dir)

    # 1.1) train/test split dataset
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        train_size=0.75,
                                                        test_size=0.25,
                                                        random_state=69)

    # 1.2) scale data and save scalers
    sc_x = StandardScaler()
    sc_y = StandardScaler()

    sc_x.fit(x_train)
    x_train = sc_x.transform(x_train)
    x_test = sc_x.transform(x_test)

    sc_y.fit(y_train)
    y_train = sc_y.transform(y_train)
    y_test = sc_y.transform(y_test)

    print('Training Features Shape:', x_train.shape)
    print('Training Labels Shape:', y_train.shape)
    print('Testing Features Shape:', x_test.shape)
    print('Testing Labels Shape:', y_test.shape)

    dump(sc_x, open(scaler + "/scaler_x.pkl", 'wb'))
    dump(sc_y, open(scaler + "/scaler_y.pkl", 'wb'))

    print("### Phase 2: PROCESSING ###")
    ####################################

    # 2.0) estimator selection
    if (algorithm[0] == 'DT'):
        est, hyper_params = estimators.est_DT()

    elif (algorithm[0] == 'ET'):
        est, hyper_params = estimators.est_ET()

    elif (algorithm[0] == 'SVM'):
        est, hyper_params = estimators.est_SVM()

    elif (algorithm[0] == 'KR'):
        est, hyper_params = estimators.est_KR()

    elif (algorithm[0] == 'KN'):
        est, hyper_params = estimators.est_KN()

    elif (algorithm[0] == 'MLP'):
        est, hyper_params = estimators.est_MLP()

    elif (algorithm[0] == 'GB'):
        est, hyper_params = estimators.est_GB()

    elif (algorithm[0] == 'HGB'):
        est, hyper_params = estimators.est_HGB()

    elif (algorithm[0] == 'RF'):
        est, hyper_params = estimators.est_RF()

    else:
        print("Algorithm not implemented ...")

    # 2.1) search for best hyper-parameters combination
    # Exhaustive search over specified parameter values for the estimator
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
    gs = GridSearchCV(est,
                      cv=10,
                      param_grid=hyper_params,
                      verbose=2,
                      n_jobs=n_jobs,
                      scoring='r2',
                      refit=True,
                      pre_dispatch='n_jobs',
                      error_score=np.nan,
                      return_train_score=True)

    # Randomized search on hyper parameters
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV
    # class sklearn.model_selection.RandomizedSearchCV(estimator, param_distributions, *, n_iter=10, scoring=None, n_jobs=None, refit=True,
    #                                                  cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score=nan,
    #                                                  return_train_score=False)
    #gs = RandomizedSearchCV(est, cv=10, n_iter=10, param_distributions=hyper_params, verbose=2, n_jobs=n_jobs, scoring='r2',
    #                        refit=True, pre_dispatch='n_jobs', error_score=np.nan, return_train_score=True)

    # 2.2) training
    utils.fit(x_train, y_train, gs)

    # 2.3) prediction
    y_regr = utils.predict(x_test, gs)

    print("### Phase 3: POST-PROCESSING ###")
    #########################################

    # 3.0) save best hyper-parameters
    results = pd.DataFrame(gs.cv_results_)
    # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
    #compression_opts = dict(method='zip', archive_name='GridSearchCV_results.csv')
    #results.to_csv('GridSearchCV_results.zip', index=False, compression=compression_opts)
    results.to_csv(model + "/../" + "GridSearchCV_results.csv",
                   index=False,
                   sep='\t',
                   encoding='utf-8')

    # results print screen
    print("Best: %f using %s" % (gs.best_score_, gs.best_params_))
    means = gs.cv_results_['mean_test_score']
    stds = gs.cv_results_['std_test_score']
    params = gs.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    # 3.1) compute score metrics
    utils.scores(sc_x, sc_y, x_train, y_train, x_test, y_test, model, gs)

    # 3.2) back to original values (unscaling)
    x_test_dim = sc_x.inverse_transform(x_test)
    y_test_dim = sc_y.inverse_transform(y_test)
    y_regr_dim = sc_y.inverse_transform(y_regr)

    # 3.3) make plots
    utils.draw_plot(x_test_dim, y_test_dim, y_regr_dim, figure)

    # 3.4) save model to disk
    dump(gs, model + "/model.sav")
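The `if/elif` ladder above (repeated almost verbatim in the next examples) maps an algorithm name to a factory in the project's `estimators` module; note that the `GP` choice accepted by the parser has no corresponding factory branch. The same selection can be written as a dictionary dispatch; a minimal sketch assuming the same factory functions:

EST_FACTORIES = {
    'DT': estimators.est_DT,
    'RF': estimators.est_RF,
    'ET': estimators.est_ET,
    'KN': estimators.est_KN,
    'SVM': estimators.est_SVM,
    'KR': estimators.est_KR,
    'GB': estimators.est_GB,
    'HGB': estimators.est_HGB,
    'MLP': estimators.est_MLP,
}

try:
    est, hyper_params = EST_FACTORIES[algorithm[0]]()
except KeyError:
    # stop early instead of falling through with an undefined estimator
    raise SystemExit("Algorithm not implemented ...")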
Code example #15
File: run_regression.py  Project: lkampoli/ML4STS
def main():

    parser = argparse.ArgumentParser(description='Omega integrals')

    parser.add_argument('-p', '--process', type=str,
                        choices=["omega11", "omega12", "omega13", "omega22", "omegas"],
                        default="omegas",
                        help='Comma-separated names of omega integrals whose regression is performed')

    parser.add_argument('-a', '--algorithm', type=str,
                        choices=['DT', 'RF', 'ET', 'GP', 'KN', 'SVM', 'KR', 'GB', 'HGB', 'MLP'],
                        default='DT',
                        help='transport algorithm')

    parser.add_argument('-l', '--load_model', type=str2bool,
                        nargs='?',
                        choices=[False, True],
                        default=False,
                        const=True,
                        help='Load saved model')

    args = parser.parse_args()

    process = args.process.split(',')
    print("Process: ", colored(process[0], 'green'))

    algorithm = args.algorithm.split(',')
    print("Algorithm: ", colored(algorithm[0],'blue'))

    load_model = args.load_model
    print("Load: ", colored(load_model,'magenta'))

    src_dir = "."
    print("SRC: ", colored(src_dir,'yellow'))

    output_dir = src_dir+"/.."
    print("OUTPUT: ", colored(output_dir,'red'))

    n_jobs = 2

    # Import database
    with open('../data/omega_integrals_encoded.txt') as f:
        lines = (line for line in f if not line.startswith('#'))
        dataset = np.loadtxt(lines, skiprows=1)
    print(dataset.shape)

    x = dataset[:,0:3] # c, d, T
    y = dataset[:,3:]  # Ω(1,1), Ω(1,2), Ω(1,3), Ω(2,2)
    print(x.shape)
    print(y.shape)

    print("### Phase 1: PRE_PROCESSING ###")
    ########################################

    # 1.0) create directory tree
    model, scaler, figure = utils.mk_tree(process[0], algorithm[0], output_dir)

    # 1.1) train/test split dataset
    x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.75, test_size=0.25, random_state=69)

    # 1.2) scale data and save scalers
    sc_x = StandardScaler()
    sc_y = StandardScaler()

    sc_x.fit(x_train)
    x_train = sc_x.transform(x_train)
    x_test  = sc_x.transform(x_test)

    sc_y.fit(y_train)
    y_train = sc_y.transform(y_train)
    y_test  = sc_y.transform(y_test)

    print('Training Features Shape:', x_train.shape)
    print('Training Labels Shape:',   y_train.shape)
    print('Testing Features Shape:',  x_test.shape)
    print('Testing Labels Shape:',    y_test.shape)

    dump(sc_x, open(scaler+"/scaler_x_"+process[0]+'.pkl', 'wb'))
    dump(sc_y, open(scaler+"/scaler_y_"+process[0]+'.pkl', 'wb'))

    print("### Phase 2: PROCESSING ###")
    ####################################

    # 2.0) estimator selection
    if (algorithm[0] == 'DT'):
        est, hyper_params = estimators.est_DT()

    elif (algorithm[0] == 'ET'):
        est, hyper_params = estimators.est_ET()

    elif (algorithm[0] == 'SVM'):
        est, hyper_params = estimators.est_SVM()

    elif (algorithm[0] == 'KR'):
        est, hyper_params = estimators.est_KR()

    elif (algorithm[0] == 'KN'):
        est, hyper_params = estimators.est_KN()

    elif (algorithm[0] == 'MLP'):
        est, hyper_params = estimators.est_MLP()

    elif (algorithm[0] == 'GB'):
        est, hyper_params = estimators.est_GB()

    elif (algorithm[0] == 'HGB'):
        est, hyper_params = estimators.est_HGB()

    elif (algorithm[0] == 'RF'):
        est, hyper_params = estimators.est_RF()

    else:
        print("Algorithm not implemented ...")

    # 2.1) search for best hyper-parameters combination
    # Exhaustive search over specified parameter values for the estimator
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
    gs = GridSearchCV(est, cv=3, param_grid=hyper_params, verbose=2, n_jobs=n_jobs, scoring='r2',
                      refit=True, pre_dispatch='n_jobs', error_score=np.nan, return_train_score=True)

    # Randomized search on hyper parameters
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV
    # class sklearn.model_selection.RandomizedSearchCV(estimator, param_distributions, *, n_iter=10, scoring=None, n_jobs=None, refit=True,
    #                                                  cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score=nan,
    #                                                  return_train_score=False)
    #gs = RandomizedSearchCV(est, cv=10, n_iter=10, param_distributions=hyper_params, verbose=2, n_jobs=n_jobs, scoring='r2',
    #                        refit=True, pre_dispatch='n_jobs', error_score=np.nan, return_train_score=True)

    # 2.2) training
    utils.fit(x_train, y_train, gs)

    # 2.3) prediction
    y_regr = utils.predict(x_test, gs)

    print("### Phase 3: POST-PROCESSING ###")
    #########################################

    # 3.0) save best hyper-parameters
    results = pd.DataFrame(gs.cv_results_)
    # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
    #compression_opts = dict(method='zip', archive_name='GridSearchCV_results.csv')
    #results.to_csv('GridSearchCV_results.zip', index=False, compression=compression_opts)
    results.to_csv(model+"/../"+"GridSearchCV_results.csv", index=False, sep='\t', encoding='utf-8')

    # results print screen
    print("Best: %f using %s" % (gs.best_score_, gs.best_params_))
    means  = gs.cv_results_['mean_test_score']
    stds   = gs.cv_results_['std_test_score']
    params = gs.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    # 3.1) compute score metrics
    utils.scores(sc_x, sc_y, x_train, y_train, x_test, y_test, model, gs)

    # 3.2) back to original values (unscaling)
    x_test_dim = sc_x.inverse_transform(x_test)
    y_test_dim = sc_y.inverse_transform(y_test)
    y_regr_dim = sc_y.inverse_transform(y_regr)

    # 3.3) make plots
    utils.draw_plot(x_test_dim, y_test_dim, y_regr_dim, figure, process[0], algorithm[0])

    # 3.4) save model to disk
    dump(gs, model+"/model_"+process[0]+".sav")
Code example #16
print('Trained model loss function loaded...')
print(f"Previously trained for {epochs} number of epochs...")
# train for more epochs
epochs = new_epochs
print(f"Train for {epochs} more epochs...")

# train data loader
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

train_loss, train_accuracy = [], []
val_loss, val_accuracy = [], []
for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss, train_epoch_accuracy = fit(model, train_loader,
                                                 optimizer, criterion,
                                                 train_data)
    val_epoch_loss, val_epoch_accuracy = validate(model, val_loader, optimizer,
                                                  criterion, val_data)
    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    val_loss.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)
    print(
        f"Train Loss: {train_epoch_loss:.4f}, Train Acc: {train_epoch_accuracy:.2f}"
    )
    print(f'Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_accuracy:.2f}')

# accuracy plots
plt.figure(figsize=(10, 7))
plt.plot(train_accuracy, color='green', label='train accuracy')
Code example #17
File: predict.py  Project: YannouRavoet/titanic
from utils import import_data, data_wrangling, features, fit, print_stats, create_submission_file

if __name__ == "__main__":
    train_data, test_data = import_data()
    train_data, test_data = data_wrangling(train_data, test_data)
    X, y, X_test = features(train_data, test_data)
    model = fit(X, y)
    print_stats(X, y, model)  # without cross-validation
    create_submission_file(model, X_test, test_data)
Code example #18
File: train.py  Project: RodinDmitry/ReviewAnalysis
     ('plot_score', PLOT_SCORE), ('image_score', IMAGE_SCORE), ('music_score', MUSIC_SCORE),
     ('actors_score', ACTORS_SCORE), ('name0', None)], skip_header=True)


train, val = get_dataset(union_toloka_result_proc_path).split()
golden_train = get_dataset(union_golden_proc_path2)
TEXT.build_vocab(train, max_size=30000)

model_path = "./models/model"
rnn_model = MultiModel(model=BiLSTMClassifier(300, len(TEXT.vocab.stoi), 256, 2).to(device))
# rnn_model.load_state_dict(torch.load(model_path))


batch_size = 32
train_iter, val_iter = data.BucketIterator.splits(
    (train, val), sort_key=lambda x: len(x.text),
    batch_sizes=(batch_size, batch_size), device=device)
golden_iter = data.BucketIterator(golden_train, sort_key=lambda x: len(x.text), batch_size=batch_size, device=device)

criterion_cls = nn.BCEWithLogitsLoss().to(device)
criterion_scores = nn.MSELoss(reduction='none').to(device)
criterion_scores_l1 = nn.L1Loss(reduction='none').to(device)

rnn_model = MultiModel(model=BiLSTMClassifier(300, len(TEXT.vocab.stoi), 256, 2).to(device))

optimizer = optim.Adam([param for param in rnn_model.model.parameters() if param.requires_grad])
fit(rnn_model, criterion_cls, criterion_scores, optimizer, train_iter, epochs_count=30, val_data=val_iter)
torch.save(rnn_model.model.state_dict(), model_path)

do_eval_epoch(rnn_model, None, criterion_scores_l1, val_iter)
Code example #19
File: train.py  Project: abhtri/logcounting
def train(dataset_name, model_name, metric_name, path_history, path_model, path_opt, path_best_model, reset=False):  
  # SET SEED
  np.random.seed(1)
  torch.manual_seed(1) 
  torch.cuda.manual_seed_all(1)

  # Train datasets
  transformer = ut.ComposeJoint(
                    [ut.RandomHorizontalFlipJoint(),            
                    [transforms.ToTensor(), None],
                    [transforms.Normalize(*ut.mean_std), None],
                    [None,  ut.ToLong() ]
                    ])

  train_set = dataset_dict[dataset_name](split="train", 
                                         transform_function=transformer)
  
  trainloader = torch.utils.data.DataLoader(train_set, batch_size=1, 
                                            num_workers=0,
                                            drop_last=False,
                                            sampler=ut.RandomSampler(train_set))
  # Val datasets
  transformer = ut.ComposeJoint(
                    [
                         [transforms.ToTensor(), None],
                         [transforms.Normalize(*ut.mean_std), None],
                         [None,  ut.ToLong() ]
                    ])  

  val_set = dataset_dict[dataset_name](split="val", 
                                       transform_function=transformer)

  test_set = dataset_dict[dataset_name](split="test", 
                                       transform_function=transformer)

 

  # Model 
  model = model_dict[model_name](train_set.n_classes).cuda()
  opt = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                         lr=1e-5, weight_decay=0.0005)

  # Train
  if os.path.exists(path_history) and not reset:
    history = ut.load_json(path_history)
    model.load_state_dict(torch.load(path_model))
    opt.load_state_dict(torch.load(path_opt))
    s_epoch = history["train"][-1]["epoch"]
    print("Resuming epoch...{}".format(s_epoch))

  else:
    history = {"train":[], "val":[], "test":[],
               "model_name":model_name,
               "dataset_name":dataset_name, 
               "path_model":path_model,
               "path_opt":path_opt,
               "path_best_model":path_best_model,
               "best_val_epoch":-1, "best_val_mae":np.inf}
    s_epoch = 0
    print("Starting from scratch...")
  

  for epoch in range(s_epoch + 1, 1000):    
    train_dict = ut.fit(model, trainloader, opt, 
                        loss_function=losses.lc_loss,
                        epoch=epoch)
    
    # Update history
    history["trained_images"] = list(model.trained_images)
    history["train"] += [train_dict]

    # Save model, opt and history
    torch.save(model.state_dict(), path_model)
    torch.save(opt.state_dict(), path_opt)
    ut.save_json(path_history, history)

    # %%%%%%%%%%% 2. VALIDATION PHASE %%%%%%%%%%%%"
    with torch.no_grad():      
      val_dict = ut.val(model=model, dataset=val_set, epoch=epoch, 
                        metric_name=metric_name)

      # Update history
      history["val"] += [val_dict]

      # Lower is better
      if val_dict[metric_name] <= history["best_val_mae"]:
        history["best_val_epoch"] = epoch
        history["best_val_mae"] = val_dict[metric_name]
        torch.save(model.state_dict(), path_best_model)

        # Test Model
        if not (dataset_name == "penguins" and epoch < 50):
          testDict = ut.val(model=model, 
                                dataset=test_set, 
                                epoch=epoch, metric_name=metric_name)
          history["test"] += [testDict]
        
      ut.save_json(path_history, history)
Code example #20
def main():
    print("Starting DFC2021 baseline training script at %s" % (str(datetime.datetime.now())))


    #-------------------
    # Setup
    #-------------------
    assert os.path.exists(args.input_fn)

    if os.path.isfile(args.output_dir):
        print("A file was passed as `--output_dir`, please pass a directory!")
        return

    if os.path.exists(args.output_dir) and len(os.listdir(args.output_dir)):
        if args.overwrite:
            print("WARNING! The output directory, %s, already exists, we might overwrite data in it!" % (args.output_dir))
        else:
            print("The output directory, %s, already exists and isn't empty. We don't want to overwrite and existing results, exiting..." % (args.output_dir))
            return
    else:
        print("The output directory doesn't exist or is empty.")
        os.makedirs(args.output_dir, exist_ok=True)

    if torch.cuda.is_available():
        device = torch.device("cuda:%d" % args.gpu)
    else:
        print("WARNING! Torch is reporting that CUDA isn't available, exiting...")
        return

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)


    #-------------------
    # Load input data
    #-------------------
    input_dataframe = pd.read_csv(args.input_fn)
    image_fns = input_dataframe["image_fn"].values
    label_fns = input_dataframe["label_fn"].values
    groups = input_dataframe["group"].values

    dataset = StreamingGeospatialDataset(
        imagery_fns=image_fns, label_fns=label_fns, groups=groups, chip_size=CHIP_SIZE, num_chips_per_tile=NUM_CHIPS_PER_TILE, windowed_sampling=False, verbose=False,
        image_transform=image_transforms, label_transform=label_transforms, nodata_check=nodata_check
    )

    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        num_workers=NUM_WORKERS,
        pin_memory=True,
    )

    num_training_batches_per_epoch = int(len(image_fns) * NUM_CHIPS_PER_TILE / args.batch_size)
    print("We will be training with %d batches per epoch" % (num_training_batches_per_epoch))


    #-------------------
    # Setup training
    #-------------------
    if args.model == "unet":
        model = models.get_unet()
    elif args.model == "fcn":
        model = models.get_fcn()
    else:
        raise ValueError("Invalid model")

    model = model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=0.001, amsgrad=True)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min")

    print("Model has %d parameters" % (utils.count_parameters(model)))


    #-------------------
    # Model training
    #-------------------
    training_task_losses = []
    num_times_lr_dropped = 0 
    model_checkpoints = []
    temp_model_fn = os.path.join(args.output_dir, "most_recent_model.pt")

    for epoch in range(args.num_epochs):
        lr = utils.get_lr(optimizer)

        training_losses = utils.fit(
            model,
            device,
            dataloader,
            num_training_batches_per_epoch,
            optimizer,
            criterion,
            epoch,
        )
        scheduler.step(training_losses[0])

        model_checkpoints.append(copy.deepcopy(model.state_dict()))
        if args.save_most_recent:
            torch.save(model.state_dict(), temp_model_fn)

        if utils.get_lr(optimizer) < lr:
            num_times_lr_dropped += 1
            print("")
            print("Learning rate dropped")
            print("")
            
        training_task_losses.append(training_losses[0])
            
        if num_times_lr_dropped == 4:
            break


    #-------------------
    # Save everything
    #-------------------
    save_obj = {
        'args': args,
        'training_task_losses': training_task_losses,
        "checkpoints": model_checkpoints
    }

    save_obj_fn = "results.pt"
    with open(os.path.join(args.output_dir, save_obj_fn), 'wb') as f:
        torch.save(save_obj, f)
Code example #21
def bayesianOptimization(func_objective,
                         func_cost,
                         func_acq,
                         func_policy,
                         bounds,
                         depth_cost,
                         givenCost,
                         initial_n=1,
                         initpoint=None,
                         n_sample=10,
                         decay_rate=1,
                         length_scale=0.3,
                         ARD_Flag=False):
    """
    depth_h: num of nest
    N: num of iter
    """
    assert depth_cost <= givenCost, "Error: depth_cost > givenCost"
    #assert initial_n <= N, "Error: initial_n > N"

    _length_scale = length_scale * (bounds[0][1] - bounds[0][0])

    # load/generate init points
    if initial_n > 0:
        queries = generate_init(bounds, initial_n)
    else:
        queries = initpoint

    initial_cost = np.sum(func_cost(queries))
    remainCost = givenCost - initial_cost
    values = func_objective(queries)
    count = 0

    while remainCost > 1:  # assume that min(func_cost) is 1
        count = count + 1
        print(count)
        kernel = GPy.kern.RBF(len(bounds),
                              ARD=ARD_Flag,
                              lengthscale=length_scale)
        _remainC = min({depth_cost, remainCost})
        GP_model = fit(queries, values, kernel)

        # define acquisition function
        if func_acq == ei:
            facq = lambda x: -1 * ei(x, bounds, GP_model)
        elif func_acq == ei_per_cost:
            facq = lambda x: -1 * ei_per_cost(x, func_cost, bounds, GP_model)
        else:
            facq = lambda x: -1 * func_acq(x,
                                           bounds=bounds,
                                           func_policy=func_policy,
                                           func_cost=func_cost,
                                           depth_c=_remainC,
                                           _queries=queries,
                                           _values=values,
                                           n_sample=n_sample,
                                           decay_rate=decay_rate,
                                           ARD_Flag=ARD_Flag,
                                           length_scale=_length_scale)

        # compute the threshold (last point)
        muquery = policy_mu(GP_model, bounds)
        threthold = _remainC - func_cost(muquery)

        # select the next query given that threshold
        X = minimize_with_threthold2d(facq, func_cost, bounds, threthold)
        Y = func_objective(X)

        # subtract cost from remain cost
        remainCost = remainCost - func_cost(X)
        queries = np.concatenate([queries, X])
        values = np.concatenate([values, Y])
    return queries, values
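The `ei` acquisition called throughout these rollout examples is project code; a minimal sketch of standard expected improvement for a GPy regression model (maximisation form; `y_best` is the incumbent best observed value, and `model.predict` returning mean and variance is the GPy convention) might look like this:

import numpy as np
from scipy.stats import norm

def expected_improvement(x, model, y_best, xi=0.0):
    """Standard EI (maximisation form) under a Gaussian-process posterior."""
    mu, var = model.predict(np.atleast_2d(x))
    sigma = np.sqrt(np.maximum(var, 1e-12))  # guard against zero predictive variance
    z = (mu - y_best - xi) / sigma
    return (mu - y_best - xi) * norm.cdf(z) + sigma * norm.pdf(z)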
Code example #22
def main():

    parser = argparse.ArgumentParser(description='reaction rates regression')

    parser.add_argument(
        '-p',
        '--process',
        type=str,
        choices=['DR', 'VT', 'VV', 'VV2', 'ZR'],
        default='DR,VT,VV,VV2,ZR',
        help='Comma-separated names of properties whose regression is performed'
    )

    parser.add_argument('-a',
                        '--algorithm',
                        type=str,
                        choices=[
                            'DT', 'RF', 'ET', 'GP', 'KN', 'SVM', 'KR', 'GB',
                            'HGB', 'MLP'
                        ],
                        default='DT',
                        help='regression algorithm')

    args = parser.parse_args()

    process = args.process.split(',')
    directory = process[0] + '/data/processes'
    path = directory + "/*.csv"
    print("Process: ", colored(process[0], 'green'))

    algorithm = args.algorithm.split(',')
    print("Algorithm: ", colored(algorithm[0], 'blue'))

    parent_dir = "."
    print("PWD: ", colored(parent_dir, 'yellow'))

    n_jobs = 2

    for f in glob.glob(path):
        #print("{bcolors.OKGREEN}f{bcolors.ENDC}")
        print(colored(f, 'red'))
        dataset_k = pd.read_csv(f, delimiter=",").to_numpy()
        dataset_T = pd.read_csv(parent_dir + "/" + process[0] +
                                "/data/Temperatures.csv").to_numpy()

        x = dataset_T.reshape(-1, 1)
        y = dataset_k

        print("### Phase 1: PRE_PROCESSING ###")
        ########################################
        '''
        https://stackoverflow.com/questions/50565937/how-to-normalize-the-train-and-test-data-using-minmaxscaler-sklearn
        https://towardsdatascience.com/6-amateur-mistakes-ive-made-working-with-train-test-splits-916fabb421bb
        https://www.analyticsvidhya.com/blog/2020/04/feature-scaling-machine-learning-normalization-standardization/
        https://towardsdatascience.com/scale-standardize-or-normalize-with-scikit-learn-6ccc7d176a02

        You should fit the MinMaxScaler using the training data and
        then apply the scaler on the testing data before the prediction.

        In summary:

        Step 1: fit the scaler on the TRAINING data
        Step 2: use the scaler to transform the TRAINING data
        Step 3: use the transformed training data to fit the predictive model
        Step 4: use the scaler to transform the TEST data
        Step 5: predict using the trained model (step 3) and the transformed TEST data (step 4).

        data = datasets.load_iris()
        X    = data.data
        y    = data.target

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

        scaler = MinMaxScaler()
        X_train_scaled = scaler.fit_transform(X_train)

        model = SVC()
        model.fit(X_train_scaled, y_train)

        X_test_scaled = scaler.transform(X_test)
        y_pred = model.predict(X_test_scaled)

        '''
        data, dir, proc, model, scaler, figure, outfile = utils.mk_tree(
            f, parent_dir, process[0], algorithm[0])

        # Train/test split dataset
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            train_size=0.75,
                                                            test_size=0.25,
                                                            random_state=69)

        # Define scalers: they can be modified to investigate the effect of scalers
        ##############################################################################
        input_scaler = None  #MinMaxScaler(feature_range=(-1,1))
        output_scaler = None  #StandardScaler()
        ##############################################################################

        # Scale None and/or inputs and/or outputs
        x_train, x_test, y_train, y_test = utils.scale_dataset(
            x_train, x_test, y_train, y_test, input_scaler, output_scaler)

        print('Training Features Shape:', x_train.shape)
        print('Training Labels Shape:', y_train.shape)
        print('Testing Features Shape:', x_test.shape)
        print('Testing Labels Shape:', y_test.shape)

        # Save scalers (they may be useful)
        dump(input_scaler, open(scaler + "/scaler_x_MO_" + data + '.pkl',
                                'wb'))
        dump(output_scaler, open(scaler + "/scaler_y_MO_" + data + '.pkl',
                                 'wb'))

        if (algorithm[0] == 'DT'):
            est, hyper_params = estimators.est_DT()

        elif (algorithm[0] == 'ET'):
            est, hyper_params = estimators.est_ET()

        elif (algorithm[0] == 'SVM'):
            est, hyper_params = estimators.est_SVM()

        elif (algorithm[0] == 'KR'):
            est, hyper_params = estimators.est_KR()

        elif (algorithm[0] == 'KN'):
            est, hyper_params = estimators.est_KN()

        elif (algorithm[0] == 'MLP'):
            est, hyper_params = estimators.est_MLP()

        elif (algorithm[0] == 'GB'):
            est, hyper_params = estimators.est_GB()

        elif (algorithm[0] == 'HGB'):
            est, hyper_params = estimators.est_HGB()

        elif (algorithm[0] == 'RF'):
            est, hyper_params = estimators.est_RF()

        else:
            print("Algorithm not implemented ...")

        # https://github.com/ray-project/tune-sklearn
        # https://docs.ray.io/en/latest/tune/api_docs/sklearn.html#tune-sklearn-docs
        # class ray.tune.sklearn.TuneGridSearchCV(estimator, param_grid, early_stopping=None, scoring=None,
        # n_jobs=None, cv=5, refit=True, verbose=0, error_score='raise', return_train_score=False,
        # local_dir='~/ray_results', max_iters=1, use_gpu=False, loggers=None, pipeline_auto_early_stop=True,
        # stopper=None, time_budget_s=None, sk_n_jobs=None)
        #scheduler = MedianStoppingRule(grace_period=10.0)
        #gs = TuneGridSearchCV(est, cv=10, param_grid=hyper_params, verbose=2, n_jobs=n_jobs, scoring='r2',
        #                  refit=True, error_score=np.nan, return_train_score=True)
        #tune_search = TuneSearchCV(clf, parameter_grid, search_optimization="hyperopt", n_trials=3, early_stopping=scheduler, max_iters=10)
        #tune_search.fit(x_train, y_train)

        # Exhaustive search over specified parameter values for the estimator
        # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
        gs = GridSearchCV(est,
                          cv=5,
                          param_grid=hyper_params,
                          verbose=2,
                          n_jobs=n_jobs,
                          scoring='r2',
                          refit=True,
                          pre_dispatch='n_jobs',
                          error_score=np.nan,
                          return_train_score=True)

        # Randomized search on hyper parameters
        # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV
        # class sklearn.model_selection.RandomizedSearchCV(estimator, param_distributions, *, n_iter=10, scoring=None, n_jobs=None, refit=True,
        #                                                  cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score=nan,
        #                                                  return_train_score=False)
        #gs = RandomizedSearchCV(est, cv=10, n_iter=10, param_distributions=hyper_params, verbose=2, n_jobs=n_jobs, scoring='r2',
        #                        refit=True, pre_dispatch='n_jobs', error_score=np.nan, return_train_score=True)

        # Training
        utils.fit(x_train, y_train, gs, outfile)

        results = pd.DataFrame(gs.cv_results_)
        # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
        #compression_opts = dict(method='zip', archive_name='GridSearchCV_results.csv')
        #results.to_csv('GridSearchCV_results.zip', index=False, compression=compression_opts)
        results.to_csv(model + "/../" + "GridSearchCV_results.csv",
                       index=False,
                       sep='\t',
                       encoding='utf-8')

        #plt.figure(figsize=(12, 4))
        #for score in ['mean_test_recall', 'mean_test_precision', 'mean_test_min_both']:
        #    plt.plot([_[1] for _ in results['param_class_weight']], results[score], label=score)
        #plt.legend();

        #plt.figure(figsize=(12, 4))
        #for score in ['mean_train_recall', 'mean_train_precision', 'mean_test_min_both']:
        #    plt.scatter(x=[_[1] for _ in results['param_class_weight']], y=results[score.replace('test', 'train')], label=score)
        #plt.legend();

        # summarize results
        print("Best: %f using %s" % (gs.best_score_, gs.best_params_))
        means = gs.cv_results_['mean_test_score']
        stds = gs.cv_results_['std_test_score']
        params = gs.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))

        # Perform prediction
        y_regr = utils.predict(x_test, gs, outfile)

        # Compute the scores
        utils.scores(input_scaler, output_scaler, x_train, y_train, x_test,
                     y_test, model, gs, outfile)

        # Transform back
        x_train, x_test, y_train, y_test, y_regr = utils.scale_back_dataset(
            x_train, x_test, y_train, y_test, y_regr, input_scaler,
            output_scaler)

        # Make figures
        utils.draw_plot(x_test, y_test, y_regr, figure, data)

        # save the model to disk
        dump(gs, model + "/model_MO_" + data + '.sav')
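For reference, the GridSearchCV workflow above can be reproduced in isolation. The sketch below is a minimal, self-contained version: the diabetes dataset and a KernelRidge estimator are illustrative stand-ins for this project's data pipeline and estimators.* helpers, and the direct gs.fit call stands in for utils.fit.

import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import GridSearchCV, train_test_split

# Illustrative data and estimator (stand-ins for the project's own loaders/estimators).
X, y = load_diabetes(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

est = KernelRidge(kernel='rbf')
hyper_params = {'alpha': [0.1, 1.0], 'gamma': [0.01, 0.1]}

gs = GridSearchCV(est, param_grid=hyper_params, cv=5, scoring='r2',
                  refit=True, return_train_score=True)
gs.fit(x_train, y_train)  # corresponds to the utils.fit(x_train, y_train, gs, outfile) call above

print("Best: %f using %s" % (gs.best_score_, gs.best_params_))
pd.DataFrame(gs.cv_results_).to_csv("GridSearchCV_results.csv", index=False, sep='\t')
y_regr = gs.predict(x_test)  # corresponds to utils.predict(x_test, gs, outfile)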
Code example #23
File: train.py  Project: DADADA-X/ChangeDetection
def main():
    # print("Starting DFC2021 baseline training script at %s" % (str(datetime.datetime.now())))
    #-------------------
    # Setup
    #-------------------
    assert os.path.exists(args.train_fn)
    assert os.path.exists(args.valid_fn)

    now_time = datetime.datetime.now()
    time_str = datetime.datetime.strftime(now_time, '%m-%d_%H-%M-%S')
    # output path
    # output_dir = Path(args.output_dir).parent / time_str / Path(args.output_dir).stem
    output_dir = Path(args.output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)
    logger = utils.init_logger(output_dir / 'info.log')
    # if os.path.isfile(args.output_dir):
    #     print("A file was passed as `--output_dir`, please pass a directory!")
    #     return
    #
    # if os.path.exists(args.output_dir) and len(os.listdir(args.output_dir)):
    #     if args.overwrite:
    #         print("WARNING! The output directory, %s, already exists, we might overwrite data in it!" % (args.output_dir))
    #     else:
    #         print("The output directory, %s, already exists and isn't empty. We don't want to overwrite and existing results, exiting..." % (args.output_dir))
    #         return
    # else:
    #     print("The output directory doesn't exist or is empty.")
    #     os.makedirs(args.output_dir, exist_ok=True)

    if args.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    n_gpu = torch.cuda.device_count()
    device = torch.device('cuda:0' if n_gpu > 0 else 'cpu')
    device_ids = list(range(n_gpu))

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    #-------------------
    # Load input data
    #-------------------

    train_dataframe = pd.read_csv(args.train_fn)
    train_image_fns = train_dataframe["image_fn"].values
    train_label_fns = train_dataframe["label_fn"].values
    train_groups = train_dataframe["group"].values
    train_dataset = StreamingGeospatialDataset(
        imagery_fns=train_image_fns, label_fns=train_label_fns, groups=train_groups, chip_size=CHIP_SIZE,
        num_chips_per_tile=NUM_CHIPS_PER_TILE, transform=transform, nodata_check=nodata_check
    )

    valid_dataframe = pd.read_csv(args.valid_fn)
    valid_image_fns = valid_dataframe["image_fn"].values
    valid_label_fns = valid_dataframe["label_fn"].values
    valid_groups = valid_dataframe["group"].values
    valid_dataset = StreamingValidationDataset(
        imagery_fns=valid_image_fns, label_fns=valid_label_fns, groups=valid_groups, chip_size=CHIP_SIZE,
        stride=CHIP_SIZE, transform=transform, nodata_check=nodata_check
    )

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        num_workers=NUM_WORKERS,
        pin_memory=True,
    )
    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=args.batch_size,
        num_workers=NUM_WORKERS,
        pin_memory=True,
    )

    num_training_images_per_epoch = int(len(train_image_fns) * NUM_CHIPS_PER_TILE)
    # print("We will be training with %d batches per epoch" % (num_training_batches_per_epoch))

    #-------------------
    # Setup training
    #-------------------
    # if args.model == "unet":
    #     model = models.get_unet()
    # elif args.model == "fcn":
    #     model = models.get_fcn()
    # else:
    #     raise ValueError("Invalid model")

    model = models.isCNN(args.backbone)

    weights_init(model, seed=args.seed)

    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.AdamW(trainable_params, lr=INIT_LR, amsgrad=True, weight_decay=5e-4)
    lr_criterion = nn.CrossEntropyLoss(ignore_index=0) # todo
    hr_criterion = hr_loss
    # criterion = balanced_ce_loss
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", factor=0.5, patience=3, min_lr=0.0000001)
    # factor=0.5, patience=3, min_lr=0.0000001
    logger.info("Trainable parameters: {}".format(utils.count_parameters(model)))

    #-------------------
    # Model training
    #-------------------
    train_loss_total_epochs, valid_loss_total_epochs, epoch_lr = [], [], []
    best_loss = 1e50
    num_times_lr_dropped = 0
    # model_checkpoints = []
    # temp_model_fn = os.path.join(output_dir, "most_recent_model.pt")

    for epoch in range(args.num_epochs):
        lr = utils.get_lr(optimizer)

        train_loss_epoch, valid_loss_epoch = utils.fit(
            model,
            device,
            train_dataloader,
            valid_dataloader,
            num_training_images_per_epoch,
            optimizer,
            lr_criterion,
            hr_criterion,
            epoch,
            logger)

        scheduler.step(valid_loss_epoch)

        if epoch % config.SAVE_PERIOD == 0 and epoch != 0:
            temp_model_fn = output_dir / 'checkpoint-epoch{}.pth'.format(epoch+1)
            torch.save(model.state_dict(), temp_model_fn)

        if valid_loss_epoch < best_loss:
            logger.info("Saving model_best.pth...")
            temp_model_fn = output_dir / 'model_best.pth'
            torch.save(model.state_dict(), temp_model_fn)
            best_loss = valid_loss_epoch

        if utils.get_lr(optimizer) < lr:
            num_times_lr_dropped += 1
            print("")
            print("Learning rate dropped")
            print("")

        train_loss_total_epochs.append(train_loss_epoch)
        valid_loss_total_epochs.append(valid_loss_epoch)
        epoch_lr.append(lr)
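The epoch loop above combines a plateau-based learning-rate schedule with best-checkpoint saving. A minimal, self-contained sketch of that pattern is shown below; the toy model and random tensors are placeholders for this project's datasets and utils.fit, not part of the repository.

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(8, 2)                      # placeholder for models.isCNN(...)
optimizer = optim.AdamW(model.parameters(), lr=1e-3, amsgrad=True, weight_decay=5e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, "min", factor=0.5, patience=3, min_lr=1e-7)
criterion = nn.CrossEntropyLoss()

best_loss = float("inf")
for epoch in range(10):
    lr = optimizer.param_groups[0]["lr"]

    # toy train/validate step in place of utils.fit(...)
    x, y = torch.randn(32, 8), torch.randint(0, 2, (32,))
    optimizer.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    optimizer.step()
    valid_loss = loss.item()

    scheduler.step(valid_loss)               # the plateau scheduler watches the validation loss
    if valid_loss < best_loss:               # keep only the best checkpoint
        torch.save(model.state_dict(), "model_best.pth")
        best_loss = valid_loss
    if optimizer.param_groups[0]["lr"] < lr: # detect an LR drop, as in the loop above
        print("Learning rate dropped")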
Code example #24
File: bo.py  Project: TakuTsuzuki/TimecostBO
def bayesianOptimization(func_objective,
                         func_acq,
                         func_policy,
                         bounds,
                         depth_h,
                         N,
                         initial_n=1,
                         initpoint=None,
                         N_q=3,
                         n_sample=10,
                         decay_rate=1,
                         ARD_Flag=False,
                         length_scale=None):
    """
    depth_h: num of nest
    N: num of iter
    """
    assert depth_h <= N, "Error: depth_h > N"
    assert initial_n <= N, "Error: initial_n > N"

    _N = N - initial_n
    if initial_n > 0:
        queries = generate_init(bounds, initial_n)
    else:
        queries = initpoint

    values = func_objective(queries)
    length_scale = (bounds[0][1] - bounds[0][0]) / 10.  # note: overrides any length_scale passed in
    for i in range(_N):
        print(i)
        kernel = GPy.kern.RBF(len(bounds),
                              ARD=ARD_Flag,
                              lengthscale=length_scale)
        #gp_model = fit(queries, values)
        _h = min(depth_h, _N - i)
        #_count_depth = 0
        #_gp_list = {}
        #_queries_list = {}
        #_values_list = {}
        #_trajectory = []
        #_U = 0
        #_idlist = []
        if func_acq == ei:
            GP_model = fit(queries, values, kernel)
            facq = lambda x: -1 * ei(x, bounds, GP_model)
        else:
            facq = lambda x: -1 * func_acq(x,
                                           bounds=bounds,
                                           func_policy=func_policy,
                                           depth_h=_h,
                                           _queries=queries,
                                           _values=values,
                                           N_q=N_q,
                                           n_sample=n_sample,
                                           decay_rate=decay_rate,
                                           ARD_Flag=ARD_Flag,
                                           length_scale=length_scale)
        X = minimize(facq, bounds)
        Y = func_objective(X)
        queries = np.concatenate([queries, X])
        values = np.concatenate([values, Y])
    return queries, values
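The driver above appears to expect an objective that maps an (n, d) array of queries to an (n, 1) array of values, together with an acquisition and a policy. A hypothetical invocation on a 1-D objective might look like the sketch below; the toy objective and bounds are illustrative assumptions, and the helpers ei, fit, minimize and generate_init are assumed to be the ones defined in this project.

import numpy as np

def objective(queries):
    # toy 1-D objective; returns an (n, 1) array, one value per query row
    return np.sin(3.0 * queries[:, :1]) + queries[:, :1] ** 2

bounds = [[-1.0, 2.0]]  # one [lower, upper] pair per input dimension

queries, values = bayesianOptimization(func_objective=objective,
                                       func_acq=ei,       # plain-EI branch of the driver
                                       func_policy=None,  # unused when func_acq is ei
                                       bounds=bounds,
                                       depth_h=1,
                                       N=10,
                                       initial_n=3)
# which query is "best" depends on the convention of the ei implementation
# (improvement over the current minimum or maximum)
print(queries[np.argmin(values)])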