def predict_canceled():
    # Columns excluded so train & test have the same dimensions.
    notinclude = ['ID', 'is_canceled', 'adr', 'reservation_status',
                  'reservation_status_date']
    # Columns that should be preserved as-is, not one-hot encoded.
    notoneHot = ['lead_time', 'adults', 'children', 'babies', 'days',
                 'total_of_special_requests']
    # Columns that carry redundant information for predicting cancellation.
    drop_canceled = ['agent', 'country', 'arrival_date_week_number',
                     'stays_in_weekend_nights', 'stays_in_week_nights', 'date']
    df = DataReader('train.csv', 'test.csv', notinclude, drop_canceled,
                    notoneHot, [])
    df.drop_encode()
    train_y, train_x, test_x = df.getTrainTest_cancel_np()
    sklearn(train_y, train_x, test_x)
    # model = train(train_y, train_x)
    # predict_canceled_forTest(model, test_x, df_Obj)
    # model = svm_load_model('is_canceled_0.model')
    # predict_canceled_forTest(model, test_x, df_Obj)
    clf = load('sklearn_ada')
    cancel_predict = clf.predict(test_x)
    # test_cancel_label = []
    # p = []
    # with open('Test_is_canceled_Label.txt', 'r') as f:
    #     line = f.readline()
    #     test_cancel_label = line.split(' ')
    # for i in test_cancel_label:
    #     if i == '0.0':
    #         p.append(0)
    #     elif i == '1.0':
    #         p.append(1)
    # df.add_column_to_test(np.array(p))
    # return np.array(p), df
    return cancel_predict, df
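# Hedged sketch: one plausible way the 'sklearn_ada' artifact loaded above
# could have been produced. The AdaBoost hyperparameters are assumptions,
# not values from this project; `dump` is joblib's counterpart to the
# `load` call used in predict_canceled().
from sklearn.ensemble import AdaBoostClassifier
from joblib import dump

def train_and_save_ada(train_y, train_x, path='sklearn_ada'):
    clf = AdaBoostClassifier(n_estimators=100)  # assumed hyperparameter
    clf.fit(train_x, train_y)
    dump(clf, path)
    return clf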
def test(filename, depth, trees, replacement, sample, folds, verbosity, discretization):
    reader = DataReader(discretization_level=discretization)
    data_set = reader.read_csv(filename)
    tree_creator = DecisionTreeCreator(gain, max_depth=depth)
    forest_creator = RandomForestCreator(tree_creator, trees,
                                         with_replacement=replacement,
                                         sample_size=sample)
    validator = CrossValidation(forest_creator, folds, verbosity_level=verbosity)
    return validator.validate(data_set)
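# Hedged usage example: the file name and every argument value below are
# illustrative assumptions about the interface, not project defaults.
if __name__ == "__main__":
    score = test("iris.csv", depth=5, trees=25, replacement=True,
                 sample=0.8, folds=10, verbosity=1, discretization=4)
    print(score)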
def generate_data(self):
    data_path = "../tests/test.csv"
    reader = DataReader()
    list_of_dictionaries, headers = reader.read_csv(data_path)
    data_converter = DataConverter("KOAC")
    dataset = data_converter.convert_csv(data_path)
    # streamer = DataStreamer()
    # batch_size = 10
    # batches = streamer.create_batches(dataset, batch_size)
    return dataset
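# Hedged sketch of the commented-out batching step above, assuming
# DataStreamer.create_batches simply slices the dataset into fixed-size chunks:
def create_batches(dataset, batch_size):
    # The final batch may be shorter than batch_size.
    return [dataset[i:i + batch_size]
            for i in range(0, len(dataset), batch_size)]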
def train(**kwargs):
    '''
    Train the model.
    '''
    # Update the configuration from command-line keyword arguments.
    opt._parse(kwargs)
    vis = Visulizer(opt.env)

    # Step 1: model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu:
        model.cuda()

    # Step 2: data
    train_data = DataReader(opt.train_data_root, train=True)
    val_data = DataReader(opt.train_data_root, train=False)
    train_dataloader = DataLoader(
        train_data,
        opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
    )
    val_dataloader = DataLoader(
        val_data,
        opt.batch_size,
        shuffle=False,
        num_workers=opt.num_workers,
    )

    # Step 3: objective function and optimizer
    criterion = nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    # Step 4: metrics -- the smoothed loss and a confusion matrix
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e10

    for epoch in range(opt.max_epoch):
        pass
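# Hedged sketch of one training epoch for the loop body left as `pass` above,
# following the usual meter-based PyTorch pattern. `opt.print_freq` and the
# `vis.plot` signature are assumptions about this project's helpers.
def run_epoch(model, train_dataloader, criterion, optimizer,
              loss_meter, confusion_matrix, vis, opt):
    loss_meter.reset()
    confusion_matrix.reset()
    for ii, (data, label) in enumerate(train_dataloader):
        input = data.cuda() if opt.use_gpu else data
        target = label.cuda() if opt.use_gpu else label
        optimizer.zero_grad()
        score = model(input)
        loss = criterion(score, target)
        loss.backward()
        optimizer.step()
        # Track the smoothed loss and the confusion matrix.
        loss_meter.add(loss.item())
        confusion_matrix.add(score.detach(), target.detach())
        if ii % opt.print_freq == opt.print_freq - 1:
            vis.plot('loss', loss_meter.value()[0])  # assumed Visulizer API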
def start(self):
    GPIO.setmode(GPIO.BCM)
    GPIO.setwarnings(False)
    GPIO.setup(LED_PIN, GPIO.OUT)
    GPIO.setup(BUTTON_PIN, GPIO.IN, pull_up_down=GPIO.PUD_UP)
    LightIndicator.turnOff()
    self.data_reader = DataReader()
    GPIO.add_event_detect(BUTTON_PIN, GPIO.FALLING,
                          callback=self.toggle_read, bouncetime=500)
    while True:
        time.sleep(0.5)
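# Hedged sketch of the `toggle_read` callback registered above, which would
# live on the same class as start(). RPi.GPIO edge callbacks receive the
# triggering channel number; the DataReader start/stop interface and the
# LightIndicator methods are assumptions about this project.
def toggle_read(self, channel):
    if getattr(self.data_reader, 'is_reading', False):
        self.data_reader.stop()   # assumed DataReader method
        LightIndicator.turnOff()
    else:
        self.data_reader.start()  # assumed DataReader method
        LightIndicator.turnOn()   # assumed counterpart of turnOff()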
def plot_visualizer(classifier_name, subset1, subset2):
    """Plotter based on plot_iris from the lecture GitHub repo.

    Args:
        classifier_name (string): "knn" or "svc"
        subset1 (string): first feature column from the CSV
        subset2 (string): second feature column from the CSV
    """
    if classifier_name == "knn":
        classifier = KNeighborsClassifier(n_neighbors=3)
    if classifier_name == "svc":
        classifier = SVC(gamma='scale')

    dataset = "diabetes.csv"
    df = DataReader(dataset)
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
    X = df.data_train[[subset1, subset2]]
    y = df.target_train.values
    # Map the pos/neg labels to 1/0.
    y = [0 if e == "neg" else 1 for e in y]
    classifier.fit(X, y)

    x_min, x_max = X[subset1].min() - 1, X[subset1].max() + 1
    y_min, y_max = X[subset2].min() - 1, X[subset2].max() + 1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 300),
                         np.linspace(y_min, y_max, 300))
    Z = classifier.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    pl.figure()
    pl.pcolormesh(xx, yy, Z, cmap=cmap_light)
    pl.scatter(X[subset1], X[subset2], c=y, cmap=cmap_bold)
    pl.xlabel(subset1)
    pl.ylabel(subset2)
    pl.axis('tight')
    savename = classifier_name + subset1 + subset2
    pl.savefig("static/" + savename + ".png")
    # pl.show()

    data_test = df.data_test[[subset1, subset2]]
    target_test = df.target_test.values
    # Map the pos/neg labels to 1/0.
    target_test = [0 if e == "neg" else 1 for e in target_test]
    target_pred = classifier.predict(data_test)
    results = metrics.accuracy_score(target_test, target_pred)
    return results
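# Hedged usage example; the two feature names are assumptions about the
# diabetes.csv column headers.
if __name__ == "__main__":
    accuracy = plot_visualizer("knn", "glu", "bmi")
    print("test accuracy:", accuracy)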
def main():
    cube = img_as_float(DataReader.PaviauRaw().cube)
    image = cube[:, :, [40, 17, 1]]
    i_sp = image.shape
    # Standardize per band, then restore the spatial shape.
    image = np.reshape(image, (-1, i_sp[-1]))
    image = StandardScaler().fit_transform(image)
    image = np.reshape(image, i_sp)

    numSegments = 2500
    num_sample = 3
    num_epoch = 1000
    train_csv = pd.read_csv("data/splitDataset/train/splitPavia_{}.csv".format(num_sample))
    test_csv = pd.read_csv("data/splitDataset/test/splitPavia_{}.csv".format(num_sample))
    training_set = train_csv.loc[:, ["row_0", "col_0", "label_0"]].to_numpy()
    testing_set = test_csv.loc[:, ["row_0", "col_0", "label_0"]].to_numpy()
    global_train_mask = getGlobalMask(DataReader.PaviauRaw().truth, training_set)
    global_test_mask = getGlobalMask(DataReader.PaviauRaw().truth, testing_set)

    # Build a superpixel graph over the image and derive per-superpixel labels.
    segments, edge_index = getSuperpixelGraph(image, num_segments=numSegments,
                                              compactness=2.5, sigma=2)
    train_mask, test_mask, y = getMaskAndLable(training_set,
                                               DataReader.PaviauRaw().truth,
                                               segments)
    sp_feature = getSuperpixelFeature(DataReader.PaviauRaw().cube, segments)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net(103, 9).to(device)
    # x = torch.Tensor(PCA(5).fit_transform(sp_feature))
    x = torch.Tensor(sp_feature)
    edge_index = torch.tensor(edge_index).t().contiguous()
    showSuperpixel(image, segments)

    data = Data(x=x, edge_index=edge_index, test_mask=test_mask,
                train_mask=train_mask, y=y)
    data.test_mask = torch.tensor(test_mask)
    data.train_mask = torch.tensor(train_mask)
    data.y = torch.tensor(y)
    data = data.to(device)

    gpu_segments = torch.tensor(segments).flatten().to(device)
    global_train_mask = torch.tensor(global_train_mask).to(device)
    gpu_truth = torch.tensor(DataReader.PaviauRaw().truth,
                             dtype=torch.long).flatten().to(device)
    evaluate(model, gpu_truth, gpu_segments, global_test_mask)
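# Hedged sketch of the getGlobalMask helper used above, assuming it flags the
# flattened ground-truth positions listed in a (row, col, label) table:
def getGlobalMask(truth, coords):
    mask = np.zeros(truth.size, dtype=bool)
    rows = coords[:, 0].astype(int)
    cols = coords[:, 1].astype(int)
    mask[rows * truth.shape[1] + cols] = True  # row-major flattening
    return mask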
import tensorflow as tf
import os
from model.net3d_model import net_3d
import config as cfg
import time
import numpy as np
from data import DataReader

reader = DataReader('val')
pointcloud_train, true_box_train = reader.provide(cfg.test_batch_size)

training = tf.placeholder(dtype=tf.bool, shape=[], name='training')
pointcloud = tf.placeholder(dtype=tf.float32,
                            shape=[None, cfg.pc_size, cfg.pc_channel],
                            name='pointclouds')

model = net_3d()
pred = model.inference(pointcloud, training)
box, score, cls = model.predict(pred)

config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.9  # allocate at most 90% of GPU memory
config.gpu_options.allow_growth = True  # allocate dynamically

saver = tf.train.Saver()
result = []
with tf.Session(config=config) as sess:
    saver.restore(sess, tf.train.latest_checkpoint(cfg.model_dir))
    graph = tf.get_default_graph()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(cfg.test_num):
        start_time = time.time()
        pointcloud_value, true_box_value = sess.run(
            [pointcloud_train, true_box_train])
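# Hedged continuation of the truncated evaluation loop above: feed the fetched
# batch through the detection heads and collect timed predictions. The result
# format and the final coordinator shutdown are assumptions.
#         box_value, score_value, cls_value = sess.run(
#             [box, score, cls],
#             feed_dict={pointcloud: pointcloud_value, training: False})
#         result.append((box_value, score_value, cls_value))
#         print('batch %d took %.3f s' % (i, time.time() - start_time))
#     coord.request_stop()
#     coord.join(threads)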
from crf_sequence_tagger import CRFSequenceTagger
from data import WordTokenizeCorpusReader, DataReader
from trainer import Trainer

corpus = DataReader.load_tagged_corpus(
    "/home/anhv/.languageflow/datasets/VLSP2013-WTK/",
    train_file="train.txt", test_file="test.txt")

features = [
    # word unigrams, bigrams, and trigrams
    "T[-2]", "T[-1]", "T[0]", "T[1]", "T[2]",
    "T[-2,-1]", "T[-1,0]", "T[0,1]", "T[1,2]",
    "T[-2,0]", "T[-1,1]", "T[0,2]",
    # lowercased variants
    "T[-2].lower", "T[-1].lower", "T[0].lower", "T[1].lower", "T[2].lower",
    "T[-2,-1].lower", "T[-1,0].lower",
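# Hedged sketch of how the (truncated) feature template list above would
# typically be consumed; the exact CRFSequenceTagger/Trainer signatures are
# assumptions based on the imports at the top of this script:
# tagger = CRFSequenceTagger(features)
# trainer = Trainer(tagger, corpus)
# trainer.train()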
import numpy as np
from sklearn.linear_model import SGDClassifier
from data import DataReader
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn import metrics

diabetes_reader = DataReader("diabetes.csv")


def fit(subset1, subset2, classifier="knn"):
    data = diabetes_reader.data_train[[subset1, subset2]]
    data_test = diabetes_reader.data_test[[subset1, subset2]]
    target = diabetes_reader.target_train
    target_test = diabetes_reader.target_test
    if classifier == "knn":
        model = KNeighborsClassifier(n_neighbors=3)
    elif classifier == "SVC":
        model = SVC(gamma='scale')
    else:
        raise ValueError("classifier must be 'knn' or 'SVC'")
    model.fit(data, np.ravel(target))
    target_pred = model.predict(data_test)
    results = metrics.accuracy_score(target_test, target_pred)
    print(results)
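# Hedged usage example; the column names are assumptions about diabetes.csv.
if __name__ == "__main__":
    fit("glu", "bmi", classifier="knn")
    fit("glu", "bmi", classifier="SVC")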
from data import DataReader

bedfilename = ("/data/seq2seq-data/GM12878_cells/peak/macs2/overlap/"
               "E116.GM12878_Lymphoblastoid_Cells.ENCODE.Duke_Crawford."
               "DNase-seq.merged.20bp.filt.50m.pf.pval0.1.500000."
               "naive_overlap.narrowPeak.gz")
referenceGenome = 'hg19'
flankLength = 400
fastaFname = "/data/seq2seq-data/hg19.fa"
bigwigFname = ("/data/seq2seq-data/GM12878_cells/signal/macs2/rep1/"
               "E116.GM12878_Lymphoblastoid_Cells.ENCODE.Duke_Crawford."
               "DNase-seq.merged.20bp.filt.50m.pf.fc.signal.bigwig")

reader = DataReader(bedfilename, referenceGenome, flankLength,
                    fastaFname, bigwigFname)

"""
Iterate over batches of size 20:

    for _ in range(numBatches):
        batchX, batchY = reader.getBatch(20)
        doSomething(batchX, batchY)
"""
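# Hedged, runnable version of the usage sketch in the docstring above;
# numBatches and the batch consumer are illustrative assumptions.
def iterate_batches(reader, numBatches=100, batchSize=20):
    for _ in range(numBatches):
        batchX, batchY = reader.getBatch(batchSize)
        # Stand-in for real training/evaluation on the batch.
        print(len(batchX), len(batchY))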
def main(args):
    if (not args.do_train) and (not args.do_valid) and (not args.do_test):
        raise ValueError('one of train/val/test mode must be chosen.')

    if args.init_checkpoint:
        override_config(args)
    elif args.data_path is None:
        raise ValueError('one of init_checkpoint/data_path must be chosen.')

    if args.do_train and args.save_path is None:
        raise ValueError('Where do you want to save your trained model?')

    if args.save_path and not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # Write logs to checkpoint and console
    set_logger(args)

    data_reader = DataReader(args.data_path)
    num_entity = len(data_reader.entity_dict)
    num_relation = len(data_reader.relation_dict)

    logging.info('Model: {}'.format(args.model))
    logging.info('Data Path: {}'.format(args.data_path))
    logging.info('Num Entity: {}'.format(num_entity))
    logging.info('Num Relation: {}'.format(num_relation))
    logging.info('Num Train: {}'.format(len(data_reader.train_data)))
    logging.info('Num Valid: {}'.format(len(data_reader.valid_data)))
    logging.info('Num Test: {}'.format(len(data_reader.test_data)))

    if args.model == 'ModE':
        kge_model = ModE(num_entity, num_relation, args.hidden_dim, args.gamma)
    elif args.model == 'HAKE':
        kge_model = HAKE(num_entity, num_relation, args.hidden_dim, args.gamma,
                         args.modulus_weight, args.phase_weight)
    else:
        raise ValueError('model %s is not supported' % args.model)

    logging.info('Model Parameter Configuration:')
    for name, param in kge_model.named_parameters():
        logging.info('Parameter %s: %s, require_grad = %s'
                     % (name, str(param.size()), str(param.requires_grad)))

    kge_model = kge_model.cuda()

    if args.do_train:
        # Set training dataloader iterator
        train_dataloader_head = DataLoader(
            TrainDataset(data_reader, args.negative_sample_size, BatchType.HEAD_BATCH),
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=max(1, args.cpu_num // 2),
            collate_fn=TrainDataset.collate_fn
        )

        train_dataloader_tail = DataLoader(
            TrainDataset(data_reader, args.negative_sample_size, BatchType.TAIL_BATCH),
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=max(1, args.cpu_num // 2),
            collate_fn=TrainDataset.collate_fn
        )

        train_iterator = BidirectionalOneShotIterator(train_dataloader_head,
                                                      train_dataloader_tail)

        # Set training configuration
        current_learning_rate = args.learning_rate
        optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, kge_model.parameters()),
            lr=current_learning_rate
        )
        warm_up_steps = args.max_steps // 2

    if args.init_checkpoint:
        # Restore model from checkpoint directory
        logging.info('Loading checkpoint %s...' % args.init_checkpoint)
        checkpoint = torch.load(os.path.join(args.init_checkpoint, 'checkpoint'))
        init_step = checkpoint['step']
        kge_model.load_state_dict(checkpoint['model_state_dict'])
        if args.do_train:
            current_learning_rate = checkpoint['current_learning_rate']
            warm_up_steps = checkpoint['warm_up_steps']
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    else:
        logging.info('Randomly Initializing %s Model...' % args.model)
        init_step = 1

    step = init_step

    logging.info('Start Training...')
    logging.info('init_step = %d' % init_step)
    if not args.do_test:
        logging.info('learning_rate = %f' % current_learning_rate)
    logging.info('batch_size = %d' % args.batch_size)
    logging.info('hidden_dim = %d' % args.hidden_dim)
    logging.info('gamma = %f' % args.gamma)
    logging.info('adversarial_temperature = %f' % args.adversarial_temperature)

    if args.do_train:
        training_logs = []

        # Training Loop
        for step in range(init_step, args.max_steps):
            log = kge_model.train_step(kge_model, optimizer, train_iterator, args)
            training_logs.append(log)

            if step >= warm_up_steps:
                if not args.no_decay:
                    current_learning_rate = current_learning_rate / 10
                    logging.info('Change learning_rate to %f at step %d'
                                 % (current_learning_rate, step))
                optimizer = torch.optim.Adam(
                    filter(lambda p: p.requires_grad, kge_model.parameters()),
                    lr=current_learning_rate
                )
                warm_up_steps = warm_up_steps * 3

            if step % args.save_checkpoint_steps == 0:
                save_variable_list = {
                    'step': step,
                    'current_learning_rate': current_learning_rate,
                    'warm_up_steps': warm_up_steps
                }
                save_model(kge_model, optimizer, save_variable_list, args)

            if step % args.log_steps == 0:
                metrics = {}
                for metric in training_logs[0].keys():
                    metrics[metric] = sum([log[metric] for log in training_logs]) / len(training_logs)
                log_metrics('Training average', step, metrics)
                training_logs = []

            if args.do_valid and step % args.valid_steps == 0:
                logging.info('Evaluating on Valid Dataset...')
                metrics = kge_model.test_step(kge_model, data_reader, ModeType.VALID, args)
                log_metrics('Valid', step, metrics)

        save_variable_list = {
            'step': step,
            'current_learning_rate': current_learning_rate,
            'warm_up_steps': warm_up_steps
        }
        save_model(kge_model, optimizer, save_variable_list, args)

    if args.do_valid:
        logging.info('Evaluating on Valid Dataset...')
        metrics = kge_model.test_step(kge_model, data_reader, ModeType.VALID, args)
        log_metrics('Valid', step, metrics)

    if args.do_test:
        logging.info('Evaluating on Test Dataset...')
        metrics = kge_model.test_step(kge_model, data_reader, ModeType.TEST, args)
        log_metrics('Test', step, metrics)
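# Hedged sketch of a command-line entry point for main(). The flag names
# mirror the attributes referenced above; every default value here is an
# assumption chosen only for illustration, not a project default.
import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--do_train', action='store_true')
    parser.add_argument('--do_valid', action='store_true')
    parser.add_argument('--do_test', action='store_true')
    parser.add_argument('--data_path', type=str, default=None)
    parser.add_argument('--init_checkpoint', type=str, default=None)
    parser.add_argument('--save_path', type=str, default=None)
    parser.add_argument('--model', type=str, default='HAKE')
    parser.add_argument('--hidden_dim', type=int, default=500)
    parser.add_argument('--gamma', type=float, default=12.0)
    parser.add_argument('--modulus_weight', type=float, default=1.0)
    parser.add_argument('--phase_weight', type=float, default=0.5)
    parser.add_argument('--negative_sample_size', type=int, default=128)
    parser.add_argument('--batch_size', type=int, default=1024)
    parser.add_argument('--cpu_num', type=int, default=4)
    parser.add_argument('--learning_rate', type=float, default=1e-4)
    parser.add_argument('--max_steps', type=int, default=100000)
    parser.add_argument('--adversarial_temperature', type=float, default=1.0)
    parser.add_argument('--no_decay', action='store_true')
    parser.add_argument('--save_checkpoint_steps', type=int, default=10000)
    parser.add_argument('--log_steps', type=int, default=100)
    parser.add_argument('--valid_steps', type=int, default=10000)
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())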
        test_mask[i] = False
    return train_mask, test_mask, seg_y


if __name__ == "__main__":
    print(os.getcwd())
    # construct the argument parser and parse the arguments
    # ap = argparse.ArgumentParser()
    # ap.add_argument("-i", "--image", required=True, help="Path to the image")
    # args = vars(ap.parse_args())
    # # load the image and convert it to a floating point data type
    # image = img_as_float(io.imread(args["image"]))

    # loop over the number of segments
    image = img_as_float(DataReader.KSCRaw().cube)[:, :, [40, 17, 1]]
    image = (image - np.min(image)) / (np.max(image) - np.min(image))
    i_sp = image.shape
    # apply SLIC and extract (approximately) the supplied number of segments

    # paviau
    image = img_as_float(DataReader.PaviauRaw().cube)[:, :, [100, 65, 3]]
    image = (image - np.min(image)) / (np.max(image) - np.min(image))
    i_sp = image.shape
    # apply SLIC and extract (approximately) the supplied number of segments
    numSegments = 2600
    segments = slic(image, n_segments=numSegments, compactness=3,
import os
import pickle as pkl

from data import DataReader

dirname = 'AI2-ScienceQuestions-V2.1-Jan2018'
prefix = 'ElementarySchool'
trainPath = os.path.join(dirname, prefix, 'Elementary-NDMC-' + 'Train.jsonl')
testPath = os.path.join(dirname, prefix, 'Elementary-NDMC-' + 'Test.jsonl')
devPath = os.path.join(dirname, prefix, 'Elementary-NDMC-' + 'Dev.jsonl')

# reader = DataReader(trainPath, testPath, devPath)
reader = DataReader('/home/akshit/train.jsonl', testPath, devPath)
reader.createDataSet('Train')
reader.createDataSet('Dev')
reader.createDataSet('Test')
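# Hedged sketch motivated by the otherwise unused `pkl` import above: persist
# the created splits. The `reader.datasets` attribute name is an assumption
# about the DataReader interface.
# with open('elementary_ndmc.pkl', 'wb') as f:
#     pkl.dump(reader.datasets, f)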
def train():
    reader = DataReader(os.path.join(FLAGS.data_path, FLAGS.data_set),
                        FLAGS.embedding_bag_size)
    train_data = reader.train_dataset
    eval_data = reader.dev_dataset

    iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                               train_data.output_shapes)
    batch_data = iterator.get_next()
    start = batch_data['start']
    path = batch_data['path']
    end = batch_data['end']
    score = batch_data['score']
    original_features = batch_data['original_features']

    train_init_op = iterator.make_initializer(train_data)
    eval_init_op = iterator.make_initializer(eval_data)

    with tf.variable_scope("code2vec_model"):
        opt = Option(reader)
        train_model = Code2VecModel(start, path, end, score, original_features, opt)
        train_op = utils.get_optimizer(FLAGS.optimizer,
                                       FLAGS.learning_rate).minimize(train_model.loss)

    with tf.variable_scope('code2vec_model', reuse=True):
        eval_opt = Option(reader, training=False)
        eval_model = Code2VecModel(start, path, end, score, original_features, eval_opt)

    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)

    with tf.Session(config=session_conf) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # Keep the three smallest validation losses (stored negated, so the
        # queue's smallest element is the worst of the three).
        min_eval_loss = PriorityQueue(maxsize=3)
        stable_min_loss = 0
        for i in range(1000):
            start_time = time.time()
            train_loss, train_acc = evaluate(sess, train_model, batch_data,
                                             train_init_op, train_op)
            eval_loss, eval_acc = evaluate(sess, eval_model, batch_data, eval_init_op)
            eval_reg_loss, eval_reg_acc = evaluate(sess, train_model, batch_data,
                                                   eval_init_op)

            if not min_eval_loss.full():
                min_eval_loss.put(-eval_loss)
                stable_min_loss = 0
            else:
                k = min_eval_loss.get()
                if k >= -eval_loss:
                    stable_min_loss += 1
                else:
                    stable_min_loss = 0
                min_eval_loss.put(max(k, -eval_loss))

            if opt.classification > 0:
                tf.logging.info(
                    'Epoch %2d: train-loss: %.5f (acc=%.2f), val-loss: %.5f (acc=%.2f), '
                    'min-loss: %.5f, cost: %.4f s'
                    % (i + 1, train_loss, train_acc, eval_loss, eval_acc,
                       float(-np.mean(min_eval_loss.queue)), time.time() - start_time))
            else:
                tf.logging.info(
                    'Epoch %2d: train-loss: %.5f, val-reg: %.5f, val-loss: %.5f, '
                    'min-loss: %.5f, cost: %.4f s, '
                    'attention_orthogonal_penalty: %.4f, fusion_penalty: %.4f, '
                    'encoding_weight_L2: %.4f'
                    % (i + 1, train_loss, eval_reg_loss, eval_loss,
                       float(-np.mean(min_eval_loss.queue)), time.time() - start_time,
                       train_model.regularizations['attention_orthogonal_penalty'].eval(),
                       train_model.regularizations['fusion_penalty'].eval(),
                       train_model.regularizations['encoding_weight_L2'].eval()))

            # Early stop once the best validation loss has stopped improving.
            if stable_min_loss >= 5 and i >= 200:
                break
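# Hedged sketch of the evaluate() helper invoked above: re-initialize the
# shared iterator for the chosen split, run until it is exhausted, and
# average loss/accuracy. The model attribute names (.loss, .accuracy) are
# assumptions; `np` is numpy and `tf` is tensorflow as imported elsewhere.
def evaluate(sess, model, batch_data, init_op, train_op=None):
    sess.run(init_op)
    losses, accuracies = [], []
    fetches = [model.loss, model.accuracy]
    if train_op is not None:
        fetches.append(train_op)  # apply gradients while measuring train loss
    while True:
        try:
            values = sess.run(fetches)
        except tf.errors.OutOfRangeError:
            break  # the initializable iterator has been exhausted
        losses.append(values[0])
        accuracies.append(values[1])
    return float(np.mean(losses)), float(np.mean(accuracies))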
from ResnetFace import make_models
from keras.optimizers import Adam
from data import DataReader, TripletGenerator
import config
from os.path import join

'''
This fine-tuning is inspired by the triplet loss function that appeared in
the FaceNet paper: https://arxiv.org/abs/1503.03832
I replaced the VGG model with a ResNet50 trained on VGGFace2.
'''

if __name__ == '__main__':
    ResnetModel, TripletModel = make_models()

    # train_data = DataReader(dir_images=config.path_LFW)
    # train_data = LFWReader(dir_images=config.path_LFW)
    train_data = DataReader(dir_images=config.train_data)
    train_generator = TripletGenerator(train_data)

    test_data = DataReader(dir_images=config.test_data)
    test_generator = TripletGenerator(test_data)

    # Set the number of trainable layers: fine-tune only the last 10.
    for layer in ResnetModel.layers[-10:]:
        print(layer.name)
        layer.trainable = True
    for layer in ResnetModel.layers[:-10]:
        print(layer.name)
        layer.trainable = False
    # for layer in ResnetModel.layers:
    #     layer.trainable = True
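    # Hedged continuation of the fine-tuning setup: when the triplet loss is
    # computed inside TripletModel's graph, a pass-through loss is the common
    # pattern. The learning rate and step counts below are assumptions, not
    # values taken from this project.
    from keras import backend as K

    def identity_loss(y_true, y_pred):
        # The model output already is the triplet loss, so just average it.
        return K.mean(y_pred - 0 * y_true)

    TripletModel.compile(loss=identity_loss, optimizer=Adam(lr=1e-5))
    TripletModel.fit_generator(train_generator,
                               steps_per_epoch=200,   # assumed
                               epochs=10,             # assumed
                               validation_data=test_generator,
                               validation_steps=30)   # assumed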
import tensorflow as tf
import os
from model.net3d_model import net_3d
import config as cfg
import time
from data import DataReader

# Prepare data.
reader = DataReader('train')
pointcloud_train, true_box_train = reader.provide(cfg.batch_size)

model = net_3d()
yolo_out = [
    int(cfg.pc_height / (8 * cfg.reso_height)),
    int(cfg.pc_width / (8 * cfg.reso_width)),
    cfg.num_anchors,
    10 + cfg.num_classes
]

training = tf.placeholder(dtype=tf.bool, shape=[], name='training')
pointcloud = tf.placeholder(dtype=tf.float32,
                            shape=[None, cfg.pc_size, cfg.pc_channel],
                            name='pointclouds')
true_box = tf.placeholder(
    dtype=tf.float32,
    shape=[None, yolo_out[0], yolo_out[1], yolo_out[2], yolo_out[3]],
    name='labels')

pred_box = model.inference(pointcloud, training)
loss, xyz_t, abg_p = model.loss(pred_box, true_box, cfg.anchors, training=True)

global_step = tf.Variable(0, trainable=False, name='global_step')
# decayed_learning_rate = learning_rate * 0.1 ^ (global_step / 3000)
learning_rate = tf.train.exponential_decay(cfg.learning_rate, global_step,
                                           3000, 0.1)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss=loss, global_step=global_step)
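# Hedged continuation: a conventional TF1 training loop for the graph built
# above, mirroring the queue-runner pattern of the companion eval script.
# `cfg.max_iter` and the logging/checkpoint cadence are assumptions.
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for step in range(cfg.max_iter):  # assumed config field
        pc_value, box_value = sess.run([pointcloud_train, true_box_train])
        _, loss_value = sess.run(
            [train_op, loss],
            feed_dict={pointcloud: pc_value, true_box: box_value, training: True})
        if step % 100 == 0:
            print('step %d: loss = %.4f' % (step, loss_value))
        if step % 1000 == 0:
            saver.save(sess, os.path.join(cfg.model_dir, 'net3d'),
                       global_step=global_step)
    coord.request_stop()
    coord.join(threads)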