def main():
    """Print the usage banner, parse the command line and run the chosen mode.

    Mode ``'0'`` calls ``ed.embedding`` with the ``-i`` / ``-w`` paths and
    mode ``'1'`` calls ``ex.extraction()``.  Any other mode value is rejected
    with an argparse usage error (exit status 2) instead of being silently
    ignored.
    """
    banner = (
        "This is program about steganography",
        "The program is builed by Team 2 of L02 from KMA-HaNoi",
        "How to use?",
        "Put your images u wanto use to folder named input",
        "This program have 02 mode are: Embeding and Extraction",
        "With Embeding use form command like this (below):",
        "python main.py -i [PATH_HOST_IMAGE] -w [PATH_WATERMARK_IMAGE] -m 0",
        "With extraction use form command like this (below):",
        "python main.py -i [PATH_HOST_IMAGE] -w [PATH_WATERMARK_IMAGE] -m 1",
        "Output will be in folder named output",
        "= = = = ==",
        "= = = = = = = =",
        "= = = == = = =",
        "= = = = = = = ==",
        "= = = = = =",
        "= = = = = =",
    )
    for line in banner:
        print(line)

    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=False, help="Host imgae")
    ap.add_argument("-w", "--watermark", required=False, help="Watermark image")
    ap.add_argument("-m", "--mode", required=True, help="Mode to use")
    args = vars(ap.parse_args())

    mode = args["mode"]
    print(mode)
    if mode == '0':
        print("Start Embeding")
        ed.embedding(pathhostimge=args["image"],
                     pathwatermarkimage=args["watermark"])
        print("have been done")
    elif mode == '1':
        ex.extraction()
    else:
        # Previously an unknown mode fell through without any feedback.
        ap.error("unknown mode %r: use 0 (embedding) or 1 (extraction)" % (mode,))
def __init__(self, input_sequence, input_profile, training):
    """Wire up the network layers from the sequence and profile inputs.

    The sequence is embedded, concatenated with ``input_profile`` along
    axis 2, and passed through the CNN, recurrent and fully connected
    stages.  Each intermediate result is stored on the instance; the
    softmax of the logits is exposed as ``readout``.

    Args:
        input_sequence: input handed to ``embedding.embedding``.
        input_profile: tensor concatenated with the embedding on axis 2.
        training: flag forwarded to the CNN, recurrent and FC builders.
    """
    embedded = embedding.embedding(input_sequence)
    self.embedding_layer = embedded

    merged = tf.concat([embedded, input_profile], axis=2)
    self.input_layer = merged

    self.cnn_layer = cnn.cnns(merged, training)
    # _recurrentLayer only receives `training`, so the attributes above must
    # already be set when it runs (presumably it reads them -- confirm).
    self.rnn_layer = self._recurrentLayer(training)

    logits = fc.fc(self.rnn_layer, self.cnn_layer, training)
    self.logits = logits
    self.readout = tf.nn.softmax(logits)
    tf.summary.histogram('logits', logits)
def inference(sentence):
    """Embed a single sentence and run the model on it.

    The sentence is split on single spaces, wrapped in a one-element batch,
    embedded with the module-level ``embedding`` helper and passed to
    ``mod_inference``.  The input, its embedding and the model output are
    printed along the way.

    Args:
        sentence: the text to run, as one string.

    Returns:
        Whatever ``mod_inference`` returns for the embedded sentence.
    """
    print("input sentence:")
    print(sentence)

    # A batch containing exactly one tokenised sentence.
    batch = [sentence.split(' ')]
    vectors = embedding(batch, batch_size, single_sentence_length)
    print("input embedding:")
    print(vectors)

    result = mod_inference(vectors)
    print("output vector:")
    print(result)
    return result
def run(word_train, label_train, word_dev, label_dev, vocab, device, kf_index=0):
    """Train a BiLSTM-CRF on one train/dev split and evaluate it.

    Args:
        word_train, label_train: training inputs and labels.
        word_dev, label_dev: development inputs and labels.
        vocab: vocabulary object (provides ``vocab_size()`` / ``label_size()``).
        device: device the model is moved to.
        kf_index: index forwarded to ``train`` and ``test``.

    Returns:
        ``(test_loss, f1)`` as produced by ``test`` on ``config.test_dir``.
    """
    # Datasets and their loaders; both loaders shuffle.
    train_dataset = SegDataset(word_train, label_train, vocab, config.label2id)
    dev_dataset = SegDataset(word_dev, label_dev, vocab, config.label2id)
    loader_options = dict(batch_size=config.batch_size, shuffle=True)
    train_loader = DataLoader(train_dataset,
                              collate_fn=train_dataset.collate_fn,
                              **loader_options)
    dev_loader = DataLoader(dev_dataset,
                            collate_fn=dev_dataset.collate_fn,
                            **loader_options)

    # Pretrained embedding weights are only loaded when configured.
    embedding_weight = embedding(vocab) if config.pretrained_embedding else None

    model = BiLSTM_CRF(
        embedding_size=config.embedding_size,
        hidden_size=config.hidden_size,
        vocab_size=vocab.vocab_size(),
        target_size=vocab.label_size(),
        num_layers=config.lstm_layers,
        lstm_drop_out=config.lstm_drop_out,
        nn_drop_out=config.nn_drop_out,
        pretrained_embedding=config.pretrained_embedding,
        embedding_weight=embedding_weight,
    )
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=config.lr, betas=config.betas)
    scheduler = StepLR(optimizer, step_size=config.lr_step, gamma=config.lr_gamma)

    # Re-initialise every CRF parameter uniformly in [-1, 1].
    for crf_param in model.crf.parameters():
        torch.nn.init.uniform_(crf_param, -1, 1)

    train(train_loader, dev_loader, vocab, model, optimizer, scheduler,
          device, kf_index)

    # Final evaluation on the held-out test set, without gradient tracking.
    with torch.no_grad():
        test_loss, f1 = test(config.test_dir, vocab, device, kf_index)
    return test_loss, f1
def run_sci():
    """Command-line entry point for SCI.

    Parses the options, builds a ``HicData`` object for the requested
    resolution, writes the inter-chromosomal graph (and, when ``--adj`` is
    given, the adjacency matrix) and finally writes the embedding file.

    The help strings are written as single literals with explicit spaces:
    the previous adjacent-literal form glued words together
    ("predictsub-compartments"), and the ``--alpha`` help advertised a
    default of 5 although the actual default is 0.5.
    """
    parser = argparse.ArgumentParser(
        description="SCI a method to predict sub-compartments from HiC data",
        add_help=False)

    required = parser.add_argument_group('Required arguments')
    required.add_argument("-n", "--name", action="store", dest="name",
                          help="Name of the experiment",
                          type=str, required=True)
    required.add_argument("-f", "--infile", action="store", dest="infile",
                          help="Name of HiC interaction file",
                          type=str, required=True)
    required.add_argument("-r", "--resolution", action="store", dest="res",
                          help=("Required resolution to predict "
                                "compartments, provided bins size "
                                "should have resolution greater than "
                                "or equal the provided value"),
                          type=int, required=True)
    required.add_argument("-g", "--genome_size", action="store",
                          dest="genome_size",
                          help=("File containing chromosome size of "
                                "the target genome"),
                          type=str, required=True)

    optional = parser.add_argument_group('optional arguments')
    optional.add_argument("-h", "--help", action="help",
                          help="show this help message and exit")
    optional.add_argument("-o", "--order", action="store", dest="order",
                          help=("Graph order to consider when performing graph "
                                "embedding. Available options are 1,2 or both. "
                                "Default: 1"),
                          type=str, default="1")
    optional.add_argument("-s", "--samples", action="store", dest="samples",
                          help=("Number of edges to sample in millions order "
                                "from the graph. Default: 25"),
                          type=int, default=25)
    optional.add_argument("-k", "--clusters", action="store", dest="clusters",
                          help=("Nubmer of sub-compartments to be predicted."
                                " Default: 5"),
                          type=int, default=5)
    optional.add_argument("--adj", action="store", dest="adj_matrix",
                          help="Adjaceny matrix file of the HiC graph",
                          default=None, type=str)
    optional.add_argument("--alpha", action="store", dest="alpha",
                          help=("Weight for graph embeddine optimization"
                                " Default: 0.5"),
                          type=float, default=0.5)
    oArgs = parser.parse_args()

    myobject = HicData(oArgs.res, oArgs.name)
    myobject.initialize(oArgs.genome_size)
    # NOTE(review): loading of the interaction file is disabled, so --infile
    # is parsed but not used here -- confirm this is intended.
    #myobject.load_interaction_data(oArgs.infile)
    hic_graph = myobject.write_inter_chrom_graph()
    GW_metadata = myobject.get_bins_info()
    if oArgs.adj_matrix is not None:
        myobject.write_GW_matrix(oArgs.adj_matrix)

    emb = embedding(oArgs.name, GW_metadata, oArgs.res)
    emb.make_embedding_file(hic_graph, oArgs.order, oArgs.samples)
def hl_solver (self, chempot=0., threshold=1.0e-12):
    """Run the configured high-level solver on every impurity and sum the results.

    For each impurity index the routine builds the impurity orbital masks,
    calls ``embedding.embedding`` on the valence density matrix, assembles
    the ``ImpOrbs`` selection matrix and the core/active orbital blocks, and
    hands them to the solver selected by ``self.method``.  The electron
    counts and energies returned by the solvers are accumulated; the nuclear
    repulsion and ``self.e_core`` are added to the energy, which is printed.

    When ``self.parallel`` is true each MPI rank handles the single impurity
    whose index equals its rank, and the totals are reduced on rank 0 and
    broadcast back.

    Args:
        chempot: chemical potential forwarded to the solver.
        threshold: accepted for interface compatibility; the embedding call
            uses ``self.thresh`` instead.

    Returns:
        The accumulated electron count (summed over ranks in parallel mode).

    Raises:
        ValueError: if ``self.method`` is not one of the supported solvers.
    """
    # energy = self.mol.energy_nuc()
    energy = 0.
    nelec = 0.

    rdm_ao = np.dot(self.cf, self.cf.T)
    AX_val = np.dot(self.Sf, self.A_val)
    rdm_val = np.dot(AX_val.T, np.dot(rdm_ao, AX_val))

    print("shapes")
    print("cf", self.cf.shape)
    print("rdm_ao", rdm_ao.shape)
    print("AX_val", AX_val.shape)
    print("rdm_val", rdm_val.shape)

    if not self.parallel:
        myrange = range(self.nimp)
    else:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        rank = MPI.COMM_WORLD.Get_rank()
        # One impurity per rank.
        myrange = range(rank, rank + 1)

    has_core = self.nc > 0
    has_virt = self.nvt > 0

    for i in myrange:
        # prepare orbital indexing: the plain masks follow imp_atx, the
        # underscored masks follow imp_at
        imp_val = np.zeros((self.nvl,), dtype=bool)
        imp_val_ = np.zeros((self.nvl,), dtype=bool)
        if has_core:
            imp_core = np.zeros((self.nc,), dtype=bool)
            imp_core_ = np.zeros((self.nc,), dtype=bool)
        if has_virt:
            imp_virt = np.zeros((self.nvt,), dtype=bool)
            imp_virt_ = np.zeros((self.nvt,), dtype=bool)

        for k in range(self.mol.natm):
            if self.imp_atx[i][k]:
                imp_val[self.at_val == k] = True
                if has_core:
                    imp_core[self.at_core == k] = True
                if has_virt:
                    imp_virt[self.at_virt == k] = True
            if self.imp_at[i][k]:
                imp_val_[self.at_val == k] = True
                if has_core:
                    imp_core_[self.at_core == k] = True
                if has_virt:
                    imp_virt_[self.at_virt == k] = True

        print("imp val", imp_val)

        # embedding
        cf_tmp, ncore, nact, ImpOrbs_x = embedding.embedding(
            rdm_val, imp_val, threshold=self.thresh, transform_imp='hf')
        print("Doing EMBEDDING")
        print("cf_tmp", cf_tmp)
        print("ncore, nact", ncore, nact)
        print("ImpOrbs_x", ImpOrbs_x)
        cf_tmp = np.dot(self.A_val, cf_tmp)
        print("cf_tmp", cf_tmp)

        # localize imp+bath orbitals
        if self.method == 'dmrg':
            # Small random antisymmetric rotation applied before localizing.
            XR = np.random.rand(nact, nact)
            XR -= XR.T
            XS = sla.expm(0.01*XR)
            cf_ib = np.dot(cf_tmp[:, ncore:ncore+nact], XS)
            # Fixed: this call referenced the undefined names `mol` and
            # `cd_ib`; it must use self.mol and the rotated cf_ib.
            cf_ib = lo.Boys(self.mol, cf_ib).kernel()
            R = np.dot(cf_ib.T,
                       np.dot(self.Sf, cf_tmp[:, ncore:ncore+nact]))
            print(np.allclose(np.dot(cf_tmp[:, ncore:ncore+nact],
                                     ImpOrbs_x),
                              np.dot(cf_ib, np.dot(R, ImpOrbs_x))))
            ImpOrbs_x = np.dot(R, ImpOrbs_x)
            cf_tmp[:, ncore:ncore+nact] = cf_ib
            print(cf_ib)

        # prepare ImpOrbs
        ni_val = nact
        nj_val = np.count_nonzero(imp_val_)
        if has_core:
            ni_core = np.count_nonzero(imp_core)
            nj_core = np.count_nonzero(imp_core_)
        else:
            ni_core = nj_core = 0
        if has_virt:
            ni_virt = np.count_nonzero(imp_virt)
            nj_virt = np.count_nonzero(imp_virt_)
        else:
            ni_virt = nj_virt = 0

        ii = 0  # running column index into ImpOrbs
        ImpOrbs = np.zeros((ni_val+ni_core+ni_virt,
                            nj_val+nj_core+nj_virt,))
        # The inner loops use `p` so they do not shadow the impurity index i.
        if has_core:
            j = 0
            for p in range(self.nc):
                if imp_core[p] and imp_core_[p]:
                    ImpOrbs[j, ii] = 1.
                    ii += 1
                if imp_core[p]:
                    j += 1
        j = 0
        for p in range(self.nvl):
            if imp_val[p] and imp_val_[p]:
                ImpOrbs[ni_core:ni_core+ni_val, ii] = ImpOrbs_x[:, j]
                ii += 1
            if imp_val[p]:
                j += 1
        if has_virt:
            j = 0
            for p in range(self.nvt):
                if imp_virt[p] and imp_virt_[p]:
                    ImpOrbs[ni_core+ni_val+j, ii] = 1.
                    ii += 1
                if imp_virt[p]:
                    j += 1

        # prepare orbitals
        cf_core = cf_virt = None
        if has_core:
            cf_core = self.A_core[:, imp_core]
        if has_virt:
            cf_virt = self.A_virt[:, imp_virt]
        cf_val = cf_tmp[:, ncore:ncore+nact]
        if cf_core is not None and cf_virt is not None:
            cf = np.hstack((cf_core, cf_val, cf_virt,))
        elif cf_core is not None:
            cf = np.hstack((cf_core, cf_val,))
        elif cf_virt is not None:
            cf = np.hstack((cf_val, cf_virt,))
        else:
            cf = cf_val

        # prepare core
        if has_core:
            Ac_ = self.A_core[:, ~(imp_core)]
            X_core = np.hstack((Ac_, cf_tmp[:, :ncore],))
        else:
            X_core = cf_tmp[:, :ncore]

        n_orth = cf.shape[1]
        if cf_virt is not None:
            n_orth -= cf_virt.shape[1]

        print("x-core", X_core)
        print("cf b4 solver", cf)
        print("imporbs", ImpOrbs)

        # Positional arguments shared by every solver back end.
        solver_args = (self.mol, 2*(self.nup - X_core.shape[1]),
                       X_core, cf, ImpOrbs)
        method = self.method
        # An if/elif chain (not a dispatch table) so that only the selected
        # solver module and attributes are looked up.
        if method == 'hf':
            nel_, en_ = pyscf_hf.solve(
                *solver_args, chempot=chempot, n_orth=n_orth)
        elif method == 'cc':
            nel_, en_ = pyscf_cc.solve(
                *solver_args, chempot=chempot, n_orth=n_orth,
                FrozenPot=self.FrozenPot)
        elif method == 'ccsd(t)':
            nel_, en_ = pyscf_ccsdt.solve(
                *solver_args, chempot=chempot, n_orth=n_orth)
        elif method == 'mp2':
            nel_, en_ = pyscf_mp2.solve(
                *solver_args, chempot=chempot, n_orth=n_orth,
                FrozenPot=self.FrozenPot)
        elif method == 'dfmp2':
            nel_, en_ = pyscf_dfmp2.solve(
                *solver_args, chempot=chempot, n_orth=n_orth,
                FrozenPot=self.FrozenPot, mf_tot=self.mf_tot)
        elif method == 'dfmp2_testing':
            nel_, en_ = dfmp2_testing.solve(
                *solver_args, chempot=chempot, n_orth=n_orth,
                FrozenPot=self.FrozenPot, mf_tot=self.mf_tot)
        elif method == 'dfmp2_testing2':
            # This variant is called without mf_tot.
            nel_, en_ = dfmp2_testing2.solve(
                *solver_args, chempot=chempot, n_orth=n_orth,
                FrozenPot=self.FrozenPot)
        elif method == 'dfmp2_testing3':
            nel_, en_ = dfmp2_testing3.solve(
                *solver_args, chempot=chempot, n_orth=n_orth,
                FrozenPot=self.FrozenPot, mf_tot=self.mf_tot)
        elif method == 'dfmp2_testing4':
            nel_, en_ = dfmp2_testing4.solve(
                *solver_args, chempot=chempot, n_orth=n_orth,
                FrozenPot=self.FrozenPot, mf_tot=self.mf_tot)
        elif method == 'fci':
            nel_, en_ = pyscf_fci.solve(
                *solver_args, chempot=chempot, n_orth=n_orth)
        elif method == 'dmrg':
            nel_, en_ = dmrg.solve(
                *solver_args, chempot=chempot, n_orth=n_orth)
        else:
            # Previously an unknown method hit unbound (or stale) nel_/en_.
            raise ValueError("unknown high-level method: %r" % (method,))

        nelec += nel_
        energy += en_

    if self.parallel:
        nelec_tot = comm.reduce(nelec, op=MPI.SUM, root=0)
        energy_tot = comm.reduce(energy, op=MPI.SUM, root=0)
        if rank == 0:
            energy_tot += self.mol.energy_nuc() + self.e_core
        nelec = comm.bcast(nelec_tot, root=0)
        energy = comm.bcast(energy_tot, root=0)
        comm.barrier()
        if rank == 0:
            print('DMET energy = ', energy)
    else:
        energy += self.mol.energy_nuc() + self.e_core
        print('DMET energy = ', energy)

    return nelec
# Split data into: Training, Testing, Validating SPLIT = 0.2 # Ratio of tests sample verses training samples SEED = 42 # random state of train_test_split, for better debugging # Hyper Parameters LEARNING_RATE = 0.05 LEARNING_DECAY = LEARNING_RATE / 32 BN_EPS = 0.8 EARLY_STOP = EarlyStopping(monitor='val_acc', patience=3) BATCH_SIZE = 128 EPOCH = 1 # Prepare training and testing samples TRAIN_PATH = os.path.join(os.getcwd(), 'TrainingSamples') logger.debug('Path for training sample: %s', TRAIN_PATH) LABEL, FEATURE = embedding(TRAIN_PATH) logger.debug('Training Size: %s', len(LABEL)) X_train, X_test, Y_train, Y_test = train_test_split(FEATURE, LABEL, test_size=SPLIT, random_state=SEED) X_train = np.array([i for i in X_train]) X_test = np.array([i for i in X_test]) Y_train = to_categorical(Y_train) Y_test = to_categorical(Y_test) logger.debug('training shape = %s', X_train.shape) DATA = [X_train, X_train, X_train, X_train] FIT_HISTORY = model.fit(DATA, Y_train, batch_size=BATCH_SIZE,
# --------------------------- Embedding process -------- if process_name == 'embedding': files = os.listdir(data_path) shutil.rmtree('workspace/img_marked', ignore_errors=True) os.makedirs('workspace/img_marked') # Path of watermarked image # --------- Generate the random binary watermark ----------- total_bit = int(capacity*np.prod(img_size)) mark = np.random.randint(2, size=(total_bit, 1), dtype='uint8') switched_block = [] # Percent of switched NROI block into ROI block start_time = time.time() for file in tqdm(files): img_org = Image.open(os.path.join(data_path, file)) img_org = np.asarray(img_org) [img_marked, switched] = embedding(img_org, img_size, mark, block_size, thresh, coefficient, segment_model_path) # embedding switched_block.append(switched) img_marked = Image.fromarray(img_marked) img_marked.save(os.path.join('workspace/img_marked', file)) elapsed = time.time() - start_time sio.savemat('workspace/mark_'+str(capacity)+'.mat', {'mark': mark}) print('Average percent of switched NROI block into ROI block : ' + str(np.mean(switched_block))) print('Embedding time: ', elapsed) # ------------------------------------ Extraction process----------- elif process_name == 'extraction': files = os.listdir(data_path) shutil.rmtree('workspace/img_recovered', ignore_errors=True) os.makedirs('workspace/img_recovered') mark_orig = sio.loadmat('./workspace/mark_' + str(capacity) + '.mat')['mark']
def page3():
    """Serve the machine-learning page and handle its visualization requests.

    A non-POST request, or a POST without a ``visual_button`` form field,
    renders ``machineLearning.html`` with the available train/test files and
    saved embedding / machine-learning models.

    A POST with ``visual_button`` carries a JSON document selecting the data
    files and one of five workflows (fresh or pre-trained embedding, with or
    without fresh or pre-trained machine learning).  Embedding-only
    workflows render the embedding visualization; workflows that include
    machine learning also write confusion matrices, metric tables and
    per-sample success tables as CSV files before rendering the combined
    visualization.

    Raises:
        ValueError: if the submitted options match none of the five
            supported workflows.
    """
    form_page_context = dict(
        train_file_list=os.listdir(train_file_path),
        test_file_list=os.listdir(test_file_path),
        embed_model_list=os.listdir(embed_model_path),
        machine_model_list=os.listdir(machine_model_path),
    )

    if request.method != "POST":
        return render_template("machineLearning.html", **form_page_context)

    # Only the "visualize" button triggers any processing.
    raw_request = request.form.get("visual_button")
    if not raw_request:
        return render_template('machineLearning.html', **form_page_context)

    response_data = json.loads(raw_request)
    print(response_data)

    trainFile = response_data['trainData']
    testFile = response_data['testData']

    # Read the data.
    # NOTE(review): the file names come straight from the form and are
    # concatenated into a path unchecked -- consider validating them against
    # the directory listings.
    train = pd.read_csv(train_file_path + trainFile)
    test = pd.read_csv(test_file_path + testFile)

    # Missing values in 'x' or 'y' are handled by dropping incomplete rows
    # (for now).
    if pd.isnull(train['x']).sum() > 0 or pd.isnull(train['y']).sum() > 0:
        train = train.dropna()
    if pd.isnull(test['x']).sum() > 0 or pd.isnull(test['y']).sum() > 0:
        test = test.dropna()

    # Shuffle the rows.
    train = train.sample(frac=1).reset_index(drop=True)
    test = test.sample(frac=1).reset_index(drop=True)

    pre_embed = 'is_pre_embed' in response_data
    pre_machine = 'is_pre_machine' in response_data

    # 1) Fresh embedding + visualization: only embedding parameters needed.
    if not pre_embed and not pre_machine and response_data[
            'machine_value'] == []:
        print('first-embed, no-machine')
        embed_type = response_data['embed_type']
        embed_params = get_embed_params(embed_type,
                                        response_data['embed_value'])
        # embedding
        X_train, X_test, y_train, y_test = embedding(
            trainFile.split(".")[0], embed_type, train, test, embed_params)
        # dimensionality reduction
        dimension_type = response_data['dimension_type']
        dimension_reduction(dimension_type, X_train, X_test, y_train,
                            y_test)
        return render_template(
            'visualization.html',
            visualization="embedding_and_visualization")

    # 2) Pre-trained embedding + visualization: no parameters needed.
    elif pre_embed and not pre_machine and response_data[
            'embed_value'] == [] and response_data['machine_value'] == []:
        print('pre-embed, no-machine')
        embed_type = response_data['embed_type']
        pre_embed_model = response_data['pre_embed_model']
        # embedding
        X_train, X_test, y_train, y_test = pre_train_embedding(
            embed_type, pre_embed_model, train, test)
        # dimensionality reduction
        dimension_type = response_data['dimension_type']
        dimension_reduction(dimension_type, X_train, X_test, y_train,
                            y_test)
        return render_template(
            'visualization.html',
            visualization="embedding_and_visualization")

    # 3) Fresh embedding + fresh machine learning: both parameter sets needed.
    elif not pre_embed and not pre_machine and response_data[
            'machine_value'] != []:
        print('first-embed, first-machine')
        embed_type = response_data['embed_type']
        embed_params = get_embed_params(embed_type,
                                        response_data['embed_value'])
        machine_type = response_data['machine_type']
        machine_params = get_machine_params(
            machine_type, response_data['machine_value'])
        # embedding
        X_train, X_test, y_train, y_test = embedding(
            trainFile.split(".")[0], embed_type, train, test, embed_params)
        # dimensionality reduction
        dimension_type = response_data['dimension_type']
        dimension_reduction(dimension_type, X_train, X_test, y_train,
                            y_test)
        # machine learning
        train_y_pred, test_y_pred = machine_learning(
            embed_type, machine_type, X_train, X_test, y_train, y_test,
            machine_params)

    # 4) Pre-trained embedding + fresh machine learning: only machine
    #    learning parameters needed.
    elif pre_embed and not pre_machine and response_data[
            'embed_value'] == [] and response_data['machine_value'] != []:
        print('pre-embed, first-machine')
        embed_type = response_data['embed_type']
        pre_embed_model = response_data['pre_embed_model']
        machine_type = response_data['machine_type']
        machine_params = get_machine_params(
            machine_type, response_data['machine_value'])
        # embedding
        X_train, X_test, y_train, y_test = pre_train_embedding(
            embed_type, pre_embed_model, train, test)
        # dimensionality reduction
        dimension_type = response_data['dimension_type']
        dimension_reduction(dimension_type, X_train, X_test, y_train,
                            y_test)
        # machine learning
        train_y_pred, test_y_pred = machine_learning(
            embed_type, machine_type, X_train, X_test, y_train, y_test,
            machine_params)

    # 5) Pre-trained embedding + pre-trained machine learning: no parameters
    #    needed.
    elif pre_embed and pre_machine:
        print('pre-embed, pre-machine')
        embed_type = response_data['embed_type']
        machine_type = response_data['machine_type']
        pre_embed_model = response_data['pre_embed_model']
        # embedding
        X_train, X_test, y_train, y_test = pre_train_embedding(
            embed_type, pre_embed_model, train, test)
        # dimensionality reduction
        dimension_type = response_data['dimension_type']
        dimension_reduction(dimension_type, X_train, X_test, y_train,
                            y_test)
        # machine learning
        train_y_pred, test_y_pred = pre_train_machine_learning(
            embed_type, machine_type, X_train, X_test, y_train, y_test)

    else:
        # Previously this fell through to an UnboundLocalError below.
        raise ValueError(
            "unsupported combination of embedding / machine-learning options")

    # Machine-learning results once training has finished.
    from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

    # Sort the labels and pass them explicitly so the matrix rows/columns
    # are guaranteed to line up with the DataFrame labels; iterating a bare
    # set gives an arbitrary order.
    target_names = sorted(set(y_train))
    train_df = pd.DataFrame(
        confusion_matrix(y_train, train_y_pred, labels=target_names),
        index=target_names, columns=target_names)
    test_df = pd.DataFrame(
        confusion_matrix(y_test, test_y_pred, labels=target_names),
        index=target_names, columns=target_names)

    path = r'/home/ubuntu/project2/csv_files/'
    train_df.to_csv(path + 'confusion_matrix_train.csv', index=False)
    test_df.to_csv(path + 'confusion_matrix_test.csv', index=False)

    # Classification metrics (macro-averaged where applicable).
    train_accuracy = accuracy_score(y_train, train_y_pred)
    train_precision = precision_score(y_train, train_y_pred, average='macro')
    train_recall = recall_score(y_train, train_y_pred, average='macro')
    train_f1 = f1_score(y_train, train_y_pred, average='macro')

    test_accuracy = accuracy_score(y_test, test_y_pred)
    test_precision = precision_score(y_test, test_y_pred, average='macro')
    test_recall = recall_score(y_test, test_y_pred, average='macro')
    test_f1 = f1_score(y_test, test_y_pred, average='macro')

    print('train accuracy: {}, test accuracy: {}'.format(
        train_accuracy, test_accuracy))
    print('train precision: {}, test precision: {}'.format(
        train_precision, test_precision))
    print('train recall: {}, test recall: {}'.format(
        train_recall, test_recall))
    print('train f1: {}, test f1: {}'.format(train_f1, test_f1))

    metric_names = ['accuracy', 'precision', 'recall', 'f1']
    train_score_df = pd.DataFrame({
        'Metrics': metric_names,
        'Score': [round(train_accuracy, 2), round(train_precision, 2),
                  round(train_recall, 2), round(train_f1, 2)],
    })
    train_score_df.to_csv(path + 'metrics_score_train.csv', index=False)

    test_score_df = pd.DataFrame({
        'Metrics': metric_names,
        'Score': [round(test_accuracy, 2), round(test_precision, 2),
                  round(test_recall, 2), round(test_f1, 2)],
    })
    test_score_df.to_csv(path + 'metrics_score_test.csv', index=False)

    # Add the predictions and a success/failure flag to the tables read back
    # from the embedding-visualization CSV files.
    train_df = pd.read_csv(path + 'embedding_and_visualization_train.csv')
    test_df = pd.read_csv(path + 'embedding_and_visualization_test.csv')

    success_mapping_table = {0: "실패", 1: "성공"}
    for frame, predictions in ((train_df, train_y_pred),
                               (test_df, test_y_pred)):
        frame['pred'] = predictions
        matched = (frame['pred'] == frame['target']).astype(int)
        frame['success'] = matched.map(success_mapping_table)

    train_df.to_csv(
        path + 'embedding_and_machinelearning_visualization_train.csv',
        index=False)
    test_df.to_csv(
        path + 'embedding_and_machinelearning_visualization_test.csv',
        index=False)

    return render_template(
        'visualization.html',
        visualization="embedding_and_machineLearning_visualization")
# Look up the settings registered for the requested keyword and load its
# series (space-separated file).
input_filepath, keyword, delay, m = trend_options[args.keyword]
data = utilities.read_csv(input_filepath, " ")
utilities.plot_series(data, input_filepath, keyword)
embedding.mutual_information(input_filepath, len(data))
# Arguments for the false-nearest-neighbours run below.
theiler = 0
min_dim = 1
max_dim = 10
ratio = 10.0
embedding.false_nearest_neighbors(input_filepath, delay, theiler, min_dim, max_dim, ratio)
# Embed the series with the configured delay and m, then plot components 1 and 2.
embedded = embedding.embedding(input_filepath, data, delay, m, keyword)
utilities.plot_embedding(embedded, input_filepath, [1, 2])
#embedding.recurrence(input_filepath, delay)
# args.k = 5 # baseball (Error: 0.387174821025)
# args.k = 5 # influenza (Error: 1.25175439578)
# args.k = 5 # full mooon (Error: 0.907941254943)
# Multi-step forecasting forces a single nearest neighbour.
if args.multistep:
    print(
        "Since multi-step forecast is {0}, number of nearest neighbors (currently {1}) must be set to 1"
        .format(args.multistep, args.k))
    args.k = 1
# NOTE(review): presumably the ".embed" file is written by
# embedding.embedding above -- confirm.  `data` is rebound to the kNN dataset.
data = knn.Data(input_filepath + ".embed")
def embedding(self, texts):
    """Preprocess *texts*, embed them and pad the embedded result.

    Args:
        texts: raw input handed to ``pre.preprocess``.

    Returns:
        The output of ``emb.padding`` applied to the embedded texts.
    """
    # Preprocessing first, then embedding, then padding.
    cleaned = pre.preprocess(texts)
    vectors = emb.embedding(cleaned)
    return emb.padding(vectors)