def ana_input(self, input_data):
    """Parse flow descriptions from *input_data* and register them for scheduling.

    Each entry of ``input_data['flows']`` is a space-separated record:
    ``"src dst port old new trans_type ratio bw"``.  A ``FlowDesGlobal`` is
    built per record and stored in ``self.flows_to_schedule`` keyed by
    ``src + dst + str(port)``.

    Args:
        input_data: dict with a 'flows' key holding the record strings.
    """
    self.logger.info("in ana_input")
    for record in input_data['flows']:
        args = record.split(' ')
        src = args[0]
        dst = args[1]
        try:
            port = int(args[2])
        except (ValueError, IndexError):
            # Missing or non-numeric port field: treat as unspecified.
            # (Narrowed from a bare `except:` that swallowed every error.)
            port = None
        old = tools.str_to_list(args[3])
        new = tools.str_to_list(args[4])
        self.logger.info(new)
        trans_type = args[5]
        ratio = float(args[6])
        bw = int(args[7])
        flow = FlowDesGlobal(src, dst, port, old, new, None, trans_type)
        flow.ratio = ratio
        flow.bw = bw
        self.logger.info(flow.new)
        self.logger.info(flow.trans_type)
        # Direct assignment instead of update({...}) for a single key.
        self.flows_to_schedule[src + dst + str(port)] = flow
    self.logger.info(self.flows_to_schedule)
    self.logger.info("---------------------start!---------------------")
    self.logger.info(nowTime())
def embedding(csv_path):
    """Train a Word2Vec model on the preprocessed articles in *csv_path*.

    Reads the ``new_article`` column, tokenizes each row with
    ``str_to_list``, and fits a 100-dimensional Word2Vec model
    (window=5, min_count=2) for 100 epochs.

    Args:
        csv_path: path to a CSV with a ``new_article`` column.

    Returns:
        The trained ``gensim`` Word2Vec model.
    """
    train = pd.read_csv(csv_path)
    # Tokenized corpus built in one pass (comprehension over append-loop).
    x_data = [str_to_list(sentence) for sentence in train.new_article.values]

    w2v_model = gensim.models.word2vec.Word2Vec(size=100, window=5, min_count=2)
    w2v_model.build_vocab(x_data)
    vocab_size = len(w2v_model.wv.vocab.keys())
    print("Vocab size", vocab_size)

    # Train Word Embeddings
    w2v_model.train(x_data, total_examples=len(x_data), epochs=100)
    return w2v_model
import os

import gensim
import numpy as np
import pandas as pd
from pathlib import Path

from model import base_model
from tools import str_to_list, text2sequence, glove_word2vec
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger

DATA_PATH = Path("data")
max_len = 100
epochs = 10
batch_size = 128
# Dimensionality of the Word2Vec vectors; must match the size used when the
# embedding was trained (100).  Kept separate from max_len — they are only
# coincidentally equal.
embedding_dim = 100

train = pd.read_csv(DATA_PATH / "prep_news_train.csv")
word2vec = gensim.models.Word2Vec.load('./data/news_min0.embedding')

# Tokenize every preprocessed article (comprehension over append-loop).
train_data = [str_to_list(sentence) for sentence in train.new_article.values]
train_X, vocab_size, tokenizer = text2sequence(train_data, max_len=max_len)
train_y = train['info']

word_index = tokenizer.word_index
# BUG FIX: the second axis is the embedding dimension, not max_len.
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for word, index in word_index.items():
    if word in word2vec:
        embedding_matrix[index] = word2vec[word]
    else:
        # BUG FIX: skip out-of-vocabulary words instead of `break`, which
        # aborted the loop and left the rest of the matrix zeroed.
        print("word2vec에 없는 단어입니다.")
import pandas as pd
import numpy as np
from pathlib import Path

from tools import str_to_list, text2sequence

DATA_PATH = Path("data")

train = pd.read_csv(DATA_PATH / "prep_news_train.csv")
# Tokenize every preprocessed article (comprehension over append-loop).
x_data = [str_to_list(sentence) for sentence in train.new_article.values]

train_X, vocab_size, tokenizer = text2sequence(x_data, max_len=100)
train_y = train['info']

word_index = tokenizer.word_index
# Peek at the first (word, index) pair for a sanity check; guard against an
# empty vocabulary (the original loop-and-break printed nothing in that case).
if word_index:
    print(next(iter(word_index.items())))