# Date- and time-derived feature columns, produced by the project's DM helpers
# from the raw FullTimeData values.
Data = DM.data_to_MD(DM.fullData_to_data(FullTimeData))
Time = DM.time_to_HMS(DM.fullData_to_time(FullTimeData))

# One float32 feature matrix (numeric + date + time columns) and integer labels.
X = np.column_stack((DigitData, Data, Time)).astype('float32')
Y = np.asarray(Y).astype('int')

# Reorder samples and labels with the same index array. The seed is fixed so
# the order is reproducible (DM.mixedIndex presumably draws from np.random —
# TODO confirm).
np.random.seed(2)
indices = DM.mixedIndex(X)
X, Y = X[indices], Y[indices]

# Normalization of the features; labels are one-hot encoded for the softmax head.
# NOTE(review): other snippets in this file unpack DM.normalization as
# (X, mean, std) — confirm that this call site really receives a bare array.
X = DM.normalization(X)
Y = DM.to_one_hot(Y)

from keras import layers, models, regularizers
from keras.optimizers import RMSprop

# Two hidden Dense(32, relu) layers with light dropout, then a 3-way softmax.
# Earlier experiments with additional Dense(32) / Dense(16) layers before the
# output layer are disabled.
model = models.Sequential([
    layers.Dense(32, activation='relu', input_shape=(X.shape[1], )),
    layers.Dropout(0.15),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.02),
    layers.Dense(3, activation='softmax'),
])
delimiter=";", skip_header=1) #%% #8.2 Обработка и нормализация данных #доп. стоблец баллы для компаний с разрешёнными в sic-кодами ballSic = DM.covid19SicCode(SIC_codes, sucsSIC_codes) X = np.column_stack((fiches, ballSic, adspends_s1[:, :-3])) Y = adspends_s1[:, -3:] X = np.asarray(X).astype('float32') Y = np.asarray(Y).astype('int') X, mean, std = DM.normalization(X) # перемешивание данных indices = DM.mixedIndex(X) X = X[indices] Y = Y[indices] #%% #8.3 Инициализация модели from keras import models from keras import layers from keras import regularizers model = models.Sequential() model.add(layers.Dense(32, activation='relu', input_shape=(X.shape[1], )))
# Target field and the mapping from its text categories to integer classes.
AnswerTypes = ["interest_level"]
tupeConvert = {"interest_level": {"low": 0, "medium": 1, "high": 2}}

# Pull the raw columns out of the JSON records through the DM helpers.
DigitData = np.array(DM.get_categories(dataJS, DigitTypes))
FullTimeData = DM.get_categories(dataJS, FullTimeTypes)
Data = DM.data_to_MD(DM.fullData_to_data(FullTimeData))
Time = DM.time_to_HMS(DM.fullData_to_time(FullTimeData))
TextData = np.asarray(DM.get_arr(dataJS, TextTypes))

# Labels: apply the category -> integer conversion, then read the answer field.
converted_js = DM.modifier_fiches_type(dataJS, tupeConvert)
AnswerData = np.array(DM.get_arr(converted_js, AnswerTypes))

# Input 1: numeric/date/time features as a normalized float32 matrix.
# NOTE(review): other snippets in this file unpack DM.normalization as
# (X, mean, std) — confirm that this call site really receives a bare array.
X_1 = np.column_stack((DigitData, Data, Time)).astype('float32')
X_1 = DM.normalization(X_1)

# Input 2: raw text; targets are one-hot encoded.
X_2 = TextData
Y = DM.to_one_hot(AnswerData)

# Reorder both inputs and the targets with the same index array so rows stay
# aligned.
indices = DM.mixedIndex(X_1)
X_1, X_2, Y = X_1[indices], X_2[indices], Y[indices]

# Fit a tokenizer limited to 3000 words on the text input and convert the
# texts to integer sequences.
tokinizer = Tokenizer(num_words=3000)
tokinizer.fit_on_texts(X_2)
sequences = tokinizer.texts_to_sequences(X_2)
# List of SIC codes, read as strings from a ';'-separated file with the header
# row skipped.
sucsSIC_codes = np.genfromtxt(
    "data/DataSet/sucsSIC_codes.csv",
    dtype='str',
    delimiter=";",
    skip_header=1,
)
ballSic = DM.covid19SicCode(SIC_codes, sucsSIC_codes)

# Two separate inputs:
#   X1 - the first four feature columns plus the SIC score column;
#   X2 - every adspends column except the last three, plus the last three
#        feature columns.
# The last three adspends columns are the targets.
X1 = np.column_stack((fiches[:, :4], ballSic)).astype('float32')
X2 = np.column_stack((adspends_s1[:, :-3], fiches[:, -3:])).astype('float32')
Y = np.asarray(adspends_s1[:, -3:]).astype('int')

# Each input is normalized independently; the returned mean/std are kept per
# input. Normalization of Y is deliberately disabled.
X1, mean1, std1 = DM.normalization(X1)
X2, mean2, std2 = DM.normalization(X2)
#Y = DM.normalization(Y)

# Reorder both inputs and the targets with the same index array; the seed is
# fixed before the indices are generated.
np.random.seed(426)
indices = DM.mixedIndex(X1)
X1, X2, Y = X1[indices], X2[indices], Y[indices]

from keras import Input, layers, models, regularizers
from keras.models import Model