def create_model(argv, loader):
    if argv.model == "nb":
        model = NB_Model(argv)
    elif argv.model == "dan":
        model = DAN(argv)
    elif argv.model == "blstm":
        model = BLSTM(argv, loader.desired_len)
    elif argv.model == "blstm_2dcnn":
        model = BLSTM_2DCNN(argv, loader.desired_len)
    else:
        raise ValueError("unknown model type: %s" % argv.model)
    return model
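# Example (sketch): how create_model might be driven from the command line.
# `parser` and DataLoader below are illustrative assumptions; the only facts
# taken from the factory above are that argv carries a `model` field and that
# the loader exposes `desired_len` for the BLSTM variants.
#
#   args = parser.parse_args(["--model", "blstm"])
#   loader = DataLoader(args)            # hypothetical loader with .desired_len
#   model = create_model(args, loader)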
def create_model_hierarchy(cls,
                           bottom_item_list,
                           emb_wgts_bottom_items_dict,
                           layer_nums=3,
                           rnn_state_size=[],
                           bottom_emb_item_len=3,
                           flag_embedding_trainable=1,
                           seq_len=39,
                           batch_size=20,
                           mode_attention=1,
                           drop_out_r=0.,
                           att_layer_cnt=2,
                           bhDwellAtt=0,
                           rnn_type="WGRU",
                           RNN_norm="GRU",
                           flagCid3RNNNorm=False):
    c_mask_value = 0.
    att_zero_value = -2 ** 31  # large negative logit used to mask attention positions

    def slice(x):
        return x[:, -1, :]

    flag_concate_sku_cid = True
    RNN = rnn_type
    MODE_BHDWELLATT = True if bhDwellAtt == 1 else False
    ATT_NET_LAYER_CNT = att_layer_cnt

    bottom_item_len = len(bottom_item_list)
    input = [None] * bottom_item_len
    word_num = [None] * bottom_item_len
    emb_len = [None] * bottom_item_len
    embedding_bottom_item = [None] * bottom_item_len
    embed = [None] * bottom_item_len

    layer_nums_max = 3
    rnn_embed = [None] * layer_nums_max
    rnn = [None] * layer_nums_max
    rnn_output = [None] * layer_nums_max

    flag_embedding_trainable = True if flag_embedding_trainable == 1 else False

    ## Embedding layer
    # Embedding sku, bh, cid3, gap, dwell: 0, 1, 2, 3, 4
    for i in range(bottom_item_len):
        bottom_item = bottom_item_list[i]
        ### input
        input[i] = Input(batch_shape=(batch_size, seq_len), dtype='int32')
        ### Embedding
        # load embedding weights
        # emb_wgts[i] = np.loadtxt(init_wgts_file_emb[i])
        word_num[i], emb_len[i] = emb_wgts_bottom_items_dict[bottom_item].shape
        print(word_num[i], emb_len[i])
        # get embedding (the sku embedding, i == 0, is kept frozen)
        cur_flag_embedding_trainable = flag_embedding_trainable
        if i == 0:
            cur_flag_embedding_trainable = False
        embedding_bottom_item[i] = Embedding(word_num[i],
                                             emb_len[i],
                                             input_length=seq_len,
                                             trainable=cur_flag_embedding_trainable)
        embed[i] = embedding_bottom_item[i](input[i])  # drop_out=0.2
        embedding_bottom_item[i].set_weights([emb_wgts_bottom_items_dict[bottom_item]])

    # cal mask: mark steps that are padding or whose sku / cid3 does not change
    # at the next step; those positions are zeroed out before the higher-level RNNs
    mask_sku = np.zeros((batch_size, seq_len))
    mask_cid3 = np.zeros((batch_size, seq_len))
    for j in range(batch_size):
        sku = input[0][j, :]
        cid3 = input[2][j, :]
        for k in range(seq_len - 1):
            if sku[k] == 0 or sku[k] == sku[k + 1]:
                mask_sku[j][k] = 1
            if sku[k] == 0 or cid3[k] == cid3[k + 1]:
                mask_cid3[j][k] = 1

    # f mask
    def f_mask_sku(x):
        x_new = x
        for j in range(batch_size):
            for k in range(seq_len):
                if mask_sku[j][k] == 1:
                    x_new = T.set_subtensor(x_new[j, k, :], c_mask_value)
        return x_new

    def f_mask_cid3(x):
        x_new = x
        for j in range(batch_size):
            for k in range(seq_len):
                if mask_cid3[j][k] == 1:
                    x_new = T.set_subtensor(x_new[j, k, :], c_mask_value)
        return x_new

    def f_mask_att_sku(x):
        x_new = x
        for j in range(batch_size):
            for k in range(seq_len):
                if mask_sku[j][k] == 1:
                    x_new = T.set_subtensor(x_new[j, k], att_zero_value)
        return x_new

    def f_mask_att_cid3(x):
        x_new = x
        for j in range(batch_size):
            for k in range(seq_len):
                if mask_cid3[j][k] == 1:
                    x_new = T.set_subtensor(x_new[j, k], att_zero_value)
        return x_new

    def K_dot(arr):
        axes = [1, 1]
        x, y = arr[0], arr[1]
        return K.batch_dot(x, y, axes=axes)

    def K_squeeze(x):
        return K.squeeze(x, axis=-1)

    Lambda_sequeeze = Lambda(lambda x: K_squeeze(x))

    ## RNN layer
    if RNN == "BLSTM":
        rnn[0] = BLSTM(rnn_state_size[0], interval_dim=emb_len[3], weight_dim=emb_len[1],
                       stateful=False, return_sequences=True, dropout=drop_out_r,
                       name="rnn_out_micro")
        rnn[1] = BLSTM(rnn_state_size[1], interval_dim=emb_len[3], weight_dim=emb_len[4],
                       stateful=False, return_sequences=True, dropout=drop_out_r,
                       name="rnn_out_sku")
        if not flagCid3RNNNorm:
            rnn[2] = BLSTM(rnn_state_size[2], interval_dim=emb_len[3], weight_dim=0,
                           stateful=False, return_sequences=True, dropout=drop_out_r,
                           name="rnn_out_cid3")
    elif RNN == "BLSTM2":
        rnn[0] = BLSTM2(rnn_state_size[0], interval_dim=emb_len[3], weight_dim=emb_len[1],
                        stateful=False, return_sequences=True, dropout=drop_out_r,
                        name="rnn_out_micro")
        rnn[1] = BLSTM2(rnn_state_size[1], interval_dim=emb_len[3], weight_dim=emb_len[4],
                        stateful=False, return_sequences=True, dropout=drop_out_r,
                        name="rnn_out_sku")
        if not flagCid3RNNNorm:
            rnn[2] = BLSTM2(rnn_state_size[2], interval_dim=emb_len[3], weight_dim=0,
                            stateful=False, return_sequences=True, dropout=drop_out_r,
                            name="rnn_out_cid3")
    elif RNN == "TimeLSTM":
        rnn[0] = BLSTM(rnn_state_size[0], interval_dim=emb_len[3], weight_dim=0,
                       stateful=False, return_sequences=True, dropout=drop_out_r,
                       name="rnn_out_micro")
        rnn[1] = BLSTM(rnn_state_size[1], interval_dim=emb_len[3], weight_dim=0,
                       stateful=False, return_sequences=True, dropout=drop_out_r,
                       name="rnn_out_sku")
        if not flagCid3RNNNorm:
            rnn[2] = BLSTM(rnn_state_size[2], interval_dim=emb_len[3], weight_dim=0,
                           stateful=False, return_sequences=True, dropout=drop_out_r,
                           name="rnn_out_cid3")
    elif RNN == "WGRU":
        rnn[0] = WGRU(rnn_state_size[0], weight_dim=emb_len[1], stateful=False,
                      return_sequences=True, dropout=drop_out_r, name="rnn_out_micro")
        rnn[1] = WGRU(rnn_state_size[1], weight_dim=emb_len[3], stateful=False,
                      return_sequences=True, dropout=drop_out_r, name="rnn_out_sku")
        if not flagCid3RNNNorm:
            rnn[2] = WGRU(rnn_state_size[2], weight_dim=emb_len[3], stateful=False,
                          return_sequences=True, dropout=drop_out_r, name="rnn_out_cid3")
    elif RNN == "LSTM" or RNN == "GRU":
        RNN = LSTM if RNN == "LSTM" else GRU
        rnn[0] = RNN(rnn_state_size[0], stateful=False, return_sequences=True,
                     dropout=drop_out_r, name="rnn_out_micro")
        rnn[1] = RNN(rnn_state_size[1], stateful=False, return_sequences=True,
                     dropout=drop_out_r, name="rnn_out_sku")
    else:
        print("%s is not a valid RNN!" % RNN)
    if RNN_norm == "LSTM":
        rnn_cid3 = LSTM
    else:
        rnn_cid3 = GRU
    if flagCid3RNNNorm:
        rnn[2] = rnn_cid3(rnn_state_size[2], stateful=False, return_sequences=True,
                          dropout=drop_out_r, name="rnn_out_cid3")

    # rnn embed 0
    if bottom_emb_item_len == 5:
        rnn_embed[0] = Concatenate(axis=-1)([embed[0], embed[1], embed[2], embed[3], embed[4]])
    elif bottom_emb_item_len == 4:
        rnn_embed[0] = Concatenate(axis=-1)([embed[0], embed[1], embed[2], embed[3]])
    elif bottom_emb_item_len == 3:
        rnn_embed[0] = Concatenate(axis=-1)([embed[0], embed[1], embed[2]])
    elif bottom_emb_item_len == 1:
        rnn_embed[0] = embed[0]
    elif bottom_emb_item_len == 2:
        rnn_embed[0] = Concatenate(axis=-1)([embed[0], embed[2]])
    else:
        rnn_embed[0] = Concatenate(axis=-1)([embed[0], embed[1], embed[2], embed[3]])

    # add interval, wei
    if RNN == "WGRU":
        rnn_embed[0] = Concatenate(axis=-1)([rnn_embed[0], embed[1]])
    if RNN == "BLSTM" or RNN == "BLSTM2":
        rnn_embed[0] = Concatenate(axis=-1)([rnn_embed[0], embed[3], embed[1]])
    if RNN == "TimeLSTM":
        rnn_embed[0] = Concatenate(axis=-1)([rnn_embed[0], embed[3]])

    # rnn micro
    rnn_output[0] = rnn[0](rnn_embed[0])

    # rnn sku
    if flag_concate_sku_cid:
        rnn_embed[1] = Concatenate(axis=-1)([embed[0], rnn_output[0]])
    else:
        rnn_embed[1] = rnn_output[0]
    # mask sku
    # rnn embed 1
    # rnn_embed[1] = Lambda(f_mask_sku, output_shape=(seq_len, rnn_state_size[1]))(rnn_embed[1])
    if RNN == "WGRU":
        rnn_embed[1] = Concatenate(axis=-1)([rnn_embed[1], embed[4]])
    if RNN == "BLSTM" or RNN == "BLSTM2":
        rnn_embed[1] = Concatenate(axis=-1)([rnn_embed[1], embed[3], embed[4]])
    if RNN == "TimeLSTM":
        rnn_embed[1] = Concatenate(axis=-1)([rnn_embed[1], embed[3]])
    rnn_embed[1] = Lambda(f_mask_sku)(rnn_embed[1])
    rnn_embed[1] = Masking(mask_value=c_mask_value)(rnn_embed[1])
    rnn_output[1] = rnn[1](rnn_embed[1])

    # rnn cid3
    if flag_concate_sku_cid:
        rnn_embed[2] = Concatenate()([embed[2], rnn_output[1]])
    else:
        rnn_embed[2] = rnn_output[1]
    if not flagCid3RNNNorm:
        rnn_embed[2] = Concatenate(axis=-1)([rnn_embed[2], embed[3]])
    # mask cid3
    # rnn_embed[2] = Lambda(f_mask_cid3, output_shape=(seq_len, rnn_state_size[2]))(rnn_embed[2])
    rnn_embed[2] = Lambda(f_mask_cid3)(rnn_embed[2])
    rnn_embed[2] = Masking(mask_value=c_mask_value)(rnn_embed[2])
    rnn_output[2] = rnn[2](rnn_embed[2])

    # rnn final output
    rnn_out_final = rnn_output[layer_nums - 1]
    rnn_out_micro = rnn_output[0]
    rnn_out_sku = rnn_output[1]
    rnn_out_cid3 = rnn_output[2]

    # predict sku, cid3
    if mode_attention == 0:
        # micro
        att_out_micro = Lambda(slice, output_shape=(rnn_state_size[0],))(rnn_out_micro)
        # trans to sku emb len
        out_micro_sku_emb = Dense(emb_len[0], activation="tanh")(att_out_micro)
        out_micro = out_micro_sku_emb
        # sku
        att_out_sku = Lambda(slice, output_shape=(rnn_state_size[1],))(rnn_out_sku)
        # trans to sku emb len
        out_sku_emb = Dense(emb_len[0], activation="tanh")(att_out_sku)
        out_sku = out_sku_emb
        # cid3
        att_out_cid3 = Lambda(slice, output_shape=(rnn_state_size[2],))(rnn_out_cid3)
        out_cid3_emb = Dense(emb_len[2], activation="tanh")(att_out_cid3)
        out_cid3 = out_cid3_emb
        # out_cid3 = Dense(word_num[2], activation="softmax")(out_cid3_emb)

    if mode_attention == 2:
        # attention micro
        m_h = rnn_out_micro
        m_h_last = Lambda(slice, output_shape=(rnn_state_size[0],),
                          name="rnn_out_micro_last")(m_h)
        m_h_r = RepeatVector(seq_len)(m_h_last)
        if MODE_BHDWELLATT:
            m_h_c = Concatenate(axis=-1)([m_h, m_h_r, embed[1]])
        else:
            m_h_c = Concatenate(axis=-1)([m_h, m_h_r])
        if ATT_NET_LAYER_CNT == 2:
            # ATT_NET_HIDSIZE: hidden size of the attention MLP (expected to be a module-level constant)
            m_h_a_1 = TimeDistributed(Dense(ATT_NET_HIDSIZE, activation='tanh'))(m_h_c)
            m_h_a = TimeDistributed(Dense(1, activation='tanh'))(m_h_a_1)
        else:
            m_h_a = TimeDistributed(Dense(1, activation='tanh'))(m_h_c)
        m_h_a = Lambda(lambda x: x, output_shape=lambda s: s)(m_h_a)
        m_att = Flatten()(m_h_a)
        m_att_micro = Softmax(name="att_micro")(m_att)
        m_att_out = Lambda(K_dot, output_shape=(rnn_state_size[0],),
                           name="out_micro_pre")([m_h, m_att_micro])
        # trans to sku emb len
        out_micro = Dense(emb_len[0], activation="tanh")(m_att_out)

        # attention sku
        s_h = rnn_out_sku
        s_h_last = Lambda(slice, output_shape=(rnn_state_size[1],),
                          name="rnn_out_sku_last")(s_h)
        s_h_r = RepeatVector(seq_len)(s_h_last)
        if MODE_BHDWELLATT:
            s_h_c = Concatenate(axis=-1)([s_h, s_h_r, embed[4]])
        else:
            s_h_c = Concatenate(axis=-1)([s_h, s_h_r])
        if ATT_NET_LAYER_CNT == 2:
            s_h_a_1 = TimeDistributed(Dense(ATT_NET_HIDSIZE, activation='tanh'))(s_h_c)
            s_h_a = TimeDistributed(Dense(1, activation='tanh'))(s_h_a_1)
        else:
            s_h_a = TimeDistributed(Dense(1, activation='tanh'))(s_h_c)
        s_h_a = Lambda(lambda x: x, output_shape=lambda s: s)(s_h_a)
        s_att = Flatten()(s_h_a)
        s_att = Lambda(f_mask_att_sku)(s_att)
        s_att_sku = Softmax(axis=-1, name="att_sku")(s_att)
        s_att_out = Lambda(K_dot, output_shape=(rnn_state_size[1],),
                           name="out_sku_pre")([s_h, s_att_sku])

        # attention cid3
        c_h = rnn_out_cid3
        c_h_last = Lambda(slice, output_shape=(rnn_state_size[2],),
                          name="rnn_out_cid3_last")(c_h)
        c_h_r = RepeatVector(seq_len)(c_h_last)
        c_h_c = Concatenate(axis=-1)([c_h, c_h_r])
        if ATT_NET_LAYER_CNT == 2:
            c_h_a_1 = TimeDistributed(Dense(ATT_NET_HIDSIZE, activation='tanh'))(c_h_c)
            c_h_a = TimeDistributed(Dense(1, activation='tanh'))(c_h_a_1)
        else:
            c_h_a = TimeDistributed(Dense(1, activation='tanh'))(c_h_c)
        c_h_a = Lambda(lambda x: x, output_shape=lambda s: s)(c_h_a)
        c_att = Flatten()(c_h_a)
        c_att = Lambda(f_mask_att_cid3)(c_att)
        c_att_cid3 = Softmax(axis=-1, name="att_cid3")(c_att)
        c_att_out = Lambda(K_dot, output_shape=(rnn_state_size[2],),
                           name="out_cid3_pre")([c_h, c_att_cid3])
        out_cid3 = Dense(emb_len[2], activation="tanh")(c_att_out)
        out_sku = Dense(emb_len[0], activation="tanh")(s_att_out)

    # model
    model = Model(inputs=[input[0], input[1], input[2], input[3], input[4]],
                  outputs=[out_micro, out_sku, out_cid3])
    # return embedding, rnn, ret_with_target, input, out
    return model
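# Example (sketch): one way create_model_hierarchy might be wired up, assuming it
# is exposed as a classmethod of a model-factory class (ModelFactory is an
# illustrative name). The embedding-dict keys, state sizes and the per-head
# cosine-proximity losses are assumptions, not the repository's actual training
# setup; the five inputs (sku, bh, cid3, gap, dwell) and the three outputs
# (micro, sku, cid3) do follow the function above.
#
#   emb_wgts = {"sku": sku_emb, "bh": bh_emb, "cid3": cid3_emb,
#               "gap": gap_emb, "dwell": dwell_emb}        # numpy arrays (vocab, dim)
#   model = ModelFactory.create_model_hierarchy(
#       ["sku", "bh", "cid3", "gap", "dwell"], emb_wgts,
#       rnn_state_size=[128, 128, 128], bottom_emb_item_len=5,
#       seq_len=39, batch_size=20, mode_attention=2, rnn_type="WGRU")
#   model.compile(optimizer="adam", loss=["cosine_proximity"] * 3)
#   model.fit([sku_ids, bh_ids, cid3_ids, gap_ids, dwell_ids],
#             [y_micro_emb, y_sku_emb, y_cid3_emb], batch_size=20, epochs=10)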
# index2answer = {
#     1: '采用“流量模块+语音模块+短信模块”自由定制形式,每个模块提供阶梯单价,用户自由定制,任意组合,每月承诺消费不低于19元。',
#     2: '个人定制套餐为4G套餐,默认开通4G功能。',
#     3: '若您为个人定制套餐用户,您在手机端进入手机版电信营业厅,完成登录后,点击底部导航“服务”,切换到“办理”页面中进行办理。',
#     4: '很抱歉,目前只支持新入网用户办理此套餐。',
#     5: '您可以到本地营业厅办理销户。',
#     6: '号卡激活后当月月基本功能费和套餐内容均按天折算计扣,按照过渡期资费处理。',
#     7: '自激活日起,根据订购套餐月基本费按天折算计扣(入网当日到月底),四舍五入到分,套餐内容(如手机上网流量、通话时长、短信条数等)按天数折算,向上取整,业务转换、业务转赠功能次月开放。',
#     8: '套餐外资费:国内流量0.0001元/KB,国内通话0.15元/分钟,国内短信0.1元/条,其他按标准资费收取。',
#     9: '具体可变更套餐规则请咨询当地营业厅。',
#     10: '个人定制套餐暂不支持副卡办理。',
#     11: ' 个人定制套餐暂不支持办理流量包、语音包、短信包业务。',
#     12: '个人定制套餐暂不支持流量包业务。',
#     13: '个人定制套餐暂不支持语音包业务。',
#     14: '个人定制套餐暂不支持短信包业务。',
#     15: '转换业务是指仅套餐内订购三种业务量(含套餐内被赠的业务量)可在当月内按照转换规则进行自由互转,套餐外优惠叠加及其他活动转赠或者充值的业务量(例如流量卡充值流量)不在转换范围内,例如:使用剩余语音业务量可按照转换规则转换为流量。转换规则为1分钟语音=2MB流量=2条短信。每月可用于转换的语音上限值为1000分钟(加和值,即多次使用语音进行转换的语音总量不超过1000分钟),可用于转换的流量上限值为1000MB(加和值),可用于转换的短信上限值为100条(加和值)每月最多转换3次,每次转换1种业务。个人定制当月转赠、转换及套餐内剩余流量均可保留到次月,但这些流量在次月不可被再次转赠和转换。套餐业务转换在您账户正常状态下可使用,如您账户存在欠费、停机、挂失等问题,则无法使用该项业务。',
#     16: '转赠业务是指仅套餐内订购流量的剩余可用量(含套餐内转换业务量,不能二次转赠)可在当月内向同时正在使用本套餐本省的其他用户进行转赠,套餐外优惠叠加或充值的业务量(例如流量卡充值流量)不在转赠范围内。每月可用于转赠的流量上限值为1000MB(加和值,即多次使用流量进行转赠的总量不超过1000MB)每月最多转赠3次,单次转赠1人。每月获赠不受次数限制。个人定制当月转赠、转换及套餐内剩余流量均可保留到次月,但这些流量在次月不可被再次转赠和转换。套餐业务转换在您账户正常状态下可使用,如您账户存在欠费、停机、挂失等问题,则无法使用该项业务。',
#     17: '您在手机端进入手机版电信营业厅,完成登录后,点击底部导航“服务”,切换到“办理”页面即可进行“套餐变更”办理。',
#     18: '您在手机端进入手机版电信营业厅,完成登录后,点击底部导航“服务”,切换到“办理”页面即可进行“套餐转换”办理。',
#     19: '您在手机端进入手机版电信营业厅,完成登录后,点击底部导航“服务”,\t切换到“办理”页面即可进行“套餐转赠”办理。',
#     20: '您在手机端进入手机版电信营业厅,完成登录后,点击底部导航“服务”,切换到“查询”页面即可查询套餐变更记录。',
#     21: '您好,有的。当您在网上营业厅使用转赠功能时,可勾选短信提醒被赠人,并写下您对被赠人的留言。当您在手机版电信营业厅使用转赠功能时,会默认给被赠人发送短信提醒,您还可以写下您给被赠人的留言。',
#     22: '您好,个人定制套餐不激活保留时间因全国各省规则不同,请您咨询本省10000号或到本地营业厅进行咨询。',
#     23: '1.登录网上营业厅。2.登录手机营业厅客户端。3.联系当地人工客服或者前往营业厅进行查询。',
#     24: '您可登录网上营业厅http://www.189.cn/ 首页点击费用>我的话费>余额查询,即可查询可用余额情况。'
# }

# load the word embeddings
embedding_matrix = get_embedding(emb_path, 'word', 400)
print("building model")
config = tf.ConfigProto(allow_soft_placement=True)
with tf.Session(config=config) as sess:
    with tf.device(gpu_config):
        initializer = tf.random_uniform_initializer(-0.02, 0.02)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            # build the model instance
            model = BLSTM(400, 20, embedding_matrix, attention=True,
                          num_epochs=100, dropout=0.3, is_training=False)
        saver = tf.train.Saver()
        # restore the trained model parameters
        saver.restore(sess, 'models/model')
        # main demo loop: `string` is the text to classify
        while True:
            string = str(input("Please input the string:"))
            # print the prediction
            label, prob = model.predict_label(sess, string)
            if prob[0][label] > 0.4:
id2word = pickle.load(open('id2word', 'rb'))
X_train = np.array([[word2id[word] for word in line] for line in X_train])
# load the word embeddings
embedding_matrix = get_embedding(emb_path, 'word', 400)
print("building model")
config = tf.ConfigProto(allow_soft_placement=True)
with tf.Session(config=config) as sess:
    with tf.device(gpu_config):
        initializer = tf.random_uniform_initializer(-0.02, 0.02)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            model = BLSTM(400, 20, embedding_matrix, attention=True,
                          num_epochs=100, dropout=0.3)
        print("training model")
        tf.global_variables_initializer().run()
        # number of training epochs
        num = 10
        model.train(sess, model_path, X_train, y_train, X_train, y_train, 0,
                    num_epochs=num)
        print("success!")