Example no. 1
    def __init__(self):
        self.history_savedir = None
        self.detail_savedir = None
        self.logger = None
        self.user = self.create_user()
        self.rule_policy = RulePolicy()
        self.dst = DialogStateTracker(UserPersonal, FLAGS.print, self.logger)
        self.data_manager = DataManager(os.path.join(BASE_DIR, 'data/tmp'))
        self.nlu_manager = NLUManager(NLU_save_path_dict)
        # self.nlg_template = NLG_template
        self.turn_num = 1
        self.dialog_history = []
Example no. 2
    def feature_processing(self, filename, trip=1, workers=8):
        """Read the data records and create data features
        """
        accelerometer_data = dm.trip_data_to_df(filename, trip=trip)
        segments = self.split_segments(accelerometer_data)

        dim = ceil(len(segments) / workers)
        chunks = (segments[k:k + dim] for k in range(0, len(segments), dim))

        with ProcessPoolExecutor(max_workers=workers) as executor:
            futures = [
                executor.submit(self.features, chunk) for chunk in chunks
            ]

        segment_features = []

        for future in futures:
            segment_features.append(future.result())

        segments_df = pd.concat(segment_features, ignore_index=True)
        segments_df = segments_df.sort_values('Time')

        # Average features to 1 minute intervals
        segments = self.split_segments(
            segments_df, time_intervals=self.AVG_FEATURES_INTERVALS)

        dim = ceil(len(segments) / workers)
        chunks = (segments[k:k + dim] for k in range(0, len(segments), dim))

        with ProcessPoolExecutor(max_workers=workers) as executor:
            futures = [
                executor.submit(self.average_features, chunk)
                for chunk in chunks
            ]

        average_features = []
        for future in futures:
            average_features.append(future.result())

        avg_features_df = pd.concat(average_features, ignore_index=True)
        avg_features_df = avg_features_df.sort_values('Time')

        return avg_features_df
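
The chunk-and-submit pattern above appears twice in feature_processing. A minimal standalone sketch of the same idea, where work is a hypothetical stand-in for self.features / self.average_features:

from concurrent.futures import ProcessPoolExecutor
from math import ceil

import pandas as pd


def work(chunk):
    # Hypothetical stand-in: turn a chunk of segments into feature rows.
    return pd.DataFrame({'Time': chunk})


def process_in_chunks(items, workers=8):
    # One chunk per worker; max(1, ...) guards against an empty input.
    dim = max(1, ceil(len(items) / workers))
    chunks = [items[k:k + dim] for k in range(0, len(items), dim)]
    with ProcessPoolExecutor(max_workers=workers) as executor:
        futures = [executor.submit(work, chunk) for chunk in chunks]
    # The with-block waits for the workers, so result() returns at once.
    return pd.concat([f.result() for f in futures], ignore_index=True)


if __name__ == '__main__':
    print(process_in_chunks(list(range(10)), workers=4))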
Example no. 3
class OnlineStoreApp(App):
    dataManager = DataManager()
    ordersList = ListProperty([])
    
    def app_func(self):
        '''Wrapper for running the Kivy UI as an asyncio task.
        '''
        async def run_wrapper():
            # Run the Kivy UI until the window is closed
            await self.async_run()
            exit(0)

        return asyncio.gather(run_wrapper())
    
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self):
        return OrderScreen()
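
A minimal sketch of how such an app is typically started, following the asyncio pattern from Kivy's async-support documentation (OnlineStoreApp is the class above):

import asyncio

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    app = OnlineStoreApp()
    # app_func() gathers the UI coroutine; run it to completion.
    loop.run_until_complete(app.app_func())
    loop.close()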
Example no. 4
    def get_joined_trips_data(self, trips):

        joineds = []

        for trip in trips:
            print(f"processing trip {trip}")
            files = ['Pixel_accelerometer', f'Pixel_gyro_{trip}']
            results = []

            for file in files:
                result = self.feature_processing(file, trip=trip)
                file = self.clean_file_name(file)
                result.rename(columns={
                    'msm': f'msm_{file}',
                    'variance': f'variance_{file}'
                }, inplace=True)
                results.append(result)

            pre_result = pd.concat(results, axis=1)
            result = self.remove_dup_columns(pre_result)
            # print(result)

            data = dm.trip_data_to_df("Pixel_activity", trip=trip)
            # Average features to 1 minute intervals
            data_segments = self.split_segments(
                data, time_intervals=self.AVG_FEATURES_INTERVALS)

            avg_classification_df = self.average_classification(data_segments)

            joined_df = self.join_sensors_classification_data(
                result, avg_classification_df)

            joineds.append(joined_df)

        return joineds
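
remove_dup_columns is defined elsewhere in this project; a common pandas idiom that does the same job, shown here only as an assumed equivalent:

import pandas as pd

def remove_dup_columns(df):
    # Keep only the first occurrence of each duplicated column label.
    return df.loc[:, ~df.columns.duplicated()]

df = pd.concat([pd.DataFrame({'Time': [1]}), pd.DataFrame({'Time': [1]})], axis=1)
print(remove_dup_columns(df).columns.tolist())  # ['Time']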
Example no. 5
    def setUp(self):
        removeLeftOverDbFiles()
        DataManagerUnitTest.dm = DataManager(
            configObj=DataManagerUnitTest.testConfig)
Example no. 6
    if 'chatting' in SysAct:
        strategy = random.choice([0, 1, 2])
        if strategy == 0:
            nl += '您好,我是中国移动业务咨询机器人,可以帮助您进行套餐查找、业务咨询、个人信息查询等,请问您需要什么帮助吗?'
        elif strategy == 1:
            nl += '抱歉,您说的这些我不太理解,但我可以在套餐查找、业务咨询、个人信息查询等方面为您提供帮助的~'
        else:
            nl += '请不要再调戏我了,问一些套餐查找、业务咨询、个人信息查询方面的问题吧'
    # if 'repeat' in SysAct.keys():
    #     nl += "抱歉,能再重述一遍么?"

    return nl


if __name__ == '__main__':
    data_manager = DataManager('../data/tmp')
    compared_entities = data_manager.SearchingByConstraints('套餐', {"功能费": [700, 900]})
    offer_entity = compared_entities[0]
    domain = '套餐'
    SysAct = {
        'ex0': {
            'offer': offer_entity,
            'inform': ['产品介绍', '套餐内容_国内主叫', '套餐内容_国内流量', '套餐内容_国内短信', '结转规则', '是否包含港澳台地区']},
        'ex1': {  # self.UsrAct == "告知":
            'offer': offer_entity,
            'inform': ["产品介绍"],
            'reqmore': None,  # None because reqmore takes no parameters
            'domain': domain},
        'ex2': {
            'request': ["套餐内容_国内流量", "套餐内容_国内主叫"],
            'domain': domain},
Example no. 7
def train_requestable(data_tmp_path):
    """
    用于训练模型,先训练完存好了才能用
    训练用 early stopping
    :param data_tmp_path:  data tmp 文件夹位置
    """
    print('Loading data manager...')
    data_manager = DataManager(data_tmp_path)

    print('Loading training data...')
    informable_slot_datasets, requestable_slot_datasets = generate_dataset(
        data_manager.DialogData)

    print('Loading requestable slot detectors...')
    init_learning_rate = 0.005
    graph = tf.Graph()
    with graph.as_default():
        requestable_slots_models = {}
        for k, v in All_requestable_slots_order.items():
            requestable_slots_models[k] = RequestableSlotDector(
                str(v), learning_rate=init_learning_rate)

    with tf.Session(graph=graph,
                    config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.group(tf.global_variables_initializer()))
        saver = tf.train.Saver()
        # saver.restore(sess, "./ckpt/requestable/model.ckpt")
        # Train the requestable slots
        requestable_slots_F1s = {}
        for slot, model in requestable_slots_models.items():
            average_loss = 0
            learning_rate = init_learning_rate
            best_F1 = 0
            tolerance = 30
            tolerance_count = 0
            display_step = 10
            for step in range(5000):
                step += 1
                batch_data, batch_output = requestable_slot_datasets[
                    slot].next_batch()
                char_emb_matrix, word_emb_matrix, seqlen = data_manager.sent2num(
                    batch_data)

                _, training_loss = sess.run(
                    [model.train_op, model.final_loss],
                    feed_dict={
                        model.char_emb_matrix: char_emb_matrix,
                        model.word_emb_matrix: word_emb_matrix,
                        model.output: batch_output
                    })
                average_loss += training_loss / display_step
                if step % display_step == 0:
                    batch_data, batch_output = requestable_slot_datasets[
                        slot].get_testset()
                    char_emb_matrix, word_emb_matrix, seqlen = data_manager.sent2num(
                        batch_data)
                    pred, accu = sess.run(
                        [model.predict, model.accuracy],
                        feed_dict={
                            model.char_emb_matrix: char_emb_matrix,
                            model.word_emb_matrix: word_emb_matrix,
                            model.output: batch_output
                        })
                    F1 = get_F1score(batch_output, pred.tolist())
                    if best_F1 < F1:
                        best_F1 = F1
                        tolerance_count = 0
                        if not os.path.exists("./ckpt/requestable/"):
                            os.makedirs("./ckpt/requestable/")
                        saver.save(sess, "./ckpt/requestable/model.ckpt")
                    if tolerance_count == tolerance:
                        break
                    print("%s, step % 4d, loss %0.4f, F1 %0.4f, accu %0.4f" %
                          (slot, step, average_loss, F1, accu))
                    average_loss = 0
                    tolerance_count += 1
                    learning_rate = max(learning_rate * 0.98, 0.001)
                    sess.run(model.update_lr,
                             feed_dict={model.new_lr: learning_rate})
            print("requestable slot: %s, best F1 %0.4f" % (slot, best_F1))
            requestable_slots_F1s[slot] = best_F1
        print(requestable_slots_F1s)
        print(
            sum(requestable_slots_F1s.values()) /
            len(requestable_slots_F1s.values()))
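
Both training functions use the same early-stopping scheme: evaluate every display_step steps, checkpoint on improvement, and stop after tolerance evaluations without one. A framework-agnostic sketch of that loop, where update and evaluate are hypothetical callables standing in for the sess.run calls:

def train_with_early_stopping(update, evaluate, max_steps=5000,
                              display_step=10, tolerance=30):
    # update() runs one training step; evaluate() returns a validation score.
    best_score = 0.0
    tolerance_count = 0
    for step in range(1, max_steps + 1):
        update()
        if step % display_step == 0:
            score = evaluate()
            if score > best_score:
                best_score = score  # a checkpoint would be saved here
                tolerance_count = 0
            if tolerance_count == tolerance:
                break  # no improvement for `tolerance` evaluations
            tolerance_count += 1
    return best_score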
Example no. 8
def train_informable(data_tmp_path):
    """
    用于训练模型,先训练完存好了才能用
    训练用 early stopping
    :param data_tmp_path:  data tmp 文件夹位置
    """
    print('Loading data manager...')
    data_manager = DataManager(data_tmp_path)

    print('Loading training data...')
    informable_slot_datasets, requestable_slot_datasets = generate_dataset(
        data_manager.DialogData)

    print('Loading informable slot detectors...')
    init_learning_rate = 0.005
    informable_batch_ratios = {  # per-slot minibatch ratios
        "通话时长": [2, 8, 8, 8],
        "流量": [4, 8, 8, 8],
        "功能费": [4, 8, 8, 8]
    }
    graph = tf.Graph()
    with graph.as_default():
        informable_slots_models = {
            "功能费": InformableSlotDector('cost',
                                        learning_rate=init_learning_rate),
            "流量": InformableSlotDector('data',
                                       learning_rate=init_learning_rate),
            "通话时长": InformableSlotDector('time',
                                         learning_rate=init_learning_rate),
        }
    with tf.Session(graph=graph,
                    config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.group(tf.global_variables_initializer()))
        saver = tf.train.Saver()
        # saver.restore(sess, "./ckpt/informable/model.ckpt")

        # Train the informable slots
        informable_slots_accus = []
        for slot, model in informable_slots_models.items():
            learning_rate = init_learning_rate
            average_loss = 0
            best_accu = 0
            tolerance = 20
            tolerance_count = 0
            display_step = 10
            for step in range(5000):
                step += 1
                batch_data, batch_output = informable_slot_datasets[
                    slot].next_batch(informable_batch_ratios[slot])
                char_emb_matrix, word_emb_matrix, seqlen = data_manager.sent2num(
                    batch_data)
                _, training_loss = sess.run(
                    [model.train_op, model.final_loss],
                    feed_dict={
                        model.char_emb_matrix: char_emb_matrix,
                        model.word_emb_matrix: word_emb_matrix,
                        model.output: batch_output
                    })
                average_loss += training_loss / display_step
                if step % display_step == 0:
                    batch_data, batch_output = informable_slot_datasets[
                        slot].get_testset()
                    char_emb_matrix, word_emb_matrix, seqlen = data_manager.sent2num(
                        batch_data)
                    pred, accu = sess.run(
                        [model.predict, model.accuracy],
                        feed_dict={
                            model.char_emb_matrix: char_emb_matrix,
                            model.word_emb_matrix: word_emb_matrix,
                            model.output: batch_output
                        })
                    if best_accu < accu:
                        best_accu = accu
                        tolerance_count = 0
                        if not os.path.exists("./ckpt/informable/"):
                            os.makedirs("./ckpt/informable/")
                        saver.save(sess, "./ckpt/informable/model.ckpt")
                    if tolerance_count == tolerance:
                        break
                    print("%s, step % 4d, loss %0.4f, accu %0.4f" %
                          (slot, step, average_loss, accu))
                    average_loss = 0
                    tolerance_count += 1
                    learning_rate = max(learning_rate * 0.95, 0.0001)
                    sess.run(model.update_lr,
                             feed_dict={model.new_lr: learning_rate})
            print("informable slot: %s, best accu %0.4f" % (slot, best_accu))
            informable_slots_accus.append(best_accu)
Example no. 9
        batch_output = []
        domain_id = domains.index(domain)
        batch_output += [domain_id] * len(batch_data)

        return batch_data, batch_output, end_flag


def generate_domain_dataset(DialogData, domain_data_ids):
    dialog_data = copy.deepcopy(DialogData)
    domain_datas = {}
    for domain, data_ids in domain_data_ids.items():
        domain_datas[domain] = []
        for data_id in data_ids:
            domain_datas[domain] += dialog_data[data_id]["用户回复示例"]
    domain_dataset = DomainDataset(domain_datas['个人'], domain_datas['套餐'],
                                   domain_datas['流量'], domain_datas['WLAN'],
                                   domain_datas['号卡'], domain_datas['国际港澳台'],
                                   domain_datas['家庭多终端'])
    return domain_dataset


if __name__ == '__main__':
    data_manager = DataManager('../../../data/tmp')
    domain_dataset = generate_domain_dataset(data_manager.DialogData,
                                             domain_data_ids)
    binput, boutput = domain_dataset.next_batch()
    pprint.pprint((binput, boutput))
    binput, boutput = domain_dataset.next_batch()
    pprint.pprint((binput, boutput))
Example no. 10
from datetime import datetime
from data.DataManager import DataManager
from structures.NeuralNetwork import NeuralNetwork
from structures.functions import sigmoid, d_sigmoid, same, d_same
from structures.layers.DenseLayer import DenseLayer


def classify(arr):
    # Argmax: return the index of the largest element.
    _, idx = max((x, y) for y, x in enumerate(arr))
    return idx


if __name__ == '__main__':
    dm = DataManager('mnist.pkl.gz')

    nn = NeuralNetwork()
    nn.add_layer(DenseLayer(784, 400, sigmoid, d_sigmoid))
    nn.add_layer(DenseLayer(400, 150, sigmoid, d_sigmoid))
    nn.add_layer(DenseLayer(150, 50, sigmoid, d_sigmoid))
    nn.add_layer(DenseLayer(50, 10, same, d_same))

    curtime = datetime.now()
    nn.train(dm.train_set, epochs=30, epoch_range=1)
    t = datetime.now() - curtime
    print('{} seconds for training'.format(t.seconds))
    print('correct for {}% on test set'.format(nn.calculate_correct(dm.test_set)))

    results = []
    for x, y in dm.test_set[:75]:
        res = nn.feed_forward(x)
        res = dm.classify(res)
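
The (value, index) max trick in classify is a compact argmax. For arrays without ties it matches the more direct form below (on ties the original returns the last maximal index, this version the first):

def argmax(arr):
    # Index of the largest element; ties resolve to the first occurrence.
    return max(range(len(arr)), key=arr.__getitem__)

assert argmax([0.1, 0.7, 0.2]) == 1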
Example no. 11
        self.batch_id[user_act] += test_batchsize

        batch_output = []
        user_act_id = user_acts.index(user_act)
        batch_output += [user_act_id] * len(batch_data)

        return batch_data, batch_output, end_flag


def generate_user_act_dataset(DialogData, user_act_data_ids):
    dialog_data = copy.deepcopy(DialogData)
    user_act_datas = {}
    for user_act, data_ids in user_act_data_ids.items():
        if user_act in ['问询费用选项', '问询通话时长选项', '问询流量选项']:
            user_act = '问询说明'
        if user_act not in user_act_datas:
            user_act_datas[user_act] = []
        for data_id in data_ids:
            user_act_datas[user_act] += dialog_data[data_id]["用户回复示例"]
    user_act_dataset = UserActDataset(user_act_datas)
    return user_act_dataset


if __name__ == '__main__':
    data_manager = DataManager(os.path.join(BASE_DIR, '../../../data/tmp'))
    user_act_dataset = generate_user_act_dataset(data_manager.DialogData,
                                                 user_act_data_ids)
    for user_act, data in user_act_dataset.data.items():
        print(user_act)
        print(len(data))
Example no. 12
class PassageFetcher(object):

    DATA_MANAGER = DataManager()
    LINE_CACHE_SIZE = 4
    LOOK_FORWARD_SIZE = 4
    TAG_LIMIT = 4
    NEGATORY_QUOTE = "'Remember not only to say the right thing in the right place, but far more difficult still, to leave unsaid the wrong thing at the tempting moment.' - Benjamin Franklin"

    @staticmethod
    def FetchAndCompilePassages(tags):
        print(tags)
        tags = [str(tag) for tag in tags]
        # Validate tags
        valid_tags = PassageFetcher.ValidateTags(tags)
        print(valid_tags)

        feed = valid_tags[:PassageFetcher.TAG_LIMIT]

        authors = []

        # Get passages
        result = "\n\n".join(PassageFetcher.FetchPassages(feed, authors))

        if result is None or result.isspace():
            return PassageFetcher.NEGATORY_QUOTE
        else:
            return (result, feed, authors)

    @staticmethod
    def FetchPassages(tags, authors):
        return [PassageFetcher.PassagePipe(tag, authors) for tag in tags]

    @staticmethod
    def PassagePipe(tag, authors):
        locations = PassageFetcher.GetLocations(tag)
        if locations is None:
            return ""
        coordinates = PassageFetcher.GetRandomLocation(locations)
        try:
            authors.append(Novels[coordinates[0]])
        except Exception as e:
            print(e)
        raw_passage = PassageFetcher.ExtractPassage(coordinates)
        treated_passage = PassageFetcher.TreatPassage(raw_passage)
        return treated_passage

    @staticmethod
    def ValidateTags(tags):
        ### ADD THE SORT HERE ###
        valid_tags = []
        for tag in tags:
            try:
                # Index the tag purely to test for membership.
                PassageFetcher.DATA_MANAGER.DATA[tag.lower()]
                valid_tags.append(tag)
            except KeyError:
                print(tag + " was not found.")

        return valid_tags

    @staticmethod
    def GetLocations(tag):
        try:
            locations = PassageFetcher.DATA_MANAGER.DATA[tag.lower()]
        except Exception as e:
            print("Error:", e)
            return None
        return locations

    @staticmethod
    def GetRandomLocation(locations):
        return random.choice(locations)

    @staticmethod
    def ExtractPassage(coordinates):

        line_index = 0
        word_index = 0

        # Open up the book
        with open("./data/books/" + coordinates[0]) as file:
            # Cache recent lines while scanning for the target word
            line_cache = []
            found_flag = False
            local_timer = PassageFetcher.LOOK_FORWARD_SIZE
            # Find line
            for line in file:
                words = line.split()
                # Find word
                for word in words:
                    # If the word is found, compile the passage response
                    if word_index == coordinates[2]:
                        found_flag = True

                    word_index += 1
                line_index += 1

                if line != '\r\n' and line != '\n' and line != '\r':
                    line_cache.append(line)
                    if (len(line_cache) > PassageFetcher.LINE_CACHE_SIZE
                            and not found_flag):
                        line_cache.pop(0)

                if found_flag:
                    local_timer -= 1

                if local_timer <= 0:
                    break

        return "".join(line_cache)

    @staticmethod
    def TreatPassage(raw_passage):
        pattern = re.compile(r'([A-Z][^\.!?]*[\.!?])', re.M)
        sentences = pattern.findall(raw_passage)

        if len(sentences) > 3:
            sentences.pop(0)  # drop the possibly-partial first sentence
            sentences.pop()   # drop the possibly-partial last sentence
        return " ".join(sentences)