Example #1
class TestDataLoader(TestCase):

    def test_load_data(self):
        word2index = {'/': 0, '<': 1, '>': 2, 's': 3, '、': 4, '。': 5,
                      'が': 6, 'た': 7, 'で': 8, 'に': 9, 'の': 10,
                      'は': 11, 'を': 12}
        index2word = {0: '/', 1: '<', 2: '>', 3: 's', 4: '、', 5: '。',
                      6: 'が', 7: 'た', 8: 'で', 9: 'に', 10: 'の',
                      11: 'は', 12: 'を'}
        window_data = [('<', '/'), ('<', 's'), ('<', '>'), ('/', '<'),
                       ('/', 's'), ('/', '>'), ('s', '<'), ('s', '/'),
                       ('s', '>'), ('>', '<'), ('>', '/'), ('>', 's')]
        X_ik = {('/', '<'): 2, ('<', '/'): 2, ('/', '>'): 2, ('>', '/'): 2,
                ('/', 's'): 2, ('s', '/'): 2, ('<', '>'): 2, ('>', '<'): 2,
                ('<', 's'): 2, ('s', '<'): 2, ('>', 's'): 2, ('s', '>'): 2}

        self.test_data_loader = DataLoader()
        self.test_japanese_wiki_data = 'test/test_data/jawiki_test.txt'
        test_word2index, test_index2word, test_window_data, \
        test_X_ik, test_weightinhg_dict = self.test_data_loader.load_data(file_name=self.test_japanese_wiki_data)  # noqa
        # Reference
        #     https://stackoverflow.com/questions/11026959/writing-a-dict-to-txt-file-and-reading-it-back  # noqa
        APP_PATH = os.path.dirname(__file__)
        with open(APP_PATH + '/test_data/test_weighting_dict.pkl', 'rb') as handle:  # noqa
            weighting_dict = pickle.loads(handle.read())
        print(test_word2index)
        print(test_index2word)
        print(test_window_data)
        print(test_X_ik)
        assert word2index == test_word2index
        assert index2word == test_index2word
        assert window_data == test_window_data
        assert test_X_ik == X_ik
        assert test_weightinhg_dict == weighting_dict
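
A minimal sketch of how the expected test_weighting_dict.pkl fixture read above could be (re)generated from a known-good run; this helper script is an assumption and is not part of the test suite:

# Hypothetical fixture-regeneration script; DataLoader is imported as in the tests.
import os
import pickle

loader = DataLoader()
_, _, _, _, weighting_dict = loader.load_data(
    file_name='test/test_data/jawiki_test.txt')
APP_PATH = os.path.dirname(__file__)
with open(APP_PATH + '/test_data/test_weighting_dict.pkl', 'wb') as handle:
    pickle.dump(weighting_dict, handle)
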
Example #2
class TestTrainer(TestCase):
    def test_train_method(self):
        self.test_data_loader = DataLoader()
        self.test_japanese_wiki_data = 'test/test_data/jawiki_test.txt'
        test_word2index, test_index2word, test_window_data, \
            test_X_ik, test_weightinhg_dict = self.test_data_loader.load_data(file_name=self.test_japanese_wiki_data)  # noqa
        self.test_prepare_train_data = PrepareTrainData()
        test_train_data = \
            self.test_prepare_train_data.prepare_train_data_method(
                window_data=test_window_data,
                word2index=test_word2index,
                weighting_dic=test_weightinhg_dict,
                X_ik=test_X_ik)
        self.model = Glove(vocab_size=len(test_word2index))
        self.trainer = Trainer(model=self.model)

        self.trainer.train_method(train_data=test_train_data)

        word_similarity = self.trainer.word_similarity(
            target=self.test_data_loader.vocab[0],
            vocab=self.test_data_loader.vocab,
            word2index=test_word2index,
            top_rank=2)

        word_similarity_check = ['<', '>', 's']
        word_similarity_bool = False

        for word in word_similarity:
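            # each entry is assumed to be a (word, score) pair, so word[0] is the token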
            if word[0] in word_similarity_check:
                word_similarity_bool = True

        assert word_similarity_bool is True
Example #3
class TestClassifier(TestCase):
    def test_classify(self):
        model_name = '../models/glove_wiki/glove_model_40.pth'
        output_file = 'test/test_data/glove_classify_model.pkl'
        compare_output_file = 'glove_classify_model.pkl'
        classifier = Classifier(model_name=model_name)
        classifier.classify()
        assert True is filecmp.cmp(output_file, compare_output_file)

    def test_classify_predict(self):
        self.test_data_loader = DataLoader()
        self.test_japanese_wiki_data = 'test/test_data/jawiki_test.txt'
        test_word2index, test_index2word, test_window_data, \
        test_X_ik, test_weightinhg_dict = self.test_data_loader.load_data(file_name=self.test_japanese_wiki_data)  # noqa
        model_name = '../models/glove_wiki/glove_model_40.pth'
        output_file = 'test/test_data/glove_classify_model.pkl'
        classifier = Classifier(model_name=model_name)
        print(test_word2index)
        classes = classifier.classify_predict(word='の',
                                              classify_model_name=output_file,
                                              word2index=test_word2index)
        assert 2 == classes
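        # 'どうよ?' is not in word2index, so 9999 appears to be the out-of-vocabulary class id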
        classes = classifier.classify_predict(word='どうよ?',
                                              classify_model_name=output_file,
                                              word2index=test_word2index)
        assert 9999 == classes
Example #4
def main(configs: Configs = None, data_loader: DataLoader = None):
    """
    main function for data processor from raw files SAP to tables in database
    to be consumed by forecast model

    usage example:
        $ python spike-challenge/src/make_dataset.py

    """
    if configs is None:
        configs = Configs('default_config.yaml')

    if data_loader is None:
        data_loader = DataLoader()
        data_loader.load_data()
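
A minimal usage sketch; 'my_config.yaml' is a placeholder file name, not a file from the project:

# Hypothetical invocations; both arguments are optional thanks to the defaults above.
main()                                      # uses default_config.yaml and a fresh DataLoader
main(configs=Configs('my_config.yaml'))     # supply a custom config, let main build the loader
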
Example #5
class TestGloveVisualize(TestCase):
    def test_visualize(self):
        self.test_data_loader = DataLoader()
        self.test_japanese_wiki_data = '../data/raw/jawiki_only_word_random_choose.txt'
        test_word2index, test_index2word, test_window_data, \
        test_X_ik, test_weightinhg_dict = self.test_data_loader.load_data(
            file_name=self.test_japanese_wiki_data)  # noqa

        model_name = '../models/glove_wiki/glove_model_40.pth'

        self.test_glove_visualize = GloveVisualize(model_name=model_name)
        self.test_glove_visualize.visualize(vocab=self.test_data_loader.vocab)
Example #6
class TestGloveVisualize(TestCase):
    def test_visualize(self):
        self.test_data_loader = DataLoader()
        self.test_japanese_wiki_data = '../data/raw/source_replay_twitter_data_sort.txt'
        test_word2index, test_index2word, test_window_data, \
        test_X_ik, test_weightinhg_dict = self.test_data_loader.load_data(
            file_name=self.test_japanese_wiki_data)  # noqa

        model_name = '../models/glove_model_40.pth'
        test_word2index.update({'<UNK>': len(test_word2index)})

        self.test_glove_visualize = GloveVisualize(model_name=model_name)
        self.test_glove_visualize.visualize(vocab=self.test_data_loader.vocab)
Example #7
def main():
    parser = argparse.ArgumentParser(description="Training glove model")

    parser.add_argument(
        "-c",
        "--train_data",
        metavar="train_data",
        # type=str, default='../data/raw/jawiki_only_word_random_choose.txt',
        type=str,
        default='../data/raw/source_replay_twitter_data_sort.txt',
        dest="train_data",
        help="set the training data ")
    parser.add_argument("-e",
                        "--embedding_size",
                        metavar="embedding_size",
                        type=int,
                        default=300,
                        dest="embedding_size",
                        help="set the embedding size")
    args = parser.parse_args()

    data_loader = DataLoader()
    japanese_wiki_data = args.train_data
    word2index, index2word, window_data, X_ik, weightinhg_dict = \
        data_loader.load_data(file_name=japanese_wiki_data)  # noqa
    print(word2index)

    prepare_train_data = PrepareTrainData()
    train_data = \
        prepare_train_data.prepare_train_data_method(
            window_data=window_data,
            word2index=word2index,
            weighting_dic=weightinhg_dict,
            X_ik=X_ik)

    model = Glove(vocab_size=len(word2index),
                  projection_dim=args.embedding_size)
    trainer = Trainer(model=model)

    trainer.train_method(train_data=train_data)

    word_similarity = trainer.word_similarity(target=data_loader.vocab[0],
                                              vocab=data_loader.vocab,
                                              word2index=word2index,
                                              top_rank=2)
    print(word_similarity)
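
A hedged sketch of driving this training script programmatically by overriding sys.argv before calling main(); the script name, smaller test corpus, and embedding size below are assumptions for a quick run:

# Assumed quick-run invocation; only the argparse defaults shown above are overridden.
import sys

sys.argv = ["train_glove.py",
            "--train_data", "test/test_data/jawiki_test.txt",
            "--embedding_size", "50"]
main()
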
Example #8
class TestPrepareTrainData(TestCase):
    def test_prepare_train_data_method(self):
        self.test_data_loader = DataLoader()
        self.test_japanese_wiki_data = 'test/test_data/jawiki_test.txt'
        test_word2index, test_index2word, test_window_data, \
            test_X_ik, test_weightinhg_dict = self.test_data_loader.load_data(file_name=self.test_japanese_wiki_data)  # noqa
        self.test_prepare_train_data = PrepareTrainData()
        test_train_data = \
            self.test_prepare_train_data.prepare_train_data_method(
                window_data=test_window_data,
                word2index=test_word2index,
                weighting_dic=test_weightinhg_dict,
                X_ik=test_X_ik)
        APP_PATH = os.path.dirname(__file__)
        output_file = APP_PATH + '/test_data/train_data.pkl'
        compare_output_file = APP_PATH + '/test_data/test_train_data.pkl'
        with open(output_file, 'wb') as handle:
            pickle.dump(test_train_data, handle)
        assert True is filecmp.cmp(output_file, compare_output_file)
Example #9
    optimizer = checkpoint["optimizer"]
else:
    print("==> Building model...")
    net = attrWCNNg(num_attr=312, num_classes=NUM_CLASSES)

# print(torch_summarize(net))
# print(net)
if USE_GPU:
    net.cuda()
    # net = torch.nn.DataParallel(net.module, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

log = open("./log/" + MODEL_NAME + '_cub.txt', 'a')
print("==> Preparing data...")
data_loader = DataLoader(data_dir=args.data, image_size=IMAGE_SIZE, batch_size=BATCH_SIZE)
inputs, classes = next(iter(data_loader.load_data()))
# out = torchvision.utils.make_grid(inputs)
# data_loader.show_image(out, title=[data_loader.data_classes[c] for c in classes])
train_loader = data_loader.load_data(data_set='train')
test_loader = data_loader.load_data(data_set='val')
# criterion = nn.CrossEntropyLoss()
criterion = RegLoss(lamda1=lamda1, lamda2=lamda2, superclass="cub")
# criterion = FocalLoss(class_num=NUM_CLASSES, gamma=0)


# def one_hot_emb(batch, depth=NUM_CLASSES):
#     emb = nn.Embedding(depth, depth)
#     emb.weight.data = torch.eye(depth)
#     return emb(batch).data
def one_hot_emb(y, depth=NUM_CLASSES):
    y = y.view((-1, 1))
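
The excerpt cuts one_hot_emb off after the reshape. A minimal sketch of one way to finish it with scatter_; this is an assumption, with the commented-out nn.Embedding version above indicating the intended behaviour:

# Hypothetical completion of one_hot_emb (not the original author's code).
import torch

def one_hot_emb_sketch(y, depth=NUM_CLASSES):
    y = y.view((-1, 1))                      # column vector of class indices
    emb = torch.zeros(y.size(0), depth)      # (batch, depth) of zeros
    emb.scatter_(1, y, 1.0)                  # write a 1 at each target index
    return emb
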
Example #10
import pandas as pd

from data.data_loader import DataLoader

data_loader = DataLoader()
# Load files
files = ["orders", "order_products"]
data = data_loader.load_data(files)

# Get orders and user_ids to predict (test)
orders = data["orders"]
test = orders[orders["eval_set"] == "test"]
test_uids = test["user_id"]

orders_prior = orders[(orders["eval_set"] == "prior")
                      & (orders["user_id"].isin(test_uids))]

# Get products of prior orders
products = data["order_products"]
products_prior = products[products["order_id"].isin(orders_prior["order_id"])]

# Get order_id of last order per user
orders_prior_ids = orders_prior.groupby("user_id")["order_number"].idxmax()
last_order_ids = orders_prior.loc[orders_prior_ids]["order_id"]

# Aggregate all products of same order to a list and select last orders
products_prior_list = pd.DataFrame(
    products_prior.groupby('order_id')['product_id'].apply(list))
products_last_order = products_prior_list.loc[last_order_ids]

# Merge to get user_id and list of product_ids
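
The excerpt stops at the final comment. A minimal sketch of the merge it describes, i.e. attaching user_id to each user's last-order product list; the variable names introduced here are assumptions:

# Hypothetical continuation of the excerpt above.
last_orders = orders_prior.loc[orders_prior_ids][["order_id", "user_id"]]
user_products = last_orders.merge(products_last_order.reset_index(),
                                  on="order_id")
print(user_products.head())
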
Example #11
def gzsl_test0(epoch, net, optimizer, log, gamma=2.):
    NUM_CLASSES = 50  # set the number of classes in your dataset
    num_seen_classes = 40
    NUM_ATTR = 85
    DATA_DIR = "/home/elvis/data/attribute/AwA/Animals_with_Attributes2/zsl/gzsl_test"
    BATCH_SIZE = 32
    IMAGE_SIZE = 224
    best_h = 55
    USE_GPU = torch.cuda.is_available()
    data_loader = DataLoader(data_dir=DATA_DIR,
                             image_size=IMAGE_SIZE,
                             batch_size=BATCH_SIZE)
    # train_loader = data_loader.load_data(data_set='train')
    test_loader = data_loader.load_data(data_set='val')
    criterion = nn.CrossEntropyLoss()

    net.eval()
    test_loss, correct_seen, correct_unseen, total_seen, total_unseen, total, loss = 0, 0, 0, 0, 0, 0, 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        if USE_GPU:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        out, attr = net(inputs)
        loss = criterion(out, targets)

        test_loss = loss.data[0]
        logit = out.data
        seen_prob, seen_class = torch.max(logit[:, :num_seen_classes], 1)
        unseen_prob, unseen_class = torch.max(logit[:, num_seen_classes:], 1)
        predicted = seen_class
        for i, spi in enumerate(seen_prob):
            if seen_prob[i] < unseen_prob[i] * gamma:
                predicted[i] = unseen_class[i] + num_seen_classes

        total += targets.size(0)
        correct_list = predicted.eq(targets.data).cpu()
        target_list = targets.data.cpu()
        for i, targeti in enumerate(target_list):
            if targeti < num_seen_classes:
                correct_seen += correct_list[i]
                total_seen += 1
            else:
                correct_unseen += correct_list[i]
                total_unseen += 1

        acc_seen = 100. * correct_seen / total_seen
        if total_unseen > 0:
            acc_unseen = 100. * correct_unseen / total_unseen
        else:
            acc_unseen = 0.
        progress_bar(
            batch_idx, len(test_loader),
            'Loss: %.3f | acc_seen: %.3f%% (%d/%d) | acc_unseen: %.3f%% (%d/%d)'
            % (test_loss / (batch_idx + 1), acc_seen, correct_seen, total_seen,
               acc_unseen, correct_unseen, total_unseen))
    acc_seen = 100. * correct_seen / total_seen
    acc_unseen = 100. * correct_unseen / total_unseen
    h = 2. / (1. / acc_seen + 1. / acc_unseen)
    print("acc_seen: %.3f%% (%d/%d) | acc_unseen: %.3f%% (%d/%d) | H: %.3f%%" %
          (acc_seen, correct_seen, total_seen, acc_unseen, correct_unseen,
           total_unseen, h))
    log.write(str(acc_seen) + ' ' + str(acc_unseen) + ' ' + str(h) + " ")
    if h > best_h:
        MODEL_SAVE_FILE = "gzsl_awa2_epoch%dacc%d.pth" % (epoch, int(h))
        print(MODEL_SAVE_FILE)
        state = {'net': net, 'acc': h, 'epoch': epoch, 'optimizer': optimizer}
        torch.save(state, "./checkpoints/" + MODEL_SAVE_FILE)
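
The evaluation loops in this and the following zero-shot examples use pre-0.4 PyTorch idioms (Variable(..., volatile=True), loss.data[0]). A minimal sketch of the modern equivalents, assuming the same net, test_loader, and criterion:

# Sketch only: current replacements for the deprecated idioms used above.
import torch

net.eval()
test_loss, correct, total = 0.0, 0, 0
with torch.no_grad():                         # replaces Variable(..., volatile=True)
    for inputs, targets in test_loader:
        if torch.cuda.is_available():
            inputs, targets = inputs.cuda(), targets.cuda()
        out, attr = net(inputs)
        loss = criterion(out, targets)
        test_loss += loss.item()              # replaces loss.data[0]
        predicted = out.argmax(dim=1)         # replaces torch.max(out.data, 1)
        correct += predicted.eq(targets).sum().item()
        total += targets.size(0)
print("Loss: %.3f | Acc: %.3f%%" % (test_loss / max(len(test_loader), 1),
                                    100. * correct / total))
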
Example #12
def gzsl_test(epoch, net, optimizer):
    NUM_CLASSES = 50  # set the number of classes in your dataset
    num_seen_classes = 40
    NUM_ATTR = 85
    DATA_DIR = "/home/elvis/data/attribute/AwA/Animals_with_Attributes2/zsl/gzsl_test"
    BATCH_SIZE = 32
    IMAGE_SIZE = 224
    best_h = 40
    USE_GPU = torch.cuda.is_available()
    # order_awa2_attr = np.load("data/order_awa2_attr.npy")
    # w_attr_sum = np.sum(w_attr, 0)
    # w_attr = w_attr/w_attr_sum
    # w_attr[:, 0].sum()
    # order_awa2_attr = torch.FloatTensor(order_awa2_attr / 100.).cuda()  # 50 * 312

    data_loader = DataLoader(data_dir=DATA_DIR,
                             image_size=IMAGE_SIZE,
                             batch_size=BATCH_SIZE)
    # train_loader = data_loader.load_data(data_set='train')
    test_loader = data_loader.load_data(data_set='val')
    criterion = nn.CrossEntropyLoss()

    net.eval()
    test_loss, correct_seen, correct_unseen, total_seen, total_unseen, total, loss = 0, 0, 0, 0, 0, 0, 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        if USE_GPU:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        out, attr = net(inputs)
        loss = criterion(out, targets)

        test_loss = loss.data[0]
        _, predicted = torch.max(out.data, 1)
        total += targets.size(0)
        correct_list = predicted.eq(targets.data).cpu()
        target_list = targets.data.cpu()
        for i, targeti in enumerate(target_list):
            if targeti < num_seen_classes:
                correct_seen += correct_list[i]
                total_seen += 1
            else:
                correct_unseen += correct_list[i]
                total_unseen += 1

        acc_seen = 100. * correct_seen / total_seen
        if total_unseen > 0:
            acc_unseen = 100. * correct_unseen / total_unseen
        else:
            acc_unseen = 0.
        progress_bar(
            batch_idx, len(test_loader),
            'Loss: %.3f | acc_seen: %.3f%% (%d/%d) | acc_unseen: %.3f%% (%d/%d)'
            % (test_loss / (batch_idx + 1), acc_seen, correct_seen, total_seen,
               acc_unseen, correct_unseen, total_unseen))
    acc_seen = 100. * correct_seen / total_seen
    acc_unseen = 100. * correct_unseen / total_unseen
    h = 2. / (1. / acc_seen + 1. / acc_unseen)
    print("acc_seen: %.3f%% (%d/%d) | acc_unseen: %.3f%% (%d/%d) | H: %.3f%%" %
          (acc_seen, correct_seen, total_seen, acc_unseen, correct_unseen,
           total_unseen, h))
    if h > best_h:
        MODEL_SAVE_FILE = "gzsl_awa2_epoch%dacc%d.pth" % (epoch, int(h))
        print(MODEL_SAVE_FILE)
        state = {'net': net, 'acc': h, 'epoch': epoch, 'optimizer': optimizer}
        torch.save(state, "./checkpoints/" + MODEL_SAVE_FILE)
Example #13
def zsl_test(epoch, net, optimizer, log):
    NUM_CLASSES = 50  # set the number of classes in your dataset
    NUM_SEEN = 40
    NUM_UNSEEN = NUM_CLASSES - NUM_SEEN
    NUM_ATTR = 85
    DATA_DIR = "/home/elvis/data/attribute/AwA/Animals_with_Attributes2/zsl/zsl_test"
    BATCH_SIZE = 32
    IMAGE_SIZE = 224
    best_acc = 74
    USE_GPU = torch.cuda.is_available()
    order_awa2_attr = np.load("data/order_awa2_attr.npy")
    # w_attr_sum = np.sum(w_attr, 0)
    # w_attr = w_attr/w_attr_sum
    # w_attr[:, 0].sum()
    order_awa2_attr = order_awa2_attr[NUM_SEEN:, :]
    order_awa2_attr = torch.FloatTensor(order_awa2_attr /
                                        100.).cuda()  # (NUM_UNSEEN, NUM_ATTR) = (10, 85)
    net.fc2 = nn.Linear(NUM_ATTR, NUM_CLASSES, bias=False)
    net.fc2.weight = nn.Parameter(order_awa2_attr, requires_grad=False)
    # print(torch_summarize(net))
    # print(net)
    net.cuda()
    data_loader = DataLoader(data_dir=DATA_DIR,
                             image_size=IMAGE_SIZE,
                             batch_size=BATCH_SIZE)
    train_loader = data_loader.load_data(data_set='train')
    test_loader = data_loader.load_data(data_set='val')
    criterion = nn.CrossEntropyLoss()

    net.eval()
    test_loss, correct, total, loss = 0, 0, 0, 0
    correct_bin = np.zeros(NUM_UNSEEN)
    total_bin = np.zeros(NUM_UNSEEN)
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        if USE_GPU:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        out, attr = net(inputs)
        loss = criterion(out, targets)

        test_loss = loss.data[0]
        _, predicted = torch.max(out.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        correct_list = predicted.eq(targets.data).cpu()
        target_list = targets.data.cpu()
        for i, targeti in enumerate(target_list):
            correct_bin[targeti] += correct_list[i]
            total_bin[targeti] += 1.

        acc = 100. * correct / total
        progress_bar(
            batch_idx, len(test_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (test_loss / (batch_idx + 1), acc, correct, total))

    acc = 100. * correct / total
    acc_bin = 100. * correct_bin / total_bin
    np.save("data/sun_acc_bin.npy", acc_bin)
    print("ZSL acc_per_class: %.3f%%(%d/%d)" %
          (np.mean(acc_bin), correct_bin[0], total_bin[0]))
    log.write(str(np.mean(acc_bin)) + ' ')
    if acc > best_acc:
        MODEL_SAVE_FILE = "zsl_resnet18_awa2_epoch%dacc%d.pth" % (epoch,
                                                                  int(acc))
        print(MODEL_SAVE_FILE)
        state = {
            'net': net,
            'acc': acc,
            'epoch': epoch,
            'optimizer': optimizer
        }
        torch.save(state, "./checkpoints/" + MODEL_SAVE_FILE)
Example #14
def gzsl_test(epoch, net, optimizer):
    NUM_CLASSES = 717  # set the number of classes in your dataset
    Num_SEEN = 645
    NUM_ATTR = 102
    DATA_DIR = "/home/elvis/data/attribute/SUN/zsl/gzsl_test"
    BATCH_SIZE = 32
    IMAGE_SIZE = 224
    best_h = 50
    USE_GPU = torch.cuda.is_available()
    order_sun_attr = np.load("data/order_sun_attr.npy")
    # order_sun_attr[Num_SEEN:, :] = order_sun_attr[Num_SEEN:, :]
    # order_cub_attr = order_cub_attr[150:, :]
    order_sun_attr = torch.FloatTensor(order_sun_attr).cuda()  # (NUM_CLASSES, NUM_ATTR) = (717, 102)
    net.fc2 = nn.Linear(NUM_ATTR, NUM_CLASSES, bias=False)
    net.fc2.weight = nn.Parameter(order_sun_attr, requires_grad=False)
    net.cuda()
    data_loader = DataLoader(data_dir=DATA_DIR,
                             image_size=IMAGE_SIZE,
                             batch_size=BATCH_SIZE)
    # train_loader = data_loader.load_data(data_set='train')
    test_loader = data_loader.load_data(data_set='val')
    criterion = nn.CrossEntropyLoss()

    net.eval()
    test_loss, correct_seen, correct_unseen, total_seen, total_unseen, total, loss = 0, 0, 0, 0, 0, 0, 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        if USE_GPU:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        out, attr = net(inputs)
        loss = criterion(out, targets)

        test_loss = loss.data[0]
        _, predicted = torch.max(out.data, 1)
        total += targets.size(0)
        correct_list = predicted.eq(targets.data).cpu()
        target_list = targets.data.cpu()
        for i, targeti in enumerate(target_list):
            if targeti < Num_SEEN:
                correct_seen += correct_list[i]
                total_seen += 1
            else:
                correct_unseen += correct_list[i]
                total_unseen += 1

        acc_seen = 100. * correct_seen / total_seen
        if total_unseen > 0:
            acc_unseen = 100. * correct_unseen / total_unseen
        else:
            acc_unseen = 0.
        progress_bar(
            batch_idx, len(test_loader),
            'Loss: %.3f | acc_seen: %.3f%% (%d/%d) | acc_unseen: %.3f%% (%d/%d)'
            % (test_loss / (batch_idx + 1), acc_seen, correct_seen, total_seen,
               acc_unseen, correct_unseen, total_unseen))
    acc_seen = 100. * correct_seen / total_seen
    acc_unseen = 100. * correct_unseen / total_unseen
    h = 2. / (1. / acc_seen + 1. / acc_unseen)
    print("acc_seen: %.3f%% (%d/%d) | acc_unseen: %.3f%% (%d/%d) | H: %.3f%%" %
          (acc_seen, correct_seen, total_seen, acc_unseen, correct_unseen,
           total_unseen, h))
    if h > best_h:
        MODEL_SAVE_FILE = "gzsl_resnet50_sun_epoch%dacc%d.pth" % (epoch,
                                                                  int(h))
        print(MODEL_SAVE_FILE)
        state = {'net': net, 'acc': h, 'epoch': epoch, 'optimizer': optimizer}
        torch.save(state, "./checkpoints/" + MODEL_SAVE_FILE)
Example #15
    def get_qualified_shops(self, threshold=0.07):
        qualified_shops = [shop_id for shop_id in range(1, 2001) if self.get_best_loss(shop_id) <= threshold]
        return qualified_shops

    def get_unqualified_shops(self, threshold=0.07):
        unqualified_shops = [shop_id for shop_id in range(1, 2001) if self.get_best_loss(shop_id) > threshold]
        return unqualified_shops

    def get_mean_loss(self, threshold=0.07):
        all_loss = [self.get_best_loss(shop_id) for shop_id in range(1, 2001) if
                    self.get_best_loss(shop_id) < threshold]
        return np.mean(all_loss)


if __name__ == '__main__':
    loader_data = DataLoader.load_data()
    shop_info = loader_data['shop_info']
    user_pay = loader_data['user_pay']
    user_view = loader_data['user_view']
    shop_ids = range(1, 2001)
    ordinary_dates = pd.date_range(start='2015-10-10', end='2016-10-31', freq='D').strftime('%Y-%m-%d')
    business_dates = pd.date_range(start='2015-10-10', end='2016-10-31', freq='B').strftime('%Y-%m-%d')
    result = {}
    for shop_id in shop_ids[:1]:
        shop_info_data = shop_info.iloc[shop_id - 1].to_dict()
        user_pay_data = user_pay[user_pay.shop_id == shop_id]
        user_pay_data = user_pay_data.sort_values(by='time_stamp')
        user_pay_data['date'] = user_pay_data['time_stamp'].apply(lambda x: x[:10])
        user_pay_info = UserPayInfoBase(shop_id, shop_info_data, ordinary_dates)
        user_pay_info.set_flow(user_pay_data, ordinary_dates)
        result[shop_id] = user_pay_info
Example #16
LEARNING_RATE = 0.001
REGULARIZATION_RATE = 1

embedding_dim = 100

logs_path = "/app/tmp/logs/20/"
data_root = "/app/data/datasets/amazon-fine-food-reviews/"

train_filename = "train_Reviews"
test_filename = "test_Reviews"
valid_filename = "valid_Reviews"

print("Loading data...")
train_data = DataLoader(data_root, train_filename, NUM_EPOCHS, BATCH_SIZE,
                        "Text", "Score")
train_data.load_data()
valid_data = DataLoader(data_root, valid_filename, NUM_EPOCHS, BATCH_SIZE,
                        "Text", "Score")
valid_data.load_data()

model = CLSTMModel(num_classes=NUM_CLASSES,
                   embedding_dim=embedding_dim,
                   sequence_len=train_data.sequence_len)

sess = tf.Session()
x = tf.placeholder(tf.int32, [None, train_data.sequence_len], name="x")
y = tf.placeholder(tf.int32, [None, 1], name="y")
lengths = tf.placeholder(tf.int32, [None])
keep_prob = tf.placeholder(tf.float32)

oh_y = tf.squeeze(tf.one_hot(y, depth=NUM_CLASSES, name='oh_y'))
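
The excerpt ends while the graph is still being assembled. A hedged sketch of how these placeholders would typically be fed once training ops exist; train_op, loss_op, and the next_batch() helper are assumptions, not part of the excerpt:

# Hypothetical training step for illustration only.
sess.run(tf.global_variables_initializer())
batch_x, batch_y, batch_lengths = train_data.next_batch()          # assumed helper
_, step_loss = sess.run(
    [train_op, loss_op],                                           # assumed ops
    feed_dict={x: batch_x, y: batch_y,
               lengths: batch_lengths, keep_prob: 0.5})
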