Example #1
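    # Pick a dataset-size key by scanning `info`: each falsy entry before the
    # first truthy one multiplies the base key (100) by 10.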
    def parse_dataset_size(self, info):
        index = 100
        for item in info:
            if item:
                break
            else:
                index = index * 10

        self.train_dataset = Dataset('train_' + self.dataset_size_dict[index])
        self.test_dataset = Dataset('test_1K.xlsx')
Example #2
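# Relies on module-level globals: `filename`, `delimiter`, `metadata`, and
# `debug` must be defined before this is called.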
def load_dataset_into_memory():
    # load the dataset into memory
    db = Dataset(filename, delimiter=delimiter,
                 metadata=metadata)
    if debug:
        print('Db loaded:')
        print("Attributes: %s\nPredict Class: %s\nNumerical Classes: %s" %
              (db.attributes, db.target_attribute, db.numeric))
    return db
Example #3
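    # Qt handler: process pending events to keep the UI responsive, validate
    # the selected file path, then build and preprocess the Dataset from
    # widget state.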
    def getDataset(self):
        QApplication.instance().processEvents()
        Validator.validateFilePath(self.inputFilePath)
        dataset = Dataset(self.inputFilePath, self.responseVariable,
                          self.predictors, self.categoricalVariables)
        dataset.validateVariableLists()
        dataset.preprocessDataset(self.standardize)
        return dataset
Example #4
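    # Tkinter handler: close any previously loaded dataset, reload it from
    # the current UI fields, and report the outcome in a status label.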
    def load_db(self):
        # Load DB
        try:
            self.db.close()
        except AttributeError:
            # db is not set yet
            pass
        try:
            self.db = Dataset(self.filename,
                              delimiter=self.delimiter_char.get(),
                              metadata=self.filename_meta)
            self.Error_Message['fg'] = "#267f36"
            self.Error_Message['text'] = "Dataset has been loaded successfully"
        except Exception as e:
            self.Error_Message['fg'] = "#ff3c3c"
            self.Error_Message['text'] = str(e)
            return
Example #5
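        # Hand-rolled flag parser: each option consumes the next argv token.
        # Note that `filename` has no default, so --file/-f is required.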
        delimiter = ';'
        metadata = ""
        seed = None
        for i in range(len(argv)):
            if argv[i] in ['--file', '-f']:
                filename = argv[i + 1]
            elif argv[i] in ['--delimiter', '-d']:
                delimiter = argv[i + 1]
            elif argv[i] in ['--meta', '-m']:
                metadata = argv[i + 1]
            elif argv[i] in ['--seed', '-s']:
                seed = int(argv[i + 1])
            elif argv[i] in ['--help', '-h']:
                print_usage(argv[0])
                exit(0)
        if seed is not None:
            random.seed(seed)

        # load the dataset into memory
        db = Dataset(filename, delimiter=delimiter, metadata=metadata)

        # generates a decision tree from the dataset
        tree = DecisionTree(db.data,
                            db.attributes,
                            db.target_attribute,
                            db.numeric,
                            single_tree_print=True)

        # print the resultant tree on the terminal
        print(tree)
Example #6
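    # Mid-script excerpt: assumes an argparse `args`, `from time import time`,
    # and `from keras.optimizers import Adagrad, RMSprop, Adam, SGD`.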
    layers = eval(args.layers)
    reg_layers = eval(args.reg_layers)
    num_negatives = args.num_neg
    learner = args.learner
    learning_rate = args.lr
    batch_size = args.batch_size
    epochs = args.epochs
    verbose = args.verbose

    topK = 10
    evaluation_threads = 1  # mp.cpu_count()
    print("MLP arguments: %s " % (args))
    model_out_file = 'Pretrain/%s_MLP_%s_%d.h5' % (args.dataset, args.layers, time())

    # Loading data
    t1 = time()
    dataset = Dataset(args.path + args.dataset)
    train, testRatings, testNegatives = dataset.trainMatrix, dataset.testRatings, dataset.testNegatives
    num_users, num_items = train.shape
    print("Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d"
          % (time() - t1, num_users, num_items, train.nnz, len(testRatings)))

    # Build model
    model = get_model(num_users, num_items, layers, reg_layers)
    if learner.lower() == "adagrad":
        model.compile(optimizer=Adagrad(lr=learning_rate), loss='binary_crossentropy')
    elif learner.lower() == "rmsprop":
        model.compile(optimizer=RMSprop(lr=learning_rate), loss='binary_crossentropy')
    elif learner.lower() == "adam":
        model.compile(optimizer=Adam(lr=learning_rate), loss='binary_crossentropy')
    else:
        model.compile(optimizer=SGD(lr=learning_rate), loss='binary_crossentropy')
Example #7
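# Loads a pretrained GMF model and runs the ranking evaluation; the weight
# file path and the `src` package layout come from the surrounding project.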
def loadGMF():
    dataset = Dataset('../Data/ml-1m')
    model = src.GMF.get_model(dataset.num_users, dataset.num_items, 8)
    model.load_weights('../Pretrain/ml-1m_GMF_8_1501651698.h5')
    hits, ndcgs = src.evaluate.evaluate_model(model, dataset.testRatings,
                                              dataset.testNegatives, 10, 1)
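    # The snippet discards the metrics; returning them is an assumed fix so
    # callers can use the per-user hit-rate and NDCG lists.
    return hits, ndcgs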
Example #8
def window_sums(x, N):  # name assumed; the snippet begins mid-function
    # Sum x over consecutive non-overlapping windows of length N.
    cumsum = np.cumsum(np.insert(x, 0, 0))
    sums = cumsum[N:] - cumsum[:-N]
    return sums[::N]


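# Python 2 script: `ConfigParser` is the old module name and `.decode('utf8')`
# is applied to the raw config string. `dd` is `collections.defaultdict`,
# `dt` is `datetime`, and START_DATE/END_DATE are module-level constants.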
if __name__ == '__main__':
    config = ConfigParser.ConfigParser()
    config.read('config.ini')
    section = 'SEARCH'

    data_file = config.get(section, 'ARTICLES_DATA_FILE')
    search_term = config.get(section, 'SEARCH_TERM').decode('utf8')
    window = int(config.get(section, 'WINDOW_SIZE'))
    output_file = config.get(section, 'OUTPUT_FILE')

    article_dataset = Dataset(data_file)

    filtered_articles = article_dataset.search_articles(search_term)

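    # Tally [article_count, word_count] per publication date.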
    counts_map = dd(lambda: [0, 0])

    for a in filtered_articles:
        date = a[0]
        articles, words = counts_map[date]
        counts_map[date] = [articles + 1, words + a[1]]

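    # Fill in zero counts so every day between START_DATE and END_DATE
    # has an entry.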
    delta = END_DATE - START_DATE
    for i in range(delta.days + 1):
        date = START_DATE + dt.timedelta(days=i)
        if date not in counts_map:
            counts_map[date] = [0, 0]