def get_data_from_folder(trainfolderpath):
    # "p" is presumably imutils.paths; list_images walks the folder tree
    imagePaths = list(p.list_images(trainfolderpath))
    features = []
    labels = []

    for (i, imagePath) in enumerate(imagePaths):
        # load the image and extract the class label (assuming that our
        # path has the format: /path/to/dataset/{class}.{image_num}.jpg
        image = cv2.imread(imagePath)
        label = imagePath.split(os.path.sep)[-1].split(".")[0]
        hist = extract_color_histogram(image)
        features.append(hist)
        labels.append(label)

        if i > 0 and i % 1000 == 0:
            print("[INFO] processed {}/{}".format(i, len(imagePaths)))

    # convert the lists to NumPy arrays
    features = np.array(features)
    labels = np.array(labels)

    (trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
        features, labels, test_size=0.25, random_state=42)

    dataModel = DataModel()
    dataModel.setTrainFeat(trainFeat)
    dataModel.setTrainLabels(trainLabels)
    dataModel.setTestFeat(testFeat)
    dataModel.setTestLabels(testLabels)

    return dataModel
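The snippet assumes an extract_color_histogram helper that is not shown; a minimal sketch following the common flattened-HSV-histogram recipe (the bin counts are an assumption):

import cv2
import numpy as np

def extract_color_histogram(image, bins=(8, 8, 8)):
    # compute a 3D histogram over all HSV channels and flatten it into
    # a fixed-length feature vector for the classifier
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
                        [0, 180, 0, 256, 0, 256])
    cv2.normalize(hist, hist)  # scale bin counts in place
    return hist.flatten()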
Example 2
    def loadData(self):
        # load the data from MongoDB into the table view
        self.search_text.clear()
        try:
            self.model = DataModel(self.mongo_obj.searchData({}, {"_id": 0}))
            self.proxyModel = QSortFilterProxyModel(self)
            self.proxyModel.setSourceModel(self.model)
            self.tableView.setModel(self.proxyModel)
        except Exception as e:
            self.alert("error", "Load failed", str(e))
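DataModel here wraps the query result for Qt's model/view machinery; a minimal sketch of such a table model, assuming PyQt5 and a pandas DataFrame (the real class may differ):

import pandas as pd
from PyQt5.QtCore import Qt, QAbstractTableModel

class DataModel(QAbstractTableModel):
    def __init__(self, df: pd.DataFrame, parent=None):
        super().__init__(parent)
        self._df = df

    def rowCount(self, parent=None):
        return len(self._df)

    def columnCount(self, parent=None):
        return len(self._df.columns)

    def data(self, index, role=Qt.DisplayRole):
        # expose each cell as display text
        if role == Qt.DisplayRole:
            return str(self._df.iat[index.row(), index.column()])
        return None

    def headerData(self, section, orientation, role=Qt.DisplayRole):
        if role == Qt.DisplayRole and orientation == Qt.Horizontal:
            return str(self._df.columns[section])
        return None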
Example 3
def build_server():
    server = Flask(__name__)
    server.config.from_object(SessionConfig)
    Session(server)

    app = DashAppWrapper(__name__,
                         server=server,
                         external_stylesheets=[dbc.themes.BOOTSTRAP])

    empty_data = pd.DataFrame(columns=states + ["natl_pop_vote", "dem_ec"])
    app.layout = DashLayout(DataModel(empty_data)).get_layout()
    set_app_callbacks(app)
    return server
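SessionConfig is project-specific; a typical Flask-Session config object looks like this (the values are illustrative only):

class SessionConfig:
    SESSION_TYPE = "filesystem"  # server-side session storage backend
    SECRET_KEY = "change-me"     # required to sign the session cookie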
Example 4
def init_session_data():
    csv_filenames = glob(f"{data_folder}/*.csv")
    elections_data = data_functions.load_data(csv_filenames)
    new_session_dict = dict(
        data_model=DataModel(
            elections_data.iloc[random.sample(range(len(elections_data)), data_size)]
        ),
        random_data_sample=None,
        random_sample_mode=False,
        new_random_538_map=False,
        new_average_538_map=False,
        reset_button_pressed=False
    )
    set_session_dict(new_session_dict)
    return new_session_dict
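set_session_dict is not shown; given the Flask-Session setup in the previous example, it plausibly stores the state in the server-side session (the key name is a guess):

from flask import session

def set_session_dict(session_dict):
    # persist per-user app state in the server-side Flask session
    session["session_dict"] = session_dict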
Example 5
def save_data_db(data, session):
    for d in data:
        result = session.query(DataModel).filter_by(mes=d['mes'],
                                                    ano=d['ano']).first()

        if result is None:
            event = DataModel(id=str(uuid.uuid4()),
                              ano=d['ano'],
                              bandeira=d['bandeira'],
                              mes=d['mes'],
                              numero_mes=d['numero_mes'],
                              valor=d['valor'],
                              created_at=datetime.datetime.now())

            session.add(event)
            session.commit()
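For reference, a declarative SQLAlchemy model matching the columns used above might look like this; the real DataModel lives elsewhere in the project (the Portuguese names mean: ano = year, mes = month, bandeira = card brand, valor = amount):

import datetime
from sqlalchemy import Column, String, Integer, Float, DateTime
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class DataModel(Base):
    __tablename__ = "data"

    id = Column(String, primary_key=True)
    ano = Column(Integer)          # year
    mes = Column(String)           # month name
    numero_mes = Column(Integer)   # month number
    bandeira = Column(String)      # card brand
    valor = Column(Float)          # amount
    created_at = Column(DateTime, default=datetime.datetime.now)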
Example 6
    def __init__(self):
        self.data = DataModel()
        self.xtrain_q = self.input_feature(self.data.X_train()[0])
        print(len(self.xtrain_q))
        self.xtrain_qa = self.input_feature(self.data.X_train()[1])
        # note: size= is the gensim 3.x keyword (renamed vector_size in 4.0+)
        self.models = [
            Word2Vec(self.xtrain_q, min_count=1, size=100, workers=4),
            Word2Vec(self.xtrain_qa, min_count=1, size=100, workers=4)
        ]
        self.train_wv = [
            np.array(
                [self.word_vectors(q, self.models[0]) for q in self.xtrain_q]),
            np.array(
                [self.word_vectors(q, self.models[1]) for q in self.xtrain_qa])
        ]
        #print(self.train_wv[0])
        self.qa = self.data.getQA()
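The word_vectors helper is not shown; a plausible method averages the vectors of the tokens a model knows (matching the gensim 3.x API implied by size=):

    def word_vectors(self, tokens, model, dim=100):
        # average the embeddings of known tokens; zero vector if none are known
        vecs = [model.wv[w] for w in tokens if w in model.wv]
        return np.mean(vecs, axis=0) if vecs else np.zeros(dim)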
Example 7
    def getRecommendations(self):
        ''' Use demographic filtering to get recommended apps '''
        try:
            # mongodb query
            x = list(self.mongo_obj.getCollection().aggregate([{
                "$match": {
                    "Rating": {
                        "$gt": 0
                    }
                }
            }, {
                "$group": {
                    "_id": "_id",
                    "AverageRating": {
                        "$avg": "$Rating"
                    }
                }
            }]))
            data = self.mongo_obj.searchData({}, {
                "_id": 0,
                "App": 1,
                "Category": 1,
                "Rating": 1,
                "Reviews": 1
            })
            data.dropna(inplace=True)

            # determine the score of all apps
            C = x[0]["AverageRating"]  # C - average rating for all apps
            # minimum number of reviews required
            m = data["Reviews"].quantile(0.9)
            filter_data = data.copy().loc[data["Reviews"] >= m]
            filter_data['Score'] = self.weighted_rating(filter_data, m, C)
            filter_data = filter_data.sort_values('Score', ascending=False)

            # display results
            dlg = Recommended_Apps_Dialog(self)
            dlg.tableView.setModel(DataModel(filter_data.head(n=20)))
            dlg.show()
        except Exception as e:
            self.alert("error", "Error", str(e))
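weighted_rating is not shown; demographic filtering conventionally uses the IMDB weighted-rating formula, so a likely implementation is:

    def weighted_rating(self, df, m, C):
        v = df["Reviews"]  # review count per app
        R = df["Rating"]   # average rating per app
        # apps with few reviews are pulled toward the global mean C
        return (v / (v + m)) * R + (m / (v + m)) * C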
Example 8
    def __init__(self, constraints_generator: str, sigma0: float,
                 scaler: bool, model_name: str, k: int, n: int, margin: float,
                 x0: np.ndarray = None, benchmark_mode: bool = False, clustering_k_min: int = 0, seed: int = 404,
                 db: str = 'experiments', draw: bool = False, max_iter: int = 50, train_sample: int = 500):
        data_model = DataModel(name=model_name, k=k, n=n, seed=seed, train_sample=train_sample) if model_name != "case_study" else CaseStudyDataModel()

        self.__n_constraints = cg.generate(constraints_generator, n)
        self.__w0 = np.repeat(1, self.__n_constraints)
        self.__x0 = x0
        log.debug('Creating train X')
        self.train_X = data_model.train_set()
        log.debug('Creating valid X')
        self.valid_X = data_model.valid_set()
        log.debug('Finished creating datasets')
        self.__dimensions = self.train_X.shape[1]
        self.__constraints_generator = constraints_generator
        self.test_X, self.test_Y = None, None
        self.__sigma0 = sigma0
        self.__scaler = StandardScaler() if scaler else None
        self.__data_model = data_model
        self.__margin = margin
        self.matches_constraints = data_model.benchmark_model.benchmark_objective_function if benchmark_mode else self.satisfies_constraints
        self.__clustering = clustering_k_min
        self.__results = list()
        self.__seed = seed
        self.db = db
        self.benchmark_mode = benchmark_mode
        self.draw = draw
        self.time_delta = None
        self.current_cluster = None
        self.max_iter = max_iter

        if self.__scaler is not None:
            self.__scaler.fit(self.train_X)
            self.train_X = self.__scaler.transform(self.train_X)
            self.valid_X = self.__scaler.transform(self.valid_X)

        if self.__clustering:
            self.clusters = xmeans_clustering(self.train_X, kmin=clustering_k_min, visualize=False)
Example 9
    def __init__(self, parent, page_source_code, **kwargs):
        self.parent = parent
        self.data_model = DataModel(page_source_code=page_source_code)
        self.enable_web_copy(enable=kwargs.get("enable_web_copy", True))
Example 10
    idx2vec = joblib.load(idx2vec_path)
    word2idx = joblib.load(word2idx_path)
    idx2word = joblib.load(idx2word_path)
    label2idx = joblib.load(label2idx_src)

    process_args_list = []
    pool = multiprocessing.Pool(processes=4)  # 4 worker processes
    for batch_size in batch_size_list:
        for learning_rate in learning_rate_list:
            for dropout in dropout_list:
                for layer_num in layer_num_list:
                    for hidden_num in hidden_num_list:

                        data_model = DataModel(batch_size=batch_size, fold_num=fold_num, sentence_len=sentence_len,
                                               word2idx=word2idx,
                                               label2idx=label2idx)
                        visual_model_name = (f"batch_size: {batch_size}"
                                             f"learning_rate: {learning_rate}"
                                             f"dropout: {dropout}"
                                             f"layer_num: {layer_num}"
                                             f"hidden_num: {hidden_num}")
                        model_src_name = model_src + visual_model_name
                        k_fold_mode = fold_num != 1
                        for k_idx in range(fold_num):
                            process_args_list.append((data_model, learning_rate, dropout, layer_num, hidden_num, k_idx,
Example 11
    def advSearchData(self):
        # open form dialog
        dlg = Adv_Search_Dialog(self)
        self.search_text.clear()
        if dlg.exec_():
            app = dlg.app.text()
            category = dlg.category.text()
            rating_low = float(dlg.rating_low.text())
            rating_high = float(dlg.rating_high.text())
            reviews_low = dlg.reviews_low.text()
            reviews_high = dlg.reviews_high.text()
            size_low = dlg.size_low.text()
            size_high = dlg.size_high.text()
            installs_low = dlg.installs_low.text()
            installs_high = dlg.installs_high.text()
            price_low = dlg.price_low.text()
            price_high = dlg.price_high.text()
            type = dlg.type.currentText()
            content_rating = dlg.content_rating.currentText()
            genre = dlg.genre.text()
            android_ver = dlg.android_ver.text()
            sort_field = dlg.sort_field.currentText()
            sort_order = dlg.sort_order.currentText()

            # creating the query
            try:
                query = {}
                if not any(
                    (app, category, rating_low, rating_high, reviews_low,
                     reviews_high, size_low, size_high, installs_low,
                     installs_high, price_low, price_high, genre, android_ver)
                ) and type == "None" and content_rating == "None" and sort_field == "None":
                    self.alert("error", "Search Failed",
                               "Please enter some values !")
                    return 0
                if app:
                    query["App"] = {"$regex": app, "$options": "i"}
                if category:
                    category = category.replace(" ", "_").upper()
                    query["Category"] = {"$regex": category, "$options": "i"}
                if rating_low != 0.0 or rating_high != 0.0:
                    query["Rating"] = {"$gte": rating_low}
                    if rating_high != 0.0:
                        query["Rating"]["$lte"] = rating_high
                if reviews_low or reviews_high:
                    query["Reviews"] = {"$gte": int(reviews_low or 0)}
                    if reviews_high:
                        # fixed: the upper bound belongs on "Reviews", not "Rating"
                        query["Reviews"]["$lte"] = int(reviews_high)
                if size_low or size_high:
                    size_low = "" if not size_low else re.findall(
                        r'\d+\.?', size_low)[0] + "M"
                    query["Size"] = {"$gte": size_low}
                    if size_high:
                        size_high = re.findall(r'\d+\.?', size_high)[0] + "M"
                        query["Size"]["$lte"] = size_high
                if installs_low or installs_high:
                    if installs_low:
                        installs_low = installs_low.replace(",", "").replace(
                            "+", "")
                        installs_low = f'{int(installs_low):,}' + "+"
                    query["Installs"] = {"$gte": installs_low}
                    if installs_high:
                        installs_high = installs_high.replace(",", "").replace(
                            "+", "")
                        installs_high = f'{int(installs_high)}' + "+"
                        query["Installs"]["$lte"] = installs_high

                if price_low or price_high:
                    query["Price"] = {"$gte": float(price_low or 0)}
                    if price_high:
                        query["Price"]["$lte"] = float(price_high)
                if type != "None":
                    query["Type"] = type
                if content_rating != "None":
                    query["Content Rating"] = content_rating
                if genre:
                    query["Genre"] = {"$regex": genre, "$options": "i"}
                if android_ver:
                    query["Android Ver"] = {
                        "$regex": "^" + android_ver,
                        "$options": "i"
                    }

                # print(query)
                res = None
                if sort_field == "None":
                    res = self.mongo_obj.searchData(query, {"_id": 0})
                else:
                    sort_order = 1 if sort_order == "Asc" else -1
                    res = self.mongo_obj.searchData(query, {"_id": 0},
                                                    sort_field, sort_order)

                # load data in the table widget
                self.model = DataModel(res)
                self.proxyModel = QSortFilterProxyModel(self)
                self.proxyModel.setSourceModel(self.model)
                self.tableView.setModel(self.proxyModel)
            except Exception as e:
                self.alert("error", "Search Failed", str(e))
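A minimal sketch of the searchData wrapper these snippets rely on, assuming a pymongo collection and a pandas DataFrame return value (the real method may differ):

import pandas as pd

def searchData(self, query, projection, sort_field=None, sort_order=1):
    # run the query with the given projection, optionally sorted
    cursor = self.getCollection().find(query, projection)
    if sort_field is not None:
        cursor = cursor.sort(sort_field, sort_order)
    return pd.DataFrame(list(cursor))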
"Observer Design Pattern Concept"

from data_model import DataModel
from data_controller import DataController
from pie_graph_view import PieGraphView
from bar_graph_view import BarGraphView
from table_view import TableView

# A local data model that the hypothetical external controller updates
DATA_MODEL = DataModel()

# Add some visualisations that use the data model
PIE_GRAPH_VIEW = PieGraphView(DATA_MODEL)
BAR_GRAPH_VIEW = BarGraphView(DATA_MODEL)
TABLE_VIEW = TableView(DATA_MODEL)

# A hypothetical data controller running in a different process
DATA_CONTROLLER = DataController()

# The hypothetical external data controller updates some data
DATA_CONTROLLER.notify([1, 2, 3])

# The client now removes the local BAR_GRAPH_VIEW
BAR_GRAPH_VIEW.delete()

# The hypothetical external data controller updates the data again
DATA_CONTROLLER.notify([4, 5, 6])
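A minimal subject consistent with how DATA_MODEL is used above: views subscribe on construction and DataModel pushes new data to them (the method names are assumptions):

class DataModel:
    def __init__(self):
        self._observers = []

    def subscribe(self, observer):
        # called by each view when it is constructed
        self._observers.append(observer)

    def unsubscribe(self, observer):
        # called by a view's delete(), e.g. BAR_GRAPH_VIEW above
        self._observers.remove(observer)

    def notify(self, data):
        # fan the new data out to every attached view
        for observer in self._observers:
            observer.notify(data)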
Example 13
def save():
    body = request.json
    new_data = DataModel(**body)
    ret = new_data.save()
    return jsonify({'id': str(ret.id)})
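Given that .save() returns an object with an .id, DataModel here is plausibly a MongoEngine document; an illustrative skeleton:

from mongoengine import DynamicDocument

class DataModel(DynamicDocument):
    # a dynamic document accepts whatever fields **body supplies
    pass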
Example 14
def standard_scaler(name: str, n: int, k: int, seed: int) -> StandardScaler:
    df = DataModel(name, k=k, n=n, seed=seed).train_set()
    scaler = StandardScaler()
    scaler.fit(df)
    return scaler
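Usage sketch: fit the scaler once on the training distribution, then reuse the same transform for any later data (the argument values here are illustrative):

scaler = standard_scaler("some_model", n=7, k=3, seed=404)
X_scaled = scaler.transform(X_new)  # X_new: array with the same n columns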
Example 15
from monitor import Monitor
import joblib
from data_model import DataModel
import multiprocessing

word2idx = joblib.load('./preprocessing_data/word2idx.pkl')
label2idx = joblib.load('./preprocessing_data/label2idx.pkl')
idx2vec = joblib.load('./preprocessing_data/idx2vec.pkl')
idx2label = joblib.load('./preprocessing_data/idx2label.pkl')

process_args_list = []
fold_number = 10

raw_data = DataModel(fold_number)


def sub_process(train_args):
    test_monitor = Monitor(word2idx_dict=word2idx,
                           label2idx_dict=label2idx,
                           idx2vec_dict=idx2vec,
                           idx2label_dict=idx2label,
                           data=raw_data.choice_fold(train_args['fold_idx']),
                           sentence_fixed_len=train_args['sentence_fixed_len'],
                           learning_rate=train_args['learning_rate'],
                           word_vec_size=train_args['word_vec_size'],
                           hidden_num=train_args['hidden_num'],
                           label_num=train_args['label_num'],
                           k_model_src=train_args['k_model_src'])

    test_monitor.train(batch_size=train_args['batch_size'],
                       iter_num=train_args['iter_num'],
Example 16
async def save(request):
    body = request.json
    new_data = DataModel(**body)
    ret = await new_data.save()
    return response.json({'id': str(ret.id)}, status=201)
Example 17
        return 0.0


_threshold = np.vectorize(_threshold)


def _apply_avg_filter(merged_lines: np.ndarray):
    # the averaging convolution is commented out, so only the threshold runs
    convolved_img = merged_lines.copy()
    #convolved_img[:,:,0] = scipy.ndimage.convolve(merged_lines[:,:,0], avg_filter, mode="constant")
    #convolved_img[:,:,1] = scipy.ndimage.convolve(merged_lines[:,:,1], avg_filter, mode="constant")
    return _threshold(convolved_img)


def _center_to_left_top_lines(lines):
    for line in lines:
        label, center_x, center_y, w, h = line

        x = center_x
        if center_x - w / 2 > 0:
            x -= w / 2
        y = center_y
        if center_y - h / 2 > 0:
            y -= h / 2

        yield (label, x, y, w, h)


if __name__ == '__main__':
    test_data_model = DataModel("./data/x", "./data/y", (256, 256, 3),
                                (256, 256, 3), (50, 50, 2))
    test(test_data_model)
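The generator above converts (center, size) boxes to top-left anchored ones; a usage sketch with illustrative values:

boxes = [("car", 100, 80, 40, 20)]
print(list(_center_to_left_top_lines(boxes)))
# -> [('car', 80.0, 70.0, 40, 20)]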
Example 18
                    self.args.batch_threads_num)
                neighbors2 = bat.generate_neighbours(
                    self.eval_kg2_useful_ent_embeddings(),
                    self.kgs.useful_entities_list2, neighbors_num2,
                    self.args.batch_threads_num)
                ent_num = len(self.kgs.kg1.entities_list) + len(
                    self.kgs.kg2.entities_list)
                print('neighbor dict:', len(neighbors1), type(neighbors2))
                print("generating neighbors of {} entities costs {:.3f} s.".
                      format(ent_num,
                             time.time() - t1))
        for i in range(1, self.args.shared_learning_max_epoch + 1):
            self.train_shared_space_mapping_1epo(i, entity_list)
            if i >= self.args.start_valid and i % self.args.eval_freq == 0:
                valid(self, embed_choice='final')
        self.save()
        test(self, embed_choice='nv')
        test(self, embed_choice='rv')
        test(self, embed_choice='av')
        test(self, embed_choice='avg')
        test_WVA(self)
        test(self, embed_choice='final')


if __name__ == '__main__':
    args = load_args('args.json')
    data = DataModel(args)
    attr_align_model = PredicateAlignModel(data.kgs, args)
    model = MultiKE_Late(data, args, attr_align_model)
    model.run()
Example 19
    def __init__(self):

        BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        sys.path.append(BASE_DIR)

        self.data_model = DataModel()