def get_data_from_folder(trainfolderpath):
    imagePaths = list(p.list_images(trainfolderpath))
    features = []
    labels = []

    for (i, imagePath) in enumerate(imagePaths):
        # load the image and extract the class label (assuming that our
        # path has the format: /path/to/dataset/{class}.{image_num}.jpg)
        image = cv2.imread(imagePath)
        label = imagePath.split(os.path.sep)[-1].split(".")[0]

        hist = extract_color_histogram(image)
        features.append(hist)
        labels.append(label)

        if i > 0 and i % 1000 == 0:
            print("[INFO] processed {}/{}".format(i, len(imagePaths)))

    # convert the lists to NumPy arrays
    features = np.array(features)
    labels = np.array(labels)

    (trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
        features, labels, test_size=0.25, random_state=42)

    dataModel = DataModel()
    dataModel.setTrainFeat(trainFeat)
    dataModel.setTrainLabels(trainLabels)
    dataModel.setTestFeat(testFeat)
    dataModel.setTestLabels(testLabels)
    return dataModel
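The snippet above calls extract_color_histogram without defining it. A minimal sketch of such a helper, assuming the conventional 3D HSV color histogram feature (the bins shape and ranges are assumptions, not taken from the original source):

def extract_color_histogram(image, bins=(8, 8, 8)):
    # convert to HSV and compute a 3D color histogram over all channels;
    # OpenCV's hue channel spans [0, 180), the others [0, 256)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
                        [0, 180, 0, 256, 0, 256])
    # normalize so the feature is comparable across image sizes
    cv2.normalize(hist, hist)
    return hist.flatten()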
def loadData(self):
    # load the data from MongoDB into the table view
    self.search_text.clear()
    try:
        self.model = DataModel(self.mongo_obj.searchData({}, {"_id": 0}))
        self.proxyModel = QSortFilterProxyModel(self)
        self.proxyModel.setSourceModel(self.model)
        self.tableView.setModel(self.proxyModel)
    except Exception as e:
        self.alert("error", "Load failed", str(e))
def build_server():
    server = Flask(__name__)
    server.config.from_object(SessionConfig)
    Session(server)

    app = DashAppWrapper(__name__, server=server,
                         external_stylesheets=[dbc.themes.BOOTSTRAP])
    empty_data = pd.DataFrame(columns=states + ["natl_pop_vote", "dem_ec"])
    app.layout = DashLayout(DataModel(empty_data)).get_layout()
    set_app_callbacks(app)
    return server
def init_session_data():
    csv_filenames = glob(f"{data_folder}/*.csv")
    elections_data = data_functions.load_data(csv_filenames)
    new_session_dict = dict(
        data_model=DataModel(
            elections_data.iloc[random.sample(range(len(elections_data)), data_size)]
        ),
        random_data_sample=None,
        random_sample_mode=False,
        new_random_538_map=False,
        new_average_538_map=False,
        reset_button_pressed=False,
    )
    set_session_dict(new_session_dict)
    return new_session_dict
def save_data_db(data, session):
    for d in data:
        # skip records already stored for this month/year combination
        result = session.query(DataModel).filter_by(mes=d['mes'], ano=d['ano']).first()
        if result is None:
            event = DataModel(id=str(uuid.uuid4()),
                              ano=d['ano'],
                              bandeira=d['bandeira'],
                              mes=d['mes'],
                              numero_mes=d['numero_mes'],
                              valor=d['valor'],
                              created_at=datetime.datetime.now())
            session.add(event)
            session.commit()
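save_data_db implies a SQLAlchemy model with the fields it passes. A minimal declarative sketch, where the table name and column types are assumptions inferred from the values in the snippet:

from sqlalchemy import Column, String, Integer, Float, DateTime
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class DataModel(Base):
    __tablename__ = 'data'  # hypothetical; not shown in the original

    id = Column(String, primary_key=True)  # uuid4 string
    ano = Column(Integer)                  # year
    bandeira = Column(String)              # card brand
    mes = Column(String)                   # month name
    numero_mes = Column(Integer)           # month number
    valor = Column(Float)                  # amount
    created_at = Column(DateTime)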
def __init__(self):
    self.data = DataModel()
    self.xtrain_q = self.input_feature(self.data.X_train()[0])
    print(len(self.xtrain_q))
    self.xtrain_qa = self.input_feature(self.data.X_train()[1])
    # note: the `size` keyword is the gensim < 4 API; newer releases renamed it `vector_size`
    self.models = [
        Word2Vec(self.xtrain_q, min_count=1, size=100, workers=4),
        Word2Vec(self.xtrain_qa, min_count=1, size=100, workers=4)
    ]
    self.train_wv = [
        np.array([self.word_vectors(q, self.models[0]) for q in self.xtrain_q]),
        np.array([self.word_vectors(q, self.models[1]) for q in self.xtrain_qa])
    ]
    # print(self.train_wv[0])
    self.qa = self.data.getQA()
def getRecommendations(self):
    '''Use demographic filtering to get recommended apps'''
    try:
        # MongoDB aggregation: the group key is the literal string "_id",
        # so every document falls into one group and we get the overall average
        x = list(self.mongo_obj.getCollection().aggregate([
            {"$match": {"Rating": {"$gt": 0}}},
            {"$group": {"_id": "_id", "AverageRating": {"$avg": "$Rating"}}}
        ]))
        data = self.mongo_obj.searchData({}, {
            "_id": 0, "App": 1, "Category": 1, "Rating": 1, "Reviews": 1
        })
        data.dropna(inplace=True)

        # determine the score of all apps
        C = x[0]["AverageRating"]  # C - average rating of all apps
        # m - minimum number of reviews required (90th percentile)
        m = data["Reviews"].quantile(0.9)
        filter_data = data.copy().loc[data["Reviews"] >= m]
        filter_data['Score'] = self.weighted_rating(filter_data, m, C)
        filter_data = filter_data.sort_values('Score', ascending=False)

        # display results
        dlg = Recommended_Apps_Dialog(self)
        dlg.tableView.setModel(DataModel(filter_data.head(n=20)))
        dlg.show()
    except Exception as e:
        self.alert("error", "Error", str(e))
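weighted_rating is not shown in the snippet. A minimal sketch, assuming the standard IMDB-style weighted rating commonly used for demographic filtering, where v is an app's review count and R its own rating:

def weighted_rating(self, df, m, C):
    # WR = (v / (v + m)) * R + (m / (v + m)) * C
    # pulls sparsely-reviewed apps toward the global mean C
    v = df["Reviews"]
    R = df["Rating"]
    return (v / (v + m)) * R + (m / (v + m)) * C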
def __init__(self, constraints_generator: str, sigma0: float, scaler: bool,
             model_name: str, k: int, n: int, margin: float,
             x0: np.ndarray = None, benchmark_mode: bool = False,
             clustering_k_min: int = 0, seed: int = 404,
             db: str = 'experiments', draw: bool = False,
             max_iter: int = 50, train_sample: int = 500):
    data_model = (DataModel(name=model_name, k=k, n=n, seed=seed,
                            train_sample=train_sample)
                  if model_name != "case_study" else CaseStudyDataModel())
    self.__n_constraints = cg.generate(constraints_generator, n)
    self.__w0 = np.repeat(1, self.__n_constraints)
    self.__x0 = x0

    log.debug('Creating train X')
    self.train_X = data_model.train_set()
    log.debug('Creating valid X')
    self.valid_X = data_model.valid_set()
    log.debug('Finished creating datasets')

    self.__dimensions = self.train_X.shape[1]
    self.__constraints_generator = constraints_generator
    self.test_X, self.test_Y = None, None
    self.__sigma0 = sigma0
    self.__scaler = StandardScaler() if scaler else None
    self.__data_model = data_model
    self.__margin = margin
    self.matches_constraints = (data_model.benchmark_model.benchmark_objective_function
                                if benchmark_mode else self.satisfies_constraints)
    self.__clustering = clustering_k_min
    self.__results = list()
    self.__seed = seed
    self.db = db
    self.benchmark_mode = benchmark_mode
    self.draw = draw
    self.time_delta = None
    self.current_cluster = None
    self.max_iter = max_iter

    if self.__scaler is not None:
        self.__scaler.fit(self.train_X)
        self.train_X = self.__scaler.transform(self.train_X)
        self.valid_X = self.__scaler.transform(self.valid_X)

    if self.__clustering:
        self.clusters = xmeans_clustering(self.train_X,
                                          kmin=clustering_k_min,
                                          visualize=False)
def __init__(self, parent, page_source_code, **kwargs):
    self.parent = parent
    self.data_model = DataModel(page_source_code=page_source_code)
    self.enable_web_copy(enable=kwargs.get("enable_web_copy", True))
idx2vec = joblib.load(idx2vec_path)
word2idx = joblib.load(word2idx_path)
idx2word = joblib.load(idx2word_path)
label2idx = joblib.load(label2idx_src)

process_args_list = []
pool = multiprocessing.Pool(processes=4)  # 4 processes

# grid search over all hyperparameter combinations
for batch_size in batch_size_list:
    for learning_rate in learning_rate_list:
        for dropout in dropout_list:
            for layer_num in layer_num_list:
                for hidden_num in hidden_num_list:
                    data_model = DataModel(batch_size=batch_size,
                                           fold_num=fold_num,
                                           sentence_len=sentence_len,
                                           word2idx=word2idx,
                                           label2idx=label2idx)
                    visual_model_name = ("batch_size: " + str(batch_size) +
                                         " learning_rate: " + str(learning_rate) +
                                         " dropout: " + str(dropout) +
                                         " layer_num: " + str(layer_num) +
                                         " hidden_num: " + str(hidden_num))
                    model_src_name = model_src + visual_model_name
                    k_fold_mode = fold_num != 1
                    for k_idx in range(fold_num):
                        process_args_list.append((data_model, learning_rate,
                                                  dropout, layer_num,
                                                  hidden_num, k_idx,
def advSearchData(self):
    # open form dialog
    dlg = Adv_Search_Dialog(self)
    self.search_text.clear()
    if dlg.exec_():
        app = dlg.app.text()
        category = dlg.category.text()
        rating_low = float(dlg.rating_low.text())
        rating_high = float(dlg.rating_high.text())
        reviews_low = dlg.reviews_low.text()
        reviews_high = dlg.reviews_high.text()
        size_low = dlg.size_low.text()
        size_high = dlg.size_high.text()
        installs_low = dlg.installs_low.text()
        installs_high = dlg.installs_high.text()
        price_low = dlg.price_low.text()
        price_high = dlg.price_high.text()
        type = dlg.type.currentText()
        content_rating = dlg.content_rating.currentText()
        genre = dlg.genre.text()
        android_ver = dlg.android_ver.text()
        sort_field = dlg.sort_field.currentText()
        sort_order = dlg.sort_order.currentText()

        # build the query
        try:
            query = {}
            if not any((app, category, rating_low, rating_high,
                        reviews_low, reviews_high, size_low, size_high,
                        installs_low, installs_high, price_low, price_high,
                        genre, android_ver)) \
                    and type == "None" and content_rating == "None" \
                    and sort_field == "None":
                self.alert("error", "Search Failed",
                           "Please enter some values!")
                return 0
            if app:
                query["App"] = {"$regex": app, "$options": "i"}
            if category:
                category = category.replace(" ", "_").upper()
                query["Category"] = {"$regex": category, "$options": "i"}
            if rating_low != 0.0 or rating_high != 0.0:
                query["Rating"] = {"$gte": rating_low}
                if rating_high != 0.0:
                    query["Rating"]["$lte"] = rating_high
            if reviews_low or reviews_high:
                query["Reviews"] = {"$gte": int(reviews_low or 0)}
                if reviews_high:
                    query["Reviews"]["$lte"] = int(reviews_high)
            if size_low or size_high:
                size_low = "" if not size_low else re.findall(
                    r'\d+\.?', size_low)[0] + "M"
                query["Size"] = {"$gte": size_low}
                if size_high:
                    size_high = re.findall(r'\d+\.?', size_high)[0] + "M"
                    query["Size"]["$lte"] = size_high
            if installs_low or installs_high:
                # normalize values like "10,000+" before comparing
                query["Installs"] = {}
                if installs_low:
                    installs_low = installs_low.replace(",", "").replace("+", "")
                    installs_low = f'{int(installs_low):,}' + "+"
                    query["Installs"]["$gte"] = installs_low
                if installs_high:
                    installs_high = installs_high.replace(",", "").replace("+", "")
                    installs_high = f'{int(installs_high):,}' + "+"
                    query["Installs"]["$lte"] = installs_high
            if price_low or price_high:
                query["Price"] = {"$gte": float(price_low or 0)}
                if price_high:
                    query["Price"]["$lte"] = float(price_high)
            if type != "None":
                query["Type"] = type
            if content_rating != "None":
                query["Content Rating"] = content_rating
            if genre:
                query["Genre"] = {"$regex": genre, "$options": "i"}
            if android_ver:
                query["Android Ver"] = {"$regex": "^" + android_ver,
                                        "$options": "i"}
            # print(query)

            res = None
            if sort_field == "None":
                res = self.mongo_obj.searchData(query, {"_id": 0})
            else:
                sort_order = 1 if sort_order == "Asc" else -1
                res = self.mongo_obj.searchData(query, {"_id": 0},
                                                sort_field, sort_order)

            # load data into the table view
            self.model = DataModel(res)
            self.proxyModel = QSortFilterProxyModel(self)
            self.proxyModel.setSourceModel(self.model)
            self.tableView.setModel(self.proxyModel)
        except Exception as e:
            self.alert("error", "Search Failed", str(e))
"Observer Design Pattern Concept" from data_model import DataModel from data_controller import DataController from pie_graph_view import PieGraphView from bar_graph_view import BarGraphView from table_view import TableView # A local data view that the hypothetical external controller updates DATA_MODEL = DataModel() # Add some visualisation that use the dataview PIE_GRAPH_VIEW = PieGraphView(DATA_MODEL) BAR_GRAPH_VIEW = BarGraphView(DATA_MODEL) TABLE_VIEW = TableView(DATA_MODEL) # A hypothetical data controller running in a different process DATA_CONTROLLER = DataController() # The hypothetical external data controller updates some data DATA_CONTROLLER.notify([1, 2, 3]) # Client now removes a local BAR_GRAPH BAR_GRAPH_VIEW.delete() # The hypothetical external data controller updates the data again DATA_CONTROLLER.notify([4, 5, 6])
def save():
    body = request.json
    new_data = DataModel(**body)
    ret = new_data.save()
    return jsonify({'id': str(ret.id)})
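A usage sketch for the endpoint above, assuming it is registered at a hypothetical /data route, the payload fields are placeholders, and DataModel is a MongoEngine-style document whose save() returns the persisted object:

import requests

# hypothetical client-side call; route and fields are illustrative
resp = requests.post("http://localhost:5000/data",
                     json={"name": "example", "value": 42})
print(resp.json())  # e.g. {'id': '5f1e...'}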
def standard_scaler(name: str, n: int, k: int, seed: int) -> StandardScaler:
    df = DataModel(name, k=k, n=n, seed=seed).train_set()
    scaler = StandardScaler()
    scaler.fit(df)
    return scaler
from monitor import Monitor
import joblib
from data_model import DataModel
import multiprocessing

word2idx = joblib.load('./preprocessing_data/word2idx.pkl')
label2idx = joblib.load('./preprocessing_data/label2idx.pkl')
idx2vec = joblib.load('./preprocessing_data/idx2vec.pkl')
idx2label = joblib.load('./preprocessing_data/idx2label.pkl')

process_args_list = []
fold_number = 10
raw_data = DataModel(fold_number)


def sub_process(train_args):
    test_monitor = Monitor(word2idx_dict=word2idx,
                           label2idx_dict=label2idx,
                           idx2vec_dict=idx2vec,
                           idx2label_dict=idx2label,
                           data=raw_data.choice_fold(train_args['fold_idx']),
                           sentence_fixed_len=train_args['sentence_fixed_len'],
                           learning_rate=train_args['learning_rate'],
                           word_vec_size=train_args['word_vec_size'],
                           hidden_num=train_args['hidden_num'],
                           label_num=train_args['label_num'],
                           k_model_src=train_args['k_model_src'])
    test_monitor.train(batch_size=train_args['batch_size'],
                       iter_num=train_args['iter_num'],
async def save(request):
    body = request.json
    new_data = DataModel(**body)
    ret = await new_data.save()
    return response.json({'id': str(ret.id)}, status=201)
        return 0.0

_threshold = np.vectorize(_threshold)


def _apply_avg_filter(merged_lines: np.ndarray):
    convolved_img = merged_lines.copy()
    # convolved_img[:,:,0] = scipy.ndimage.convolve(merged_lines[:,:,0], avg_filter, mode="constant")
    # convolved_img[:,:,1] = scipy.ndimage.convolve(merged_lines[:,:,1], avg_filter, mode="constant")
    return _threshold(convolved_img)


def _center_to_left_top_lines(lines):
    # convert (label, center_x, center_y, w, h) boxes to top-left coordinates
    for line in lines:
        label, center_x, center_y, w, h = line
        x = center_x
        if center_x - w / 2 > 0:
            x -= w / 2
        y = center_y
        if center_y - h / 2 > 0:
            y -= h / 2
        yield (label, x, y, w, h)


if __name__ == '__main__':
    test_data_model = DataModel("./data/x", "./data/y",
                                (256, 256, 3), (256, 256, 3), (50, 50, 2))
    test(test_data_model)
                                             self.args.batch_threads_num)
        neighbors2 = bat.generate_neighbours(self.eval_kg2_useful_ent_embeddings(),
                                             self.kgs.useful_entities_list2,
                                             neighbors_num2,
                                             self.args.batch_threads_num)
        ent_num = len(self.kgs.kg1.entities_list) + len(self.kgs.kg2.entities_list)
        print('neighbor dict:', len(neighbors1), type(neighbors2))
        print("generating neighbors of {} entities costs {:.3f} s.".format(
            ent_num, time.time() - t1))
        for i in range(1, self.args.shared_learning_max_epoch + 1):
            self.train_shared_space_mapping_1epo(i, entity_list)
            if i >= self.args.start_valid and i % self.args.eval_freq == 0:
                valid(self, embed_choice='final')
        self.save()
        test(self, embed_choice='nv')
        test(self, embed_choice='rv')
        test(self, embed_choice='av')
        test(self, embed_choice='avg')
        test_WVA(self)
        test(self, embed_choice='final')


if __name__ == '__main__':
    args = load_args('args.json')
    data = DataModel(args)
    attr_align_model = PredicateAlignModel(data.kgs, args)
    model = MultiKE_Late(data, args, attr_align_model)
    model.run()
def __init__(self):
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sys.path.append(BASE_DIR)
    self.data_model = DataModel()