def __init__(self):
    super(MainWindow, self).__init__()
    self.resize(500, 40)
    self.setWindowTitle("Key Frame View")
    self.keyPanel = KeyFramePanel(None)
    dataModel = DataModel(dataContainer=DemoData(50), prefetchSize=0)
    transModel = TransformModel()
    transModel.setModel(dataModel)
    transModel.setValueScale(0, 200)
    dataModel.setPos(2)
    self.keyPanel.connect_to_transform(transModel)
    k = KeyFrameList()
    k.addItem(KeyFrame(0.1))
    k.addItem(KeyFrame(0.9))
    # k = KeyFrameList._from_JSON(open("test.json").read())
    # print(k)
    # self.keyPanel.setModel(k)
    self.keyPanel.load_from_JSON("test.json")
    self.setCentralWidget(self.keyPanel)
    self.setStyleSheet("background-color:black;")
def __init__(self):
    super(MainWindow, self).__init__()
    self.resize(500, 40)
    self.setWindowTitle("Key Frame View")
    self.keyPanel = KeyFramePanel(None)
    dataModel = DataModel(dataContainer=DemoData(50), prefetchSize=0)
    transModel = TransformModel()
    transModel.setModel(dataModel)
    dataModel.setPos(2)
    self.keyPanel.keyView.setTransformModel(transModel)
    k = KeyFrameList()
    k.addItem(KeyFrame(0.4))
    # k.addItem(KeyFrame(0.9))
    self.keyPanel.keyView.setKeyListModel(k)
    self.setCentralWidget(self.keyPanel)
    self.setStyleSheet("background-color:black;")
def get_data_from_folder(trainfolderpath):
    imagePaths = list(p.list_images(trainfolderpath))
    features = []
    labels = []
    for (i, imagePath) in enumerate(imagePaths):
        # load the image and extract the class label (assuming that the
        # path has the format: /path/to/dataset/{class}.{image_num}.jpg)
        image = cv2.imread(imagePath)
        label = imagePath.split(os.path.sep)[-1].split(".")[0]
        hist = extract_color_histogram(image)
        features.append(hist)
        labels.append(label)
        if i > 0 and i % 1000 == 0:
            print("[INFO] processed {}/{}".format(i, len(imagePaths)))
    # convert the lists to NumPy arrays
    features = np.array(features)
    labels = np.array(labels)
    (trainFeat, testFeat, trainLabels, testLabels) = train_test_split(
        features, labels, test_size=0.25, random_state=42)
    dataModel = DataModel()
    dataModel.setTrainFeat(trainFeat)
    dataModel.setTrainLabels(trainLabels)
    dataModel.setTestFeat(testFeat)
    dataModel.setTestLabels(testLabels)
    return dataModel
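# The function above relies on extract_color_histogram(), which is not shown
# in this snippet. A minimal sketch of such a helper, assuming the common
# approach of a flattened, normalized 3D HSV color histogram (the bin counts
# are an illustrative choice, not taken from the original code):
import cv2

def extract_color_histogram(image, bins=(8, 8, 8)):
    # convert BGR to HSV and compute a 3D histogram over all three channels
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
                        [0, 180, 0, 256, 0, 256])
    # normalize in place so the feature is comparable across image sizes,
    # then flatten to a 1D feature vector
    cv2.normalize(hist, hist)
    return hist.flatten()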
class W2Vec:
    def __init__(self):
        self.data = DataModel()
        self.xtrain_q = self.input_feature(self.data.X_train()[0])
        print(len(self.xtrain_q))
        self.xtrain_qa = self.input_feature(self.data.X_train()[1])
        self.models = [
            Word2Vec(self.xtrain_q, min_count=1, size=100, workers=4),
            Word2Vec(self.xtrain_qa, min_count=1, size=100, workers=4)
        ]
        self.train_wv = [
            np.array(
                [self.word_vectors(q, self.models[0]) for q in self.xtrain_q]),
            np.array(
                [self.word_vectors(q, self.models[1]) for q in self.xtrain_qa])
        ]
        # print(self.train_wv[0])
        self.qa = self.data.getQA()

    def word2vec_data(self, data):
        return [[word for word in row.split()] for row in data]

    def input_feature(self, q):
        q = [q]
        q = self.data.text_preprocessor(q)
        q = self.word2vec_data(q)
        return q

    def word_vectors(self, q, model, test=False):
        if test:
            q = self.input_feature(q)
        wv = []
        for row in q:
            vec = np.zeros(100)
            count = 0
            for word in row:
                try:
                    vec += model[word]  # gensim < 4 style vector lookup
                    count += 1
                except KeyError:
                    # skip out-of-vocabulary words
                    pass
            # guard against rows where no word was in the vocabulary
            wv.append(vec / count if count else vec)
        return np.array(wv)

    def predict(self, q):
        answers = []
        for index, vectors in enumerate(self.train_wv):
            print(vectors.shape)
def find_best_single_feature_parameters(self, dataset):
    for feature in dataset.suggested_discretize_features:
        permutations = self.generate_feature_parameters(feature)
        print(permutations)
        best_mean_fcs = self.best_fcs[dataset]
        best_perm = None
        for p, perm in enumerate(permutations):
            logging.error("[Parameters Tester][{}][{}][Perm {:03d}] Current permutation: {}".format(dataset, feature, p+1, perm))
            dm = DataModel.generate_from_file(dataset, discretize_params=perm)
            classes_list = dm.get_classes_list()
            f_scores = []
            a = 1
            for _ in range(self.best_fold[dataset][1]):
                for train_set, test_set in dm.generate_k_folds_stratified(self.best_fold[dataset][0]):
                    model_evaluator = ModelEvaluator(train_set, test_set, classes_list)
                    model_evaluator.evaluate()
                    f_scores.append(model_evaluator.get_f_score())
                    logging.error("[Parameters Tester][{}][{}][Perm {:03d}][{:03d}] FCS: {}".format(dataset, feature, p+1, a, f_scores[-1]))
                    a += 1
            f_score_mean = sum(f_scores) / len(f_scores)
            logging.error("[Parameters Tester][{}][{}][Perm {:03d}] Best FCS: {}, Mean FCS {}".format(dataset, feature, p+1, max(f_scores), f_score_mean))
            if f_score_mean > best_mean_fcs:
                best_perm = perm[0]
                best_mean_fcs = f_score_mean
        if best_perm is not None:
            self.best_discretize_feature_params[dataset].append(best_perm)
            logging.error("[Parameters Tester][{}][{}] Best mean FCS: {}, Best parameters: {}".format(dataset, feature, best_mean_fcs, best_perm))
def __init__(self, data_model=None):
    if data_model is None:
        self.data_model = DataModel.new()
    else:
        self.data_model = data_model
    self.connection_set = ConnectionSet(self.data_model)
    self.node_types = {}
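# Usage sketch for the constructor above, assuming it belongs to the Network
# class used elsewhere in these snippets and that DataModel.new() returns a
# default 'dict'-backed store (both are inferences, not verified here):
net = Network()                                           # falls back to DataModel.new()
net_explicit = Network(data_model=DataModel.new('dict'))  # explicit backing store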
def loadData(self):
    # loads the data from mongodb into the table view
    self.search_text.clear()
    try:
        self.model = DataModel(self.mongo_obj.searchData({}, {"_id": 0}))
        self.proxyModel = QSortFilterProxyModel(self)
        self.proxyModel.setSourceModel(self.model)
        self.tableView.setModel(self.proxyModel)
    except Exception as e:
        self.alert("error", "Load failed", str(e))
def main():
    dm = DataModel.generate_from_file(
        PIMA_DIABETES_DATASET,
        smooth=True,
        discretize_params=[
            DiscretizeParam('Age', kbins_discretize, 10),
            DiscretizeParam('SkinThickness', kbins_discretize, 10),
            DiscretizeParam('Pregnancies', kbins_discretize, 10)
        ])
    print(Discretizer.kmean_models)
def build_server():
    server = Flask(__name__)
    server.config.from_object(SessionConfig)
    Session(server)
    app = DashAppWrapper(__name__, server=server,
                         external_stylesheets=[dbc.themes.BOOTSTRAP])
    empty_data = pd.DataFrame(columns=states + ["natl_pop_vote", "dem_ec"])
    app.layout = DashLayout(DataModel(empty_data)).get_layout()
    set_app_callbacks(app)
    return server
def __init__(self, constraints_generator: str, sigma0: float, scaler: bool,
             model_name: str, k: int, n: int, margin: float,
             x0: np.ndarray = None, benchmark_mode: bool = False,
             clustering_k_min: int = 0, seed: int = 404,
             db: str = 'experiments', draw: bool = False,
             max_iter: int = 50, train_sample: int = 500):
    data_model = DataModel(name=model_name, k=k, n=n, seed=seed, train_sample=train_sample) \
        if model_name != "case_study" else CaseStudyDataModel()
    self.__n_constraints = cg.generate(constraints_generator, n)
    self.__w0 = np.repeat(1, self.__n_constraints)
    self.__x0 = x0
    log.debug('Creating train X')
    self.train_X = data_model.train_set()
    log.debug('Creating valid X')
    self.valid_X = data_model.valid_set()
    log.debug('Finished creating datasets')
    self.__dimensions = self.train_X.shape[1]
    self.__constraints_generator = constraints_generator
    self.test_X, self.test_Y = None, None
    self.__sigma0 = sigma0
    self.__scaler = StandardScaler() if scaler else None
    self.__data_model = data_model
    self.__margin = margin
    self.matches_constraints = data_model.benchmark_model.benchmark_objective_function \
        if benchmark_mode else self.satisfies_constraints
    self.__clustering = clustering_k_min
    self.__results = list()
    self.__seed = seed
    self.db = db
    self.benchmark_mode = benchmark_mode
    self.draw = draw
    self.time_delta = None
    self.current_cluster = None
    self.max_iter = max_iter
    if self.__scaler is not None:
        self.__scaler.fit(self.train_X)
        self.train_X = self.__scaler.transform(self.train_X)
        self.valid_X = self.__scaler.transform(self.valid_X)
    if self.__clustering:
        self.clusters = xmeans_clustering(self.train_X, kmin=clustering_k_min,
                                          visualize=False)
def get_session_dict():
    if not session.get("initialized", False):
        return init_session_data()
    print("get", sorted(session.keys()))
    session_dict = dict(
        random_data_sample=deserialize_pd(session["random_data_sample"]),
        random_sample_mode=session["random_sample_mode"],
        new_random_538_map=session["new_random_538_map"],
        new_average_538_map=session["new_average_538_map"],
        reset_button_pressed=session["reset_button_pressed"],
        data_model=DataModel.from_json(session["data_model_json"])
    )
    return session_dict
def init_session_data():
    csv_filenames = glob(f"{data_folder}/*.csv")
    elections_data = data_functions.load_data(csv_filenames)
    new_session_dict = dict(
        data_model=DataModel(
            elections_data.iloc[random.sample(range(len(elections_data)), data_size)]
        ),
        random_data_sample=None,
        random_sample_mode=False,
        new_random_538_map=False,
        new_average_538_map=False,
        reset_button_pressed=False
    )
    set_session_dict(new_session_dict)
    return new_session_dict
def save_data_db(data, session):
    for d in data:
        result = session.query(DataModel).filter_by(mes=d['mes'], ano=d['ano']).first()
        if result is None:
            event = DataModel(id=str(uuid.uuid4()),
                              ano=d['ano'],
                              bandeira=d['bandeira'],
                              mes=d['mes'],
                              numero_mes=d['numero_mes'],
                              valor=d['valor'],
                              created_at=datetime.datetime.now())
            session.add(event)
            session.commit()
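# Usage sketch with a hypothetical record (the field values are illustrative;
# mes/ano/bandeira/valor are month/year/card brand/amount, matching the model):
sample = [{'mes': 'janeiro', 'numero_mes': 1, 'ano': 2020,
           'bandeira': 'visa', 'valor': 150.0}]
save_data_db(sample, session)  # inserts only the rows not already present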
def find_best_fold(self, dataset):
    dm = DataModel.generate_from_file(dataset)
    classes_list = dm.get_classes_list()
    for fold in FOLDS:
        f_scores = []
        a = 1
        for _ in range(fold[1]):
            for train_set, test_set in dm.generate_k_folds_stratified(fold[0]):
                model_evaluator = ModelEvaluator(train_set, test_set, classes_list)
                model_evaluator.evaluate()
                f_scores.append(model_evaluator.get_f_score())
                logging.error("[Parameters Tester][{}][CV{:02d}][{:03d}] FCS: {}".format(dataset, fold[0], a, f_scores[-1]))
                a += 1
        f_score_mean = sum(f_scores) / len(f_scores)
        logging.error("[Parameters Tester][{}][CV{:02d}] Best FCS: {}, Mean FCS {}".format(dataset, fold[0], max(f_scores), f_score_mean))
        self.append_result({'dataset': dataset.name, 'fold': fold[0],
                            'f_score': f_score_mean, 'permutation': -1})
        if f_score_mean > self.best_fcs[dataset]:
            self.best_fold[dataset] = fold
            self.best_fcs[dataset] = f_score_mean
    logging.error("[Parameters Tester][{}] Best mean FCS: {}, Best fold: {}".format(dataset, self.best_fcs[dataset], self.best_fold[dataset]))
def getRecommendations(self):
    '''Use demographic filtering to get recommended apps'''
    try:
        # mongodb query: average rating over all apps with Rating > 0
        # (the constant "_id" groups every matching document into one bucket)
        x = list(self.mongo_obj.getCollection().aggregate([
            {"$match": {"Rating": {"$gt": 0}}},
            {"$group": {"_id": "_id", "AverageRating": {"$avg": "$Rating"}}}
        ]))
        data = self.mongo_obj.searchData({}, {"_id": 0, "App": 1, "Category": 1,
                                              "Rating": 1, "Reviews": 1})
        data.dropna(inplace=True)
        # determine the score of all apps
        C = x[0]["AverageRating"]  # C - average rating for all apps
        # minimum number of reviews required
        m = data["Reviews"].quantile(0.9)
        filter_data = data.copy().loc[data["Reviews"] >= m]
        filter_data['Score'] = self.weighted_rating(filter_data, m, C)
        filter_data = filter_data.sort_values('Score', ascending=False)
        # display results
        dlg = Recommended_Apps_Dialog(self)
        dlg.tableView.setModel(DataModel(filter_data.head(n=20)))
        dlg.show()
    except Exception as e:
        self.alert("error", "Error", str(e))
def from_json(file_name, data_model_type='dict'):
    '''Import a network from a JSON file using the nio library.'''
    data_model = DataModel.new(data_model_type)
    network = Network(data_model)
    data = nio.read(file_name)
    nodes = data.get('nodes', None)
    if nodes is not None:
        network.data_model.insert('nodes', nodes)
    types = data.get('types', None)
    if types is not None:
        network.types = types
    connections = data.get('connections', None)
    if connections is not None:
        network.connection_set.add_list(connections)
    return network
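# Usage sketch: load a network definition from disk (the file name is
# illustrative; the JSON is expected to carry the 'nodes', 'types', and
# 'connections' keys read above):
network = from_json("network.json")
print(network.types)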
def find_best_parameters(self, dataset):
    permutations = self.generate_permutations(dataset)
    for p, perm in enumerate(permutations):
        logging.error("[Parameters Tester][{}][Perm {:08d}] Current permutation: {}".format(dataset, p+1, perm))
        dm = DataModel.generate_from_file(dataset, discretize_params=perm)
        classes_list = dm.get_classes_list()
        f_scores = []
        a = 1
        for _ in range(self.best_fold[dataset][1]):
            for train_set, test_set in dm.generate_k_folds_stratified(self.best_fold[dataset][0]):
                model_evaluator = ModelEvaluator(train_set, test_set, classes_list)
                model_evaluator.evaluate()
                f_scores.append(model_evaluator.get_f_score())
                logging.error("[Parameters Tester][{}][Perm {:08d}][{:03d}] FCS: {}".format(dataset, p+1, a, f_scores[-1]))
                a += 1
        f_score_mean = sum(f_scores) / len(f_scores)
        logging.error("[Parameters Tester][{}][Perm {:08d}] Best FCS: {}, Mean FCS {}".format(dataset, p+1, max(f_scores), f_score_mean))
        for param in perm:
            self.append_result({'dataset': dataset.name,
                                'fold': self.best_fold[dataset][0],
                                'f_score': f_score_mean,
                                'permutation': p + 1,
                                'feature': param.feature_name,
                                'function': param.discretize_function.__name__,
                                'bins': param.buckets_amount})
        if f_score_mean > self.best_fcs[dataset]:
            self.best_discretize_parameters[dataset] = perm
            self.best_fcs[dataset] = f_score_mean
    logging.error("[Parameters Tester][{}] Best mean FCS: {}, Best parameters: {}".format(dataset, self.best_fcs[dataset], self.best_discretize_parameters[dataset]))
def __init__(self):
    BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sys.path.append(BASE_DIR)
    self.data_model = DataModel()
def fetch():
    body = request.json['id']
    ret = DataModel.objects(id=body).first()
    return jsonify({'id': str(ret.id), 'text': ret.text})
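# Usage sketch: calling the endpoint above from a client, assuming it is
# registered on a Flask app at /fetch (the route, port, and id value are
# illustrative assumptions):
import requests

resp = requests.post("http://localhost:5000/fetch",
                     json={"id": "5f2b9c0e8f1d4a0012345678"})
print(resp.json())  # e.g. {'id': '...', 'text': '...'}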
__author__ = 'David'

import keras
from data_model import DataModel as DM
from model import Model as M
import random
import numpy as np
import sys

data = DM()
data_input = data.open_file(r'..\posts_files\posts_Culture.txt')
X, y, chars, char_indices, indices_char = data.prepare_dataset(data_input)
model_class = M()
maxlen = 20
step = 3
sentences = []
next_chars = []
model = model_class.generate_model(data_input, maxlen)
for iteration in range(1, 60):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(X, y, batch_size=16, nb_epoch=1)  # nb_epoch is the Keras 1.x name; newer versions use epochs
    start_index = random.randint(0, len(data_input) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
def __init__(self, parent, page_source_code, **kwargs):
    self.parent = parent
    self.data_model = DataModel(page_source_code=page_source_code)
    self.enable_web_copy(enable=kwargs.get("enable_web_copy", True))
async def save(request):
    body = request.json
    new_data = DataModel(**body)
    ret = await new_data.save()
    return response.json({'id': str(ret.id)}, status=201)
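# Usage sketch: posting a document to the async handler above, assuming a
# Sanic app with the handler bound at /save (the route, port, and payload
# fields are illustrative assumptions):
import requests

resp = requests.post("http://localhost:8000/save", json={"text": "hello"})
print(resp.status_code, resp.json())  # 201 {'id': '...'}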
from data_model import model_name, DataModel
import json

json_name = "data/data_model.json"

if __name__ == "__main__":
    data = DataModel.load(model_name)
    with open(json_name, "w") as fh:
        json.dump(data.list, fh)
                                             self.args.batch_threads_num)
        neighbors2 = bat.generate_neighbours(self.eval_kg2_useful_ent_embeddings(),
                                             self.kgs.useful_entities_list2,
                                             neighbors_num2,
                                             self.args.batch_threads_num)
        ent_num = len(self.kgs.kg1.entities_list) + len(self.kgs.kg2.entities_list)
        print('neighbor dict:', len(neighbors1), type(neighbors2))
        print("generating neighbors of {} entities costs {:.3f} s.".format(
            ent_num, time.time() - t1))
        for i in range(1, self.args.shared_learning_max_epoch + 1):
            self.train_shared_space_mapping_1epo(i, entity_list)
            if i >= self.args.start_valid and i % self.args.eval_freq == 0:
                valid(self, embed_choice='final')
        self.save()
        test(self, embed_choice='nv')
        test(self, embed_choice='rv')
        test(self, embed_choice='av')
        test(self, embed_choice='avg')
        test_WVA(self)
        test(self, embed_choice='final')


if __name__ == '__main__':
    args = load_args('args.json')
    data = DataModel(args)
    attr_align_model = PredicateAlignModel(data.kgs, args)
    model = MultiKE_Late(data, args, attr_align_model)
    model.run()
def advSearchData(self):
    # open form dialog
    dlg = Adv_Search_Dialog(self)
    self.search_text.clear()
    if dlg.exec_():
        app = dlg.app.text()
        category = dlg.category.text()
        rating_low = float(dlg.rating_low.text())
        rating_high = float(dlg.rating_high.text())
        reviews_low = dlg.reviews_low.text()
        reviews_high = dlg.reviews_high.text()
        size_low = dlg.size_low.text()
        size_high = dlg.size_high.text()
        installs_low = dlg.installs_low.text()
        installs_high = dlg.installs_high.text()
        price_low = dlg.price_low.text()
        price_high = dlg.price_high.text()
        type = dlg.type.currentText()
        content_rating = dlg.content_rating.currentText()
        genre = dlg.genre.text()
        android_ver = dlg.android_ver.text()
        sort_field = dlg.sort_field.currentText()
        sort_order = dlg.sort_order.currentText()
        # creating the query
        try:
            query = {}
            if not any((app, category, rating_low, rating_high, reviews_low,
                        reviews_high, size_low, size_high, installs_low,
                        installs_high, price_low, price_high, genre,
                        android_ver)) and type == "None" \
                    and content_rating == "None" and sort_field == "None":
                self.alert("error", "Search Failed", "Please enter some values!")
                return 0
            if app:
                query["App"] = {"$regex": app, "$options": "i"}
            if category:
                category = category.replace(" ", "_").upper()
                query["Category"] = {"$regex": category, "$options": "i"}
            if rating_low != 0.0 or rating_high != 0.0:
                query["Rating"] = {"$gte": rating_low}
                if rating_high != 0.0:
                    query["Rating"]["$lte"] = rating_high
            if reviews_low or reviews_high:
                query["Reviews"] = {"$gte": int(reviews_low or 0)}
                if reviews_high:
                    query["Reviews"]["$lte"] = int(reviews_high)
            if size_low or size_high:
                size_low = "" if not size_low else re.findall(r'\d+\.?', size_low)[0] + "M"
                query["Size"] = {"$gte": size_low}
                if size_high:
                    size_high = re.findall(r'\d+\.?', size_high)[0] + "M"
                    query["Size"]["$lte"] = size_high
            if installs_low or installs_high:
                if installs_low:
                    installs_low = installs_low.replace(",", "").replace("+", "")
                    installs_low = f'{int(installs_low):,}' + "+"
                    query["Installs"] = {"$gte": installs_low}
                if installs_high:
                    installs_high = installs_high.replace(",", "").replace("+", "")
                    installs_high = f'{int(installs_high):,}' + "+"
                    query.setdefault("Installs", {})["$lte"] = installs_high
            if price_low or price_high:
                query["Price"] = {"$gte": float(price_low or 0)}
                if price_high:
                    query["Price"]["$lte"] = float(price_high)
            if type != "None":
                query["Type"] = type
            if content_rating != "None":
                query["Content Rating"] = content_rating
            if genre:
                query["Genre"] = {"$regex": genre, "$options": "i"}
            if android_ver:
                query["Android Ver"] = {"$regex": "^" + android_ver, "$options": "i"}
            # print(query)
            res = None
            if sort_field == "None":
                res = self.mongo_obj.searchData(query, {"_id": 0})
            else:
                sort_order = 1 if sort_order == "Asc" else -1
                res = self.mongo_obj.searchData(query, {"_id": 0}, sort_field, sort_order)
            # load data in the table widget
            self.model = DataModel(res)
            self.proxyModel = QSortFilterProxyModel(self)
            self.proxyModel.setSourceModel(self.model)
            self.tableView.setModel(self.proxyModel)
        except Exception as e:
            self.alert("error", "Search Failed", str(e))
def standard_scaler(name: str, n: int, k: int, seed: int) -> StandardScaler:
    df = DataModel(name, k=k, n=n, seed=seed).train_set()
    scaler = StandardScaler()
    scaler.fit(df)
    return scaler
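# Usage sketch: fit the scaler on a model's training set and reuse it on the
# validation set (the model name "ball" and the k/n values are illustrative
# assumptions; the call follows the DataModel(name, k=..., n=..., seed=...)
# signature used above):
scaler = standard_scaler("ball", n=4, k=2, seed=404)
valid_scaled = scaler.transform(DataModel("ball", k=2, n=4, seed=404).valid_set())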
from monitor import Monitor
import joblib
from data_model import DataModel
import multiprocessing

word2idx = joblib.load('./preprocessing_data/word2idx.pkl')
label2idx = joblib.load('./preprocessing_data/label2idx.pkl')
idx2vec = joblib.load('./preprocessing_data/idx2vec.pkl')
idx2label = joblib.load('./preprocessing_data/idx2label.pkl')
process_args_list = []
fold_number = 10
raw_data = DataModel(fold_number)


def sub_process(train_args):
    test_monitor = Monitor(word2idx_dict=word2idx,
                           label2idx_dict=label2idx,
                           idx2vec_dict=idx2vec,
                           idx2label_dict=idx2label,
                           data=raw_data.choice_fold(train_args['fold_idx']),
                           sentence_fixed_len=train_args['sentence_fixed_len'],
                           learning_rate=train_args['learning_rate'],
                           word_vec_size=train_args['word_vec_size'],
                           hidden_num=train_args['hidden_num'],
                           label_num=train_args['label_num'],
                           k_model_src=train_args['k_model_src'])
    test_monitor.train(batch_size=train_args['batch_size'],
                       iter_num=train_args['iter_num'],
class HtmlGenerator:
    def __init__(self):
        BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        sys.path.append(BASE_DIR)
        self.data_model = DataModel()

    def render_all_categories(self):
        tree = []
        for c in self.data_model.get_categories():
            element = {}
            element['text'] = c['CategoryName']
            tree.append(element)
        self.render_template(json.dumps(tree), 'all_categories')

    def render_single_categorie(self, parent_id):
        data = self.data_model.get_categories_by_parent(parent_id)
        tree = []
        element = {}
        element['text'] = data['parent']['CategoryName']
        element['nodes'] = []
        for i in data['childs']:
            n_element = {}
            n_element['text'] = i['CategoryName']
            element['nodes'].append(n_element)
        tree.append(element)
        self.render_template(json.dumps(tree), str(parent_id))

    def render_template(self, tree, file_name):
        js_script = "$('.tree-container').treeview({data:" + tree + "});"
        template = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
    <title>Ebay Categories</title>
    <!-- Latest compiled and minified CSS -->
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
    <link rel="stylesheet" href="./css/bootstrap-treeview.min.css" media="screen" title="no title">
</head>
<body>
    <nav class="navbar navbar-static-top navbar-dark bg-inverse">
        <a class="navbar-brand" href="#">Ebay Categories Render</a>
    </nav>
    <!-- Main jumbotron for a primary marketing message or call to action -->
    <div class="jumbotron">
        <div class="container">
            <h1 class="display-3">Render Ebay Categories Project</h1>
            <p>With this project you will be able to download and render all ebay categories at the bottom of this page.</p>
            <p><a class="btn btn-primary btn-lg" href="#" role="button">Go to github repository »</a></p>
        </div>
    </div>
    <div class="container">
        <!-- Example row of columns -->
        <div class="row">
            <div class="col-md-12">
                <div class="tree-container">
                </div>
            </div>
        </div>
        <hr>
        <footer>
            <p>© Company 2016</p>
        </footer>
    </div> <!-- /container -->
    <!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js"></script>
    <!-- Include all compiled plugins (below), or include individual files as needed -->
    <!-- Latest compiled and minified JavaScript -->
    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>
    <script src="./js/bootstrap-treeview.min.js" charset="utf-8"></script>
    <script type="text/javascript">
        {js_script}
    </script>
</body>
</html>
'''.format(js_script=js_script)
        with open("html_generated/" + file_name + ".html", "w") as file:
            file.write(template)
        return template
def save():
    body = request.json
    new_data = DataModel(**body)
    ret = new_data.save()
    return jsonify({'id': str(ret.id)})
idx2vec = joblib.load(idx2vec_path)
word2idx = joblib.load(word2idx_path)
idx2word = joblib.load(idx2word_path)
label2idx = joblib.load(label2idx_src)
process_args_list = []
pool = multiprocessing.Pool(processes=4)  # 4 worker processes
for batch_size in batch_size_list:
    for learning_rate in learning_rate_list:
        for dropout in dropout_list:
            for layer_num in layer_num_list:
                for hidden_num in hidden_num_list:
                    data_model = DataModel(batch_size=batch_size,
                                           fold_num=fold_num,
                                           sentence_len=sentence_len,
                                           word2idx=word2idx,
                                           label2idx=label2idx)
                    visual_model_name = ("batch_size: " + str(batch_size) +
                                         " learning_rate: " + str(learning_rate) +
                                         " dropout: " + str(dropout) +
                                         " layer_num: " + str(layer_num) +
                                         " hidden_num: " + str(hidden_num))
                    model_src_name = model_src + visual_model_name
                    if fold_num == 1:
                        k_fold_mode = False
                    else:
                        k_fold_mode = True
                    for k_idx in range(fold_num):
                        process_args_list.append((data_model, learning_rate,
                                                  dropout, layer_num,
                                                  hidden_num, k_idx,
import requests
from parse_html import parse
from data_model import DataModel, model_name
from tqdm import tqdm
import time
import random

baseURL = "https://www.indeed.com/jobs?q=&l=Atlanta%2C+GA&filter={}&start={}"
filterNumbers = list(range(0, 10))
startNumbers = list(range(0, 1000, 10))
minSleepTime = 1.00
randomRange = 1.50

if __name__ == "__main__":
    data = DataModel.load(model_name)
    compoundNumber = [(filterNumber, startNumber)
                      for filterNumber in filterNumbers
                      for startNumber in startNumbers]
    print("Request Count:", len(compoundNumber))
    for filterNumber, startNumber in tqdm(compoundNumber):
        url = baseURL.format(filterNumber, startNumber)
        print("Request URL: ", url)
        response = requests.get(url)
        items = parse(response.text)
        for item in items:
            data.add(item)
    return 0.0

_threshold = np.vectorize(_threshold)


def _apply_avg_filter(merged_lines: np.ndarray):
    convolved_img = merged_lines.copy()
    # convolved_img[:,:,0] = scipy.ndimage.convolve(merged_lines[:,:,0], avg_filter, mode="constant")
    # convolved_img[:,:,1] = scipy.ndimage.convolve(merged_lines[:,:,1], avg_filter, mode="constant")
    return _threshold(convolved_img)


def _center_to_left_top_lines(lines):
    for line in lines:
        label, center_x, center_y, w, h = line
        x = center_x
        if center_x - w / 2 > 0:
            x -= w / 2
        y = center_y
        if center_y - h / 2 > 0:
            y -= h / 2
        yield (label, x, y, w, h)


if __name__ == '__main__':
    test_data_model = DataModel("./data/x", "./data/y",
                                (256, 256, 3), (256, 256, 3), (50, 50, 2))
    test(test_data_model)