def __init__(self, code):
    child_row = DataManager.getRow("SELECT * FROM Child WHERE ChildCode=?", [code])
    self.code = code
    self.cost = child_row['Cost']
    self.skills = DataManager.getRows("SELECT * FROM ChildSkill WHERE ChildCode=?", [code])
    self.needs = DataManager.getRows("SELECT * FROM ChildNeed WHERE ChildCode=?", [code])
def __init__(self, code):
    hobby_row = DataManager.getRow("SELECT * FROM Hobby WHERE HobbyCode=?", [code])
    self.code = code
    self.expense = hobby_row['Expense']
    self.skills = DataManager.getRows("SELECT * FROM HobbySkill WHERE HobbyCode=?", [code])
    self.needs = DataManager.getRows("SELECT * FROM HobbyNeed WHERE HobbyCode=?", [code])
class Test_2_DataManagerSyncStart(unittest.TestCase):

    def setUp(self):
        self.af = FeedRef((FeatureType.ADDRESS, FeedType.FEATURES))
        self.ac = FeedRef((FeatureType.ADDRESS, FeedType.CHANGEFEED))
        self.ar = FeedRef((FeatureType.ADDRESS, FeedType.RESOLUTIONFEED))
        self.aff = FeatureFactory.getInstance(self.af)
        self.afc = FeatureFactory.getInstance(self.ac)
        self.afr = FeatureFactory.getInstance(self.ar)
        self.dm = DataManager()

    def tearDown(self):
        self.dm.close()
        del self.afr
        del self.afc
        del self.aff

    def test10_validdatastoreTest(self):
        '''Tests whether the datastore pull returns an ADL list of the expected length'''
        initdata = self.dm.pull()
        self.assertEquals(len(initdata), 5, 'Invalid ADL list length returned')

    def test20_refreshTest(self):
        '''Tests whether valid address objects are returned on json decoded arg'''
        initdata = self.dm.pull()
        self.assertTrue(isinstance(initdata[self.af][0], Address), 'Invalid address type returned')
        self.assertTrue(isinstance(initdata[self.ac][0], AddressChange), 'Invalid address type returned')
        self.assertTrue(isinstance(initdata[self.ar][0], AddressResolution), 'Invalid address type returned')

    def test30_refreshTest(self):
        pass

    def test40_refreshTest(self):
        pass
def __init__(self, code):
    job_row = DataManager.getRow("SELECT * FROM Job WHERE JobCode=?", [code])
    self.code = code
    self.pay = job_row['Pay']
    self.skillRequirements = DataManager.getRows("SELECT * FROM JobSkillRequirement WHERE JobCode=?", [code])
    self.needs = DataManager.getRows("SELECT * FROM JobNeed WHERE JobCode=?", [code])
def __init__(self, code):
    partner_row = DataManager.getRow("SELECT * FROM Partner WHERE PartnerCode=?", [code])
    self.code = code
    self.finances = partner_row['Finances']
    self.moneyRequirement = partner_row['MoneyRequirement']
    self.skillRequirements = DataManager.getRows("SELECT * FROM PartnerSkillRequirement WHERE PartnerCode=?", [code])
    self.needs = DataManager.getRows("SELECT * FROM PartnerNeed WHERE PartnerCode=?", [code])
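# A minimal usage sketch, not taken from the original sources: it assumes the four
# constructors above belong to classes named Child, Hobby, Job and Partner and that
# DataManager.getRow/getRows behave as shown; load_catalog and the codes passed in
# are hypothetical placeholders.
def load_catalog(child_code, hobby_code, job_code, partner_code):
    child = Child(child_code)        # one Child row plus its skill/need rows
    hobby = Hobby(hobby_code)        # one Hobby row plus its skill/need rows
    job = Job(job_code)              # one Job row plus its requirement/need rows
    partner = Partner(partner_code)  # one Partner row plus requirements and needs
    # Each object keeps the raw rows, so callers can inspect e.g. child.cost,
    # hobby.expense, job.pay or partner.finances directly.
    return child, hobby, job, partner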
def add(self, task, projectName=None):
    date = Timings.now()
    if self.taskType(task) != "work":
        projectName = None
    attributes = self.processTask(date, task, projectName)
    DataManager.writeTask(date, task, projectName, firstToday=len(self.tasks) == 1)
    return attributes
def test_experiment_not_transformed_test(self):
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
    loaded_data = FileLoader.load_file(file_path)
    data_manager = DataManager()
    data_manager.set_data(loaded_data)
    data_manager.split_data(test_split=0.19, train_split=0.62)
    learning_model = FakePredictionModel()
    exp = Experiment(data_manager, learning_model)
    exp.run_experiment()
    self.assertEquals(0, exp.get_r2(SplitTypes.Test))
def test_experiment(self):
    output_filename_header = FileLoader.create_output_file()
    time.sleep(1)
    loaded_algorithm_combinations = FileLoader.read_csv_file("../Datasets/test.csv")
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
    loaded_data = FileLoader.load_file(file_path)
    # feature_eliminator = SelectKBest(f_regression, k=k_value)
    print(loaded_algorithm_combinations[0])
    output_filename = FileLoader.create_output_file()
    for i in range(0, 80):
        normalizer = self.getnormalizer(loaded_algorithm_combinations[i][0])
        feature_eliminator = self.getfeature_eliminator(loaded_algorithm_combinations[i][1])
        the_model = self.get_model(loaded_algorithm_combinations[i][2])
        print "taking ", type(normalizer).__name__, "and feature selector ", type(feature_eliminator).__name__, "model", type(the_model).__name__
        FileLoader.write_model_in_file(
            output_filename_header,
            type(normalizer).__name__,
            type(feature_eliminator).__name__,
            type(the_model).__name__,
            "", "", "", "", "",
        )
        the_data_manager = DataManager(feature_eliminator, normalizer=normalizer)
        the_data_manager.set_data(loaded_data)
        the_data_manager.split_data(test_split=0.15, train_split=0.70)
        exp = Experiment(the_data_manager, the_model)
        exp.run_experiment()
        # arr_selected = feature_eliminator.get_support(indices=True)
        # if (exp.get_r2(SplitTypes.Train) > 0 and exp.get_r2(SplitTypes.Valid) > 0 and exp.get_r2(SplitTypes.Test) > 0):
        FileLoader.write_model_in_file(
            output_filename,
            type(normalizer).__name__,
            type(feature_eliminator).__name__,
            type(the_model).__name__,
            "",
            exp.fitness_matrix[0],
            exp.get_r2(SplitTypes.Train),
            exp.get_r2(SplitTypes.Valid),
            exp.get_r2(SplitTypes.Test),
        )
def calculateNeededGallons():
    result = []
    recentWateringGallons = DataManager.getPreviousWateringAmounts(pymysql.connect(host='localhost', user='******', password='', db='Garden', charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor))
    sectorTargets = DataManager.getTargetCapacity(pymysql.connect(host='localhost', user='******', password='', db='Garden', charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor))
    previousRain = DataManager.getLatestRainfall(pymysql.connect(host='localhost', user='******', password='', db='Garden', charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor))
    predictedRain = DataManager.getPredictedRainfall(pymysql.connect(host='localhost', user='******', password='', db='Garden', charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor))
    currentMoistures = DataManager.getLatestMoisture(pymysql.connect(host='localhost', user='******', password='', db='Garden', charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor))
    for x in range(0, 4):
        currentGallons = (previousRain * 280) + recentWateringGallons[x + 1]
        if currentMoistures[x] > sectorTargets[x]:
            result.insert(x, 0)
        elif currentGallons > 280:
            result.insert(x, 0)
        else:
            if (predictedRain[1] * 280) * (predictedRain[0] / Decimal(100)) + currentGallons > 280:
                result.insert(x, 0)
            else:
                result.insert(x, 280 - ((predictedRain[1] * 280) * (predictedRain[0] / Decimal(100)) + currentGallons))
                print((predictedRain[1] * 280) * (predictedRain[0] / Decimal(100)) + currentGallons)
    return result
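# A small refactoring sketch, and an assumption rather than code from the original
# project: the five DataManager calls above each open their own pymysql connection
# with identical settings, so a single connection could be opened once and reused,
# provided those DataManager helpers accept any open connection. The helper names
# below are hypothetical.
def _garden_connection():
    return pymysql.connect(host='localhost', user='******', password='',
                           db='Garden', charset='utf8mb4',
                           cursorclass=pymysql.cursors.DictCursor)

def gather_watering_inputs():
    conn = _garden_connection()
    try:
        # Reuse one connection for all five reads instead of reconnecting each time.
        return (DataManager.getPreviousWateringAmounts(conn),
                DataManager.getTargetCapacity(conn),
                DataManager.getLatestRainfall(conn),
                DataManager.getPredictedRainfall(conn),
                DataManager.getLatestMoisture(conn))
    finally:
        conn.close()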
def test_experiment_all_zeros_r2_1(self):
    the_data_manager = DataManager()
    array_all_zeroes = np.zeros((37, 397))
    the_data_manager.set_data(array_all_zeroes)
    the_data_manager.split_data(test_split=0.19, train_split=0.62)
    the_model = svm.SVR()
    exp = Experiment(the_data_manager, the_model)
    exp.run_experiment()
    r2_train = exp.get_r2(SplitTypes.Train)
    expected = 1.0
    self.assertEqual(r2_train, expected)
def test_experiment_svm_svr_37dataset_r2_train(self):
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
    loaded_data = FileLoader.load_file(file_path)
    the_data_manager = DataManager()
    the_data_manager.set_data(loaded_data)
    the_data_manager.split_data(test_split=0.19, train_split=0.62)
    the_model = svm.SVR()
    exp = Experiment(the_data_manager, the_model)
    exp.run_experiment()
    r2_train = exp.get_r2(SplitTypes.Train)
    expected_svm_r2_value = 0.93994377385638073
    self.assertEqual(r2_train, expected_svm_r2_value)
def test_experiment_sum_of_squares_zeros_test(self):
    the_data_manager = DataManager()
    an_array_of_all_ones = np.ones((37, 397))
    the_model = svm.SVR()
    the_data_manager.set_data(an_array_of_all_ones)
    the_data_manager.split_data(test_split=0.19, train_split=0.62)
    exp = Experiment(the_data_manager, the_model)
    exp.run_experiment()
    sum_of_squares_test = exp.get_sum_of_squares(SplitTypes.Test)
    expected = 0
    self.assertEquals(expected, sum_of_squares_test)
def test_experiment_svr_37dataset_r2_test(self):
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
    loaded_data = FileLoader.load_file(file_path)
    the_data_manager = DataManager()
    the_data_manager.set_data(loaded_data)
    the_data_manager.split_data(test_split=0.19, train_split=0.62)
    the_model = svm.SVR()
    exp = Experiment(the_data_manager, the_model)
    exp.run_experiment()
    r2_test = exp.get_r2(SplitTypes.Test)
    expected_svm_r2_value = -0.33005242525900247
    self.assertEqual(r2_test, expected_svm_r2_value)
def test_split_merge_csv_4_25_8(self):
    file_loader = FileLoader()
    data_manager = DataManager()
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
    result = file_loader.load_file(file_path)
    data_manager.set_data(result)
    data_manager.split_data(test_split=0.11, train_split=0.22)
    test_shapes = np.zeros((4, 397)).shape
    valid_shapes = np.zeros((25, 397)).shape
    train_shapes = np.zeros((8, 397)).shape
    expected = np.array([test_shapes, valid_shapes, train_shapes])
    result = np.array([data_manager.datum[SplitTypes.Test].shape,
                       data_manager.datum[SplitTypes.Valid].shape,
                       data_manager.datum[SplitTypes.Train].shape])
    self.assertTrue(np.array_equal(result, expected))
def on_message(client, userdata, msg):
    print('Topic: ', msg.topic, '\nMessage: ', str(msg.payload))
    print("Peter:" + str(msg.payload))
    arr = [x.strip() for x in str(msg.payload).split(',')]
    devId = (arr[0])[2:]
    tmStmp = arr[1]
    x = arr[2]
    y = arr[3]
    z = arr[4]
    lat = arr[5]
    long = arr[6]
    dm = DataManager()
    dm.insertDeviceData(devId, tmStmp, x, y, z, lat, long)
    return
def test_split_merge_csv_7_7_23(self):
    file_loader = FileLoader()
    data_manager = DataManager()
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
    result = file_loader.load_file(file_path)
    data_manager.set_data(result)
    data_manager.split_data(test_split=0.19, train_split=0.62)
    valid_and_test_shapes = (7, 397)
    train_shapes = (23, 397)
    expected = np.array([valid_and_test_shapes, valid_and_test_shapes, train_shapes])
    result = np.array([data_manager.datum[SplitTypes.Test].shape,
                       data_manager.datum[SplitTypes.Valid].shape,
                       data_manager.datum[SplitTypes.Train].shape])
    self.assertTrue(np.array_equal(result, expected))
def test_experiment_sum_of_squares_real37_test(self):
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
    loaded_data = FileLoader.load_file(file_path)
    the_data_manager = DataManager()
    the_data_manager.set_data(loaded_data)
    the_model = svm.SVR()
    the_data_manager.split_data(test_split=0.19, train_split=0.62)
    exp = Experiment(the_data_manager, the_model)
    exp.run_experiment()
    sum_of_squares_test = exp.get_sum_of_squares(SplitTypes.Test)
    expected = 6.708898437500002
    self.assertAlmostEqual(expected, sum_of_squares_test)
class Test_1_DataManagerFunctionTest(unittest.TestCase):

    def setUp(self):
        self.dm = DataManager()

    def tearDown(self):
        self.dm.close()

    def test10_parseAddressTest(self):
        '''Tests whether a valid address object is returned on json decoded arg'''
        assert True

    def test20_pullTest(self):
        '''Tests whether we get a valid list[group[address]]'''
        assert True
def test_split_into_target_and_input(self):
    file_loader = FileLoader()
    data_manager = DataManager()
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
    result = file_loader.load_file(file_path)
    data_manager.set_data(result)
    data_manager.split_data(test_split=0.11, train_split=0.22)
    test_shapes_input = np.zeros((4, 396)).shape
    valid_shapes_input = np.zeros((25, 396)).shape
    train_shapes_input = np.zeros((8, 396)).shape
    test_shapes_target = np.zeros((4,)).shape
    valid_shapes_target = np.zeros((25,)).shape
    train_shapes_target = np.zeros((8,)).shape
    expected = np.array([test_shapes_input, valid_shapes_input, train_shapes_input,
                         test_shapes_target, valid_shapes_target, train_shapes_target])
    result = np.array([data_manager.inputs[SplitTypes.Test].shape,
                       data_manager.inputs[SplitTypes.Valid].shape,
                       data_manager.inputs[SplitTypes.Train].shape,
                       data_manager.targets[SplitTypes.Test].shape,
                       data_manager.targets[SplitTypes.Valid].shape,
                       data_manager.targets[SplitTypes.Train].shape])
    self.assertTrue(np.array_equal(result, expected))
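# A quick arithmetic check of the expected shapes used in the split tests above.
# How split_data rounds is an assumption inferred from those expectations: the
# test and train counts look rounded to the nearest integer and the validation
# set takes the remainder. expected_split_sizes is a hypothetical helper.
def expected_split_sizes(n_samples, test_split, train_split):
    n_test = round(n_samples * test_split)    # 37 * 0.19 -> 7,  37 * 0.11 -> 4
    n_train = round(n_samples * train_split)  # 37 * 0.62 -> 23, 37 * 0.22 -> 8
    n_valid = n_samples - n_test - n_train    # remainder: 7 and 25 respectively
    return n_test, n_valid, n_train

assert expected_split_sizes(37, 0.19, 0.62) == (7, 7, 23)
assert expected_split_sizes(37, 0.11, 0.22) == (4, 25, 8)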
def __init__(self, config, logger):
    self.config = config
    self.logger = logger
    self.start_date = config.get_value('PORTFOLIO', 'startdate')
    self.end_date = config.get_value('PORTFOLIO', 'enddate')
    self.name = self.config.get_value('PORTFOLIO', 'name')
    self.prevent_overlaps = {}  ## used to disallow same ticker+markout overlapping
    ## Get the list of indicators from the config file, then start IndicatorLibrary
    self.list_of_user_indicators = [s.upper() for s in Util.str_to_list(config.get_value('STRATEGY', 'indicators'))]
    if not self.list_of_user_indicators:
        self.logger.critical("Unable to determine list of user indicators")
        sys.exit(1)
    self.strategy = IndicatorLibrary(self.list_of_user_indicators)
    self.list_of_markout_periods = []
    try:
        for x in Util.str_to_list(config.get_value('STRATEGY', 'markout_periods')):
            self.list_of_markout_periods.append(int(x))
    except:
        self.logger.critical("Non integer-type value provided in STRATEGY.markout_periods")
        sys.exit(1)
    max_markout_periods = max(self.list_of_markout_periods)
    max_historical_periods = self.strategy.periods_required()
    self.dm = DataManager(logger, self.start_date, self.end_date, max_historical_periods, max_markout_periods)
    self.__trade_log_fn()
def __init__(self): """ Initialize the program prompting the instruction to the program """ self.printProgramInfos() self.datamanager = DataManager() self.dataexplorer = DataExplorer()
def countTasks(self):
    """Count task statistics grouped by project"""
    self._data = DataManager.getByRange(self._fromDate, self._toDate)
    res = {}
    for date, task, projectName in self._data:
        if task == "__start__":
            self.timings.setPrevDate(None)
        spentSeconds = self.timings.count(date, Tasks.taskType(task))
        if Tasks.taskType(task) != "work":
            continue
        if spentSeconds:
            if projectName not in res:
                res[projectName] = {}
            if task not in res[projectName]:
                res[projectName][task] = spentSeconds
            else:
                res[projectName][task] += spentSeconds
    self._countAttrib([v for k in res for v in res[k].values()])
    if res:
        ret = {}
        for k in res.keys():
            ret[k] = sorted(res[k].iteritems(), key=lambda item: item[1], reverse=True)
        return ret
    else:
        return {}
def calc(self, DataManager, ticker, date):
    quote_list = DataManager.get(ticker, date, -20)
    quote = quote_list.pop()
    hist_volume = []
    for q in quote_list[-20:]:
        hist_volume.append(q.volume)
    return quote.volume > mean(hist_volume) * 2
def _countObject(self, objType, targetAction):
    """Generic helper for calculating per-project or slacking statistics"""
    self._data = DataManager.getByRange(self._fromDate, self._toDate)
    res = {}
    for date, task, projectName in self._data:
        if task == "__start__":
            self.timings.setPrevDate(None)
        objKey = projectName if objType == "project" else task
        spentSeconds = self.timings.count(date, Tasks.taskType(task))
        if Tasks.taskType(task) != targetAction:
            self.timings.setPrevDate(date)
            continue
        if spentSeconds:
            if objKey not in res:
                res[objKey] = spentSeconds
            else:
                res[objKey] += spentSeconds
    self._countAttrib(res.values())
    if res:
        return sorted(res.iteritems(), key=lambda item: item[1], reverse=True)
    else:
        return []
def DataExplorerUserInputLoop(self):
    """
    Create a loop asking the user which action he or she wants to take.
    The loop breaks (and the program ends) whenever the user types quit.
    """
    userInput = ""
    try:
        while userInput != "quit":
            self.printDataExploreOptions()
            userInput = raw_input("\nPlease provide the input : ")
            if userInput == "1":
                DataExplorer.generalAnalysis(DataManager.cleaned_data, DataManager.binaryTree(DataManager.cleaned_data))
            elif userInput == "2":
                DataExplorer.printVideoCategories()
                userInputVideoCatagory = raw_input("\nPlease provide the number of the video category : ")
                DataExplorer.individual_videocatagory_analysis(DataManager.cleaned_data, userInputVideoCatagory)
            elif userInput == "3":
                DataExplorer.printCategories()
                userInputfeature = raw_input("\nPlease provide the number of the feature : ")
                DataExplorer.individual_feature_analysis(DataManager.cleaned_data, userInputfeature)
            elif userInput == "4":
                self.InitiateFlow()
            elif userInput == "quit":
                self.ExitProgram()
    except KeyboardInterrupt:
        print "quitting..."
        sys.exit()
def setUp(self):
    self.af = FeedRef((FeatureType.ADDRESS, FeedType.FEATURES))
    self.ac = FeedRef((FeatureType.ADDRESS, FeedType.CHANGEFEED))
    self.ar = FeedRef((FeatureType.ADDRESS, FeedType.RESOLUTIONFEED))
    self.aff = FeatureFactory.getInstance(self.af)
    self.afc = FeatureFactory.getInstance(self.ac)
    self.afr = FeatureFactory.getInstance(self.ar)
    self.dm = DataManager()
def calc(self, DataManager, ticker, date):
    quote_list = DataManager.get(ticker, date, -200)
    quote_today = quote_list.pop()
    quote_yesterday = quote_list.pop()
    hist_close = []
    for q in quote_list[-200:]:
        hist_close.append(q.close)
    # Use 'and' instead of '&': '&' binds tighter than the comparisons and would
    # attempt a bitwise AND of the two middle operands, which fails on floats.
    return quote_today.close > mean(hist_close) and quote_yesterday.close < mean(hist_close)
def calc(self, DataManager, ticker, date):
    quote_list = DataManager.get(ticker, date, -260)
    quote = quote_list.pop()
    is_new_high = True
    for q in quote_list[-260:]:
        if quote.close < q.close:
            is_new_high = False
    return is_new_high
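# A hedged illustration of how indicator objects like the three calc methods above
# might be driven. Only calc(DataManager, ticker, date) and DataManager.get appear
# in the source; evaluate_indicators and the error handling are assumptions.
def evaluate_indicators(indicators, data_manager, ticker, date):
    # Collect the boolean signal from every indicator; a missing or too-short
    # quote history is treated as "no signal" rather than an error in this sketch.
    signals = {}
    for indicator in indicators:
        try:
            signals[type(indicator).__name__] = indicator.calc(data_manager, ticker, date)
        except (IndexError, AttributeError):
            signals[type(indicator).__name__] = False
    return signals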
def playGames(pDecisionMaker=None, pPlayerCodes=[], pNumRounds=1000, pOutPath='.'):
    outPath = pOutPath
    DataManager.initSettings()
    DataManager.settings['gameResultsDbPath'] = "%s/games.db" % (outPath)
    DataManager.createGameDb()
    game = Game()
    if pDecisionMaker != None:
        game.decisionMaker = pDecisionMaker
    playerCodes = pPlayerCodes
    if len(playerCodes) == 0:
        for card in game.playerCardDeck:
            playerCodes.append(card.code)
    playerCodes = sorted(playerCodes)
    scores = []
    playerScores = {}
    DataManager.clearGameLogDb()
    for code in playerCodes:
        playerScores[code] = []
        for j in range(pNumRounds):
            if j % 10 == 0:
                print "%s: Round %d" % (code, j)
            game.resetGame()
            game.addPlayer(code)
            while game.isNextStep():
                game.performNextStep(game.decisionMaker.makeDecision(game, game.nextStepAvailableActions()))
            DataManager.insertGameLogIntoDb(game.gameLog)
            scores.append(game.players[0].points())
            playerScores[code].append(game.players[0].points())
    DataManager.closeConnection("gameConn")
    line = "Avg Score: %.2f" % (sum(scores) / float(len(scores)))
    fileOut = open("%s/results.txt" % (outPath), 'wb')
    fileOut.write(line + '\n')
    print line
    for player in sorted(playerScores):
        line = "%s Avg Score: %.2f" % (player, sum(playerScores[player]) / float(len(playerScores[player])))
        print line
        fileOut.write(line + '\n')
    fileOut.close()
def setUp(self):
    self.dm = DataManager(ref_int)
    self.af = FeedRef((FeatureType.ADDRESS, FeedType.FEATURES))
    self.ac = FeedRef((FeatureType.ADDRESS, FeedType.CHANGEFEED))
    self.ar = FeedRef((FeatureType.ADDRESS, FeedType.RESOLUTIONFEED))
    self.afc = FeatureFactory.getInstance(self.ac)
    self.afr = FeatureFactory.getInstance(self.ar)
    self.addr_r = _getTestAddress(af[FeedType.FEATURES])
def easyBuildDataManager(load=False, save=True):
    '''Helper for building a DataManager with the default news and market sources'''
    dm = DataManager()
    if load:
        dm.load(DataManager.DEFAULT_BACKUP_FILENAME)
    else:
        gfns = GoogleFinanceNewsSource()
        gfms = GoogleFinanceMarketSource()
        rns = ReutersNewsSource('/home/droz/corpus/headlines-docs.csv')
        dm.addNewsSource(gfns)
        dm.addNewsSource(rns)
        dm.setMarketSource(gfms)
    if save:
        dm.save(DataManager.DEFAULT_BACKUP_FILENAME)
    return dm
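# A hedged usage sketch: the first call builds the sources and saves a backup, and
# later calls can restore from DataManager.DEFAULT_BACKUP_FILENAME instead of
# rebuilding the sources. run_analysis is a hypothetical placeholder name.
def run_analysis(first_run=False):
    # Build from scratch (and save) on the first run, otherwise reload the backup.
    dm = easyBuildDataManager(load=not first_run, save=first_run)
    return dm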
conn = psycopg2.connect(
    database=url.path[1:],
    user=url.username,
    password=url.password,
    host=url.hostname,
    port=url.port
)
"""

##########################################
# Init bot.
##########################################
from DataManager import DataManager
data_manager = DataManager(conn)

from CianCianBot import CianCianBot
bot = CianCianBot(data_manager)

##########################################
# Init flask backend and linebot facility.
##########################################
from flask import Flask, request, abort
from linebot import (LineBotApi, WebhookHandler)
from linebot.exceptions import (InvalidSignatureError)
from linebot.models import (
    MessageEvent, TextMessage,
if FLAGS.model == "ar": from Models.AR import Model elif FLAGS.model == "lm": from Models.LM import Model elif FLAGS.model == "sar": from Models.SAR import Model else: raise EOFError train_dir = FLAGS.train_dir + "/" + FLAGS.model if not os.path.exists(FLAGS.train_dir): os.mkdir(FLAGS.train_dir) if not os.path.exists(train_dir): os.mkdir(train_dir) dataManager = DataManager() num_words, num_idioms = dataManager.get_num() word_embed_matrix, idiom_embed_matrix = dataManager.get_embed_matrix() def prepare_batch_data(document, candidates, ori_labels, ori_locs): # padding docs batch_size = len(document) doc_length = [len(doc) for doc in document] max_length = max(doc_length) mask = np.zeros((batch_size, max_length), dtype=np.float32) for i in range(batch_size): document[i] = document[i] + [0] * (max_length - doc_length[i]) mask[i, :doc_length[i]] = 1 document = np.array(document, dtype=np.int32) doc_length = np.array(doc_length, dtype=np.int32)
def delete(node, row_id):
    return jsonify(DataManager().delete(node, row_id))
def create(node):
    if not request.json:
        abort(400)
    return jsonify(DataManager().add_row(node, request.json))
action="store_true", help="Plot % of active cases of population") args = p.parse_args() if args.all: args.active = True args.recovered = True args.deaths = True args.population_percent = True logger = Logger("log", autosave=True) if not args.summary and not args.summary_only and not ( args.active or args.recovered or args.deaths or args.population_percent): logger.warning( "No output specified (active/recovered etc.). Use the -h option to get more information." ) exit(0) manager = DataManager(logger, args.countries, True) if args.summary_only: manager.load_summary() print_summary() exit(0) elif args.summary: manager.load_summary() print_summary() present_history(args.countries)
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Data Preparation
# ==================================================
dataManager = DataManager()

# Load data
print("Loading training data...")
x_text, y, _ = dataManager.load_training_data()
print("Finish loading data")

x = []
for data in x_text:
    a = 100 - len(data)
    if a > 0:
        front = a / 2
        back = a - front
        front_vec = [
            np.zeros(dataManager.wordvector_dim + 2) for j in range(front)
        ]
class OperateProcess(multiprocessing.Process): def __init__(self, name): multiprocessing.Process.__init__(self) # 重构了Process类里面的构造函数 self.name = name self.record = {} # {"ean":[price, count]} def exceptHandler(self, info): info = time.strftime("%Y-%m-%d %H:%M:%S") + "\n" + info print(info) self.debug_file.write(info) self.debug_file.flush() def run(self): # 固定用run方法,启动进程自动调用run方法 self.debug_file = open(self.name + ".debuginfo", "a") self.database = DataManager(self.name) printYellow("启动后台改价任务") while 1: chrome_dir = "../chrome_url.txt" f = open(chrome_dir, "r") # 设置文件对象 str = f.read() # 将txt文件的所有内容读入到字符串str中 f.close() # 将文件关闭 option = webdriver.ChromeOptions() option.add_argument("--user-data-dir=" + os.path.abspath(str)) option.add_argument('--no-sandbox') option.add_argument('--disable-dev-shm-usage') # option.add_argument("headless") option.add_argument('ignore-certificate-errors') option.add_argument('log-level=3') option.add_argument('lang=zh_CN.UTF-8') prefs = { 'profile.default_content_setting_values': { 'images': 2, 'stylesheet': 2, } } option.add_experimental_option('prefs', prefs) self.chrome = webdriver.Chrome(executable_path=CHROME_DRIVER_PATH, chrome_options=option) self.chrome.maximize_window() try: self.LoginAccount() except: self.exceptHandler(traceback.format_exc()) self.chrome.quit() self.database.handlerStatus() continue def LoginAccount(self): self.database.handlerStatus() printYellow("后台:登录账户") account, password = self.database.getAccountAndPassword() self.chrome.get("https://uae.souq.com/ae-en/login.php") try: elemNewAccount = self.chrome.find_element_by_id("email") elemNewLoginBtn = self.chrome.find_element_by_id("siteLogin") elemNewAccount.send_keys(account) print("输入账户:" + account) elemNewLoginBtn.click() print("点击siteLogin") try: cssSelectText = "#continue" WebDriverWait(self.chrome, 10, 0.5).until(EC.presence_of_element_located((By.CSS_SELECTOR, cssSelectText))) print("获取到continue按钮") elemContinue = self.chrome.find_element_by_id("continue") elemContinue.click() print("点击continue") cssSelectText = "#ap_password" WebDriverWait(self.chrome, 20, 0.5).until(EC.presence_of_element_located((By.CSS_SELECTOR, cssSelectText))) print("获取到password输入框") elemPassword = self.chrome.find_element_by_id("ap_password") elemLoginBtn = self.chrome.find_element_by_id("signInSubmit") elemPassword.send_keys(Keys.CONTROL + "a") elemPassword.send_keys(password) print("输入密码:********") elemLoginBtn.click() print("点击continue") except: print("方式一登录失败,尝试方式二登录") cssSelectText = "#password" WebDriverWait(self.chrome, 20, 0.5).until(EC.presence_of_element_located((By.CSS_SELECTOR, cssSelectText))) print("获取到password输入框") elemPassword = self.chrome.find_element_by_id("password") elemLoginBtn = self.chrome.find_element_by_id("siteLogin") elemPassword.clear() elemPassword.send_keys(password) print("输入密码:********") elemLoginBtn.click() print("点击登录") cssSelectText = "#search_box" WebDriverWait(self.chrome, 20, 0.5).until(EC.presence_of_element_located((By.CSS_SELECTOR, cssSelectText))) except: if str(self.chrome.current_url).find("uae.souq.com/ae-en/account.php") < 0: raise while 1: try: ret = self.NewInventory() if ret == -1: return -1 except: raise def NewInventory(self): if not self.database.shopLock(): printYellow("后台:已经超出店铺数量限制") self.database.setStopStatus() while True: time.sleep(6000) printYellow("后台:打开改价页面") self.loginHandler = self.chrome.current_window_handle unknownHandler = "" for handler in self.chrome.window_handles: if handler != self.loginHandler: unknownHandler = handler break 
readyUri = "https://sell.souq.com/fbs-inventory" js = 'window.open("' + readyUri + '")' self.chrome.execute_script(js) handlers = self.chrome.window_handles for handler in handlers: if handler != self.loginHandler and handler != unknownHandler: self.inventoryFbsHandler = handler break readyUri = "https://sell.souq.com/inventory/inventory-management" js = 'window.open("' + readyUri + '")' self.chrome.execute_script(js) handlers = self.chrome.window_handles for handler in handlers: if handler != self.loginHandler and handler != unknownHandler and handler != self.inventoryFbsHandler: self.inventoryHandler = handler break printYellow("后台:开始改价") while 1: try: ret = self.OperateProductSelenium() if ret == -2: printYellow("后台:未获取到搜索框,将刷新界面") self.chrome.refresh() elif ret == -1: return -1 except: self.exceptHandler(traceback.format_exc()) self.chrome.refresh() continue def OperateProductSelenium(self): while True: self.database.handlerStatus() time.sleep(1) ean, price, variant_name, is_fbs = self.database.getFirstNeedChangeItem() if ean == "ean" and price == "price": continue if is_fbs == 1: self.chrome.switch_to.window(self.inventoryFbsHandler) out = time.strftime("%Y-%m-%d %H:%M:%S") + " " + ean + "[fbs]\t" + str(round(price, 2)) else: self.chrome.switch_to.window(self.inventoryHandler) out = time.strftime("%Y-%m-%d %H:%M:%S") + " " + ean + "[]\t" + str(round(price, 2)) WebDriverWait(self.chrome, 240, 0.5).until(self.checkPage) change_count, flag = self.database.isLowerThanMaxTimes(ean, variant_name) if not flag: out += "[" + str(change_count) + "次]" printRed("后台:" + out + "\t达到最大改价次数") self.database.finishOneChangeItem(ean, price, variant_name) continue try: elemInput = self.chrome.find_elements_by_xpath(".//div[@class='row collapse advanced-search-container']//input") elemSearch = self.chrome.find_elements_by_xpath(".//a[@class='button postfix']") if not (len(elemInput) > 0 or len(elemSearch) > 0): return -2 oldEan = elemInput[0].get_attribute("value") elemInput[0].clear() elemInput[0].send_keys(ean) self.chrome.execute_script("arguments[0].click()", elemSearch[0]) count = 0 if ean != oldEan: while count < 8: elemLoading = self.chrome.find_element_by_xpath(".//div[@class='filterView']/div[3]") if elemLoading.get_attribute("loading") == "1": break time.sleep(0.5) count += 1 time.sleep(1) count = 0 while count < 14: elemLoading = self.chrome.find_element_by_xpath(".//div[@class='filterView']/div[3]") if elemLoading.get_attribute("loading") == "0": break time.sleep(0.5) count += 1 time.sleep(1.5) elemProduct = self.chrome.find_elements_by_xpath(".//table[@id='table-inventory']/tbody/tr[1]/td[4]") if len(elemProduct) <= 0 or count >= 14: printRed("后台:" + out + "\t没找到这个产品") self.database.finishOneChangeItem(ean, price, variant_name) continue self.chrome.execute_script("arguments[0].click()", elemProduct[0]) elemPriceInput = self.chrome.find_elements_by_xpath(".//input[@id='editableInput']") while len(elemPriceInput) <= 0: elemPriceInput = self.chrome.find_elements_by_xpath(".//input[@id='editableInput']") if len(elemPriceInput) <= 0: printRed("后台:" + out + "\t无法获取产品的价格修改控件") self.database.finishOneChangeItem(ean, price, variant_name) continue old_price = price + 1 elemPriceInput[0].clear() elemPriceInput[0].send_keys(str(price)) elemBtn = self.chrome.find_elements_by_xpath(".//a[@class='tiny accept-btn']") if len(elemBtn) <= 0: printRed("后台:" + out + "\t无法修改价格确定按钮") self.database.finishOneChangeItem(ean, price, variant_name) continue time_change = time.strftime("%Y-%m-%d %H:%M:%S") 
self.chrome.execute_script("arguments[0].click()", elemBtn[0]) self.database.addAChange(ean, variant_name, old_price, price) self.database.addChangeRecord(ean, variant_name, time_change, price) out += "[" + str(change_count + 1) + "次]" printYellow("后台:" + out + "\t改价成功") self.database.finishOneChangeItem(ean, price, variant_name) except: self.exceptHandler(traceback.format_exc()) self.database.finishOneChangeItem(ean, price, variant_name) return -2 def checkPage(self, driver): checkPageFinishScript = "try {if (document.readyState !== 'complete') {return false;} if (window.jQuery) { if (" \ "window.jQuery.active) { return false; } else if (window.jQuery.ajax && " \ "window.jQuery.ajax.active) { return false; } } if (window.angular) { if (!window.qa) { " \ "window.qa = {doneRendering: false }; } var injector = window.angular.element(" \ "'body').injector(); var $rootScope = injector.get('$rootScope'); var $http = " \ "injector.get('$http'); var $timeout = injector.get('$timeout'); if ($rootScope.$$phase " \ "=== '$apply' || $rootScope.$$phase === '$digest' || $http.pendingRequests.length !== 0) " \ "{ window.qa.doneRendering = false; return false; } if (!window.qa.doneRendering) { " \ "$timeout(function() { window.qa.doneRendering = true;}, 0); return false;}} return " \ "true;} catch (ex) {return false;} " return driver.execute_script(checkPageFinishScript)
parser.add_argument('--interval', type=int, default=10)

# Parse the configured arguments
args, _ = parser.parse_known_args(argv)

# Configure the log file format
logging.basicConfig(
    filename=('log/%s.log' % args.name) * (1 - args.screen),
    level=logging.DEBUG,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
    datefmt='%H:%M:%S')

# Load the corpus text and the sentiment, negation and intensifier word lists
dm = DataManager(
    args.dataset, {
        'negation': 'negation.txt',
        'intensifier': 'intensifier.txt',
        'sentiment': 'sentiment.txt'
    })
# Extract the words of each category from the raw corpus
dm.gen_word_list()
# Convert words to numeric lists and build the train, dev and test sets
dm.gen_data()
# Build the model
model = Model(dm.words, dm.grained, argv)
# Instantiate the evaluator
Evaluator = EvaluatorList[dm.grained]


def do_train(label, data):
import Logger
from DataManager import DataManager
import os

logger_name = "UpdateDBClasses"
Logger.setup(logger_name)
file_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), "Generated/DatabaseClasses.py")
manager = DataManager(logger_name, email=False)
manager.update_classes_file(file_path)
def load_dataset(self):
    dataManager = DataManager(self.dataset_path)
    self.data = dataManager.GetData()
def validate(): device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") TAG_EMBEDDING_DIM = 64 VAL_EMBEDDING_DIM = 64 HIDDEN_DIM = 1500 NUM_EPOCHS = 2 # 8 LAYER_NUM = 1 BATCH_SIZE = 256 data_manager = DataManager(TRAIN) tag_to_idx, idx_to_tag = data_manager.get_tag_dicts() val_to_idx, idx_to_val = data_manager.get_val_dicts() # ad hoc adding of UNKOWN val_to_idx["UNK"] = len(val_to_idx) idx_to_val[len(val_to_idx) - 1] = "UNK" train_split_idx = int(len(data_manager.get_data()) * 0.05) validate_split_idx = int(len(data_manager.get_data()) * 0.07) data_val = torch.Tensor([( tag_to_idx[(tag, have_children, have_sibling)], val_to_idx.get(val, val_to_idx["UNK"]), ) for tag, val, have_children, have_sibling in ( data_manager.get_data()[train_split_idx:validate_split_idx])]) val_data = torch.utils.data.DataLoader( Dataset(data_val), BATCH_SIZE, shuffle=False, drop_last=True, num_workers=0, pin_memory=True, ) model_tag = torch.load("D://data//budala_16.pickle") # model_val = LSTMValue( # VAL_EMBEDDING_DIM, HIDDEN_DIM, len(val_to_idx), len(val_to_idx), LAYER_NUM # ) loss_function = nn.NLLLoss() optimizer_tag = optim.Adam(model_tag.parameters()) # optimizer_val = optim.Adam(model_val.parameters()) # -----------putting everything on GPU--------- model_tag.cuda() for epoch in range(NUM_EPOCHS): start_time = time.time() summary_writer = SummaryWriter() model_tag.eval() # model_val.eval() correct_tag = 0 # correct_val = 0 loss_sum_tag = 0 # loss_sum_val = 0 cnt = 0 ep_cnt = 0 with torch.no_grad(): for i, (sentence, y) in tqdm( enumerate(val_data), total=len(val_data), desc=f"Epoch: {epoch}", unit="batches", ): global_step_val = epoch * len(val_data) + i sentence_tag = sentence[:, :, 0].to(device) y_tag = y[:, 0].to(device) y_pred_tag = model_tag(sentence_tag) # sentence_val = sentence[:, :, 1].to(device) # y_val = y[:, 1].to(device) # y_pred_val = model_val(sentence_val) correct_tag += (y_pred_tag.argmax(dim=1) == y_tag).sum().item() # correct_val += (y_pred_val.argmax(dim=1) == y_val).sum().item() loss_tag = loss_function(y_pred_tag, y_tag.long()) # loss_val = loss_function(y_pred_val, y_val.long()) summary_writer.add_scalar("validation_loss_tag", loss_tag, global_step_val) # summary_writer.add_scalar("validation_loss_val", loss_val, global_step_val) loss_sum_tag += loss_tag # loss_sum_val += loss_val ep_cnt += 1 cnt += y_tag.size(0) print( f"Validation tag: loss {loss_sum_tag/ep_cnt}, accuracy:{100*correct_tag/cnt}" ) # print( # f"Validation val: loss {loss_sum_val/ep_cnt}, accuracy:{100*correct_val/cnt}" # ) print(f"Epoch ended, time taken {time.time()-start_time}s")
def train(): device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") TAG_EMBEDDING_DIM = 100 VAL_EMBEDDING_DIM = 64 HIDDEN_DIM = 1500 NUM_EPOCHS = 2 # 8 LAYER_NUM = 1 BATCH_SIZE = 256 data_manager = DataManager(TRAIN) d = data_manager.get_data() tag_to_idx, idx_to_tag = data_manager.get_tag_dicts() val_to_idx, idx_to_val = data_manager.get_val_dicts() # ad hoc adding of UNKOWN val_to_idx["UNK"] = len(val_to_idx) idx_to_val[len(val_to_idx) - 1] = "UNK" train_split_idx = int(len(data_manager.get_data()) * 0.9) validate_split_idx = int(len(data_manager.get_data()) * 0.92) data_train = torch.Tensor([( tag_to_idx[(tag, have_children, have_sibling)], val_to_idx.get(val, val_to_idx["UNK"]), ) for tag, val, have_children, have_sibling in ( data_manager.get_data()[:train_split_idx])]) data_val = torch.Tensor([( tag_to_idx[(tag, have_children, have_sibling)], val_to_idx.get(val, val_to_idx["UNK"]), ) for tag, val, have_children, have_sibling in ( data_manager.get_data()[train_split_idx:validate_split_idx])]) training_data = torch.utils.data.DataLoader(Dataset(data_train), BATCH_SIZE, shuffle=True, drop_last=True, num_workers=0) # test_data = None val_data = torch.utils.data.DataLoader(Dataset(data_val), BATCH_SIZE, shuffle=False, drop_last=True, num_workers=0) model_tag = LSTMTagger(TAG_EMBEDDING_DIM, HIDDEN_DIM, len(tag_to_idx), len(tag_to_idx), LAYER_NUM) # model_val = LSTMValue( # VAL_EMBEDDING_DIM, HIDDEN_DIM, len(val_to_idx), len(val_to_idx), LAYER_NUM # ) loss_function = nn.NLLLoss() optimizer_tag = optim.SGD(model_tag.parameters(), 0.001) # optimizer_val = optim.Adam(model_val.parameters()) # -----------putting everything on GPU--------- model_tag.cuda() # model_val.cuda() # --------------------------------------------- model_iter = 1 for epoch in range(NUM_EPOCHS): summary_writer = SummaryWriter() model_tag.train() # model_val.train() start_time = time.time() cnt = 0 for i, (sentence, y) in tqdm( enumerate(training_data), total=len(training_data), desc=f"Epoch: {epoch}", unit="batches", ): global_step = epoch * len(training_data) + i size = int(sentence.size(0)) model_tag.zero_grad() # model_val.zero_grad() sentence_tag = sentence[:, :, 0].to(device) y_tag = y[:, 0].to(device) # sentence_val = sentence[:, :, 1].to(device) # y_val = y[:, 1].to(device) y_pred_tag = model_tag(sentence_tag) # y_pred_val = model_val(sentence_val) correct_tag = (y_pred_tag.argmax(dim=1) == y_tag).sum().item() # correct_val = (y_pred_val.argmax(dim=1) == y_val).sum().item() # long treba jer y_tag treba da predstavlja rešenje loss_tag = loss_function(y_pred_tag, y_tag.long()) # loss_val = loss_function(y_pred_val, y_val.long()) summary_writer.add_scalar("Tag train loss", loss_tag, global_step) summary_writer.add_scalar("Tag accuracy", 100 * (correct_tag / size), global_step) # summary_writer.add_scalar("Val train loss", loss_val, global_step) # summary_writer.add_scalar( # "Val accuracy", 100 * (correct_val / size), global_step # ) loss_tag.backward() # loss_val.backward() nn.utils.clip_grad_value_(model_tag.parameters(), 5.0) # nn.utils.clip_grad_value_(model_val.parameters(), 5.0) optimizer_tag.step() # optimizer_val.step() if i % 5000 == 0: torch.save(model_tag, f"{DATA_ROOT}budala_{model_iter}.pickle") model_iter += 1 model_tag.eval() # model_val.eval() correct_tag = 0 correct_val = 0 loss_sum_tag = 0 loss_sum_val = 0 cnt = 0 ep_cnt = 0 with torch.no_grad(): for i, (sentence, y) in tqdm( enumerate(val_data), total=len(val_data), desc=f"Epoch: {epoch}", unit="batches", ): global_step_val = epoch * 
len(val_data) + i sentence_tag = sentence[:, :, 0].to(device) y_tag = y[:, 0].to(device) y_pred_tag = model_tag(sentence_tag) # sentence_val = sentence[:, :, 1].to(device) # y_val = y[:, 1].to(device) # y_pred_val = model_val(sentence_val) correct_tag += (y_pred_tag.argmax(dim=1) == y_tag).sum().item() # correct_val += (y_pred_val.argmax(dim=1) == y_val).sum().item() loss_tag = loss_function(y_pred_tag, y_tag.long()) # loss_val = loss_function(y_pred_val, y_val.long()) summary_writer.add_scalar("validation_loss_tag", loss_tag, global_step_val) # summary_writer.add_scalar("validation_loss_val", loss_val, global_step_val) loss_sum_tag += loss_tag # loss_sum_val += loss_val ep_cnt += 1 cnt += y_tag.size(0) print( f"Validation tag: loss {loss_sum_tag/ep_cnt}, accuracy:{100*correct_tag/cnt}" ) # print( # f"Validation val: loss {loss_sum_val/ep_cnt}, accuracy:{100*correct_val/cnt}" # ) print(f"Epoch ended, time taken {time.time()-start_time}s") torch.save(model_tag, "D://data//first_model_tag.pickle")
            return self.predictions[0][now_time]
        else:
            return self.predictions[now_time]


if __name__ == '__main__':
    import yaml
    import sys
    sys.path.insert(0, '..')
    from DataManager import DataManager
    from xbos import get_client

    with open("../config_file.yml", 'r') as ymlfile:
        cfg = yaml.load(ymlfile)
    with open("../Buildings/ciee/ZoneConfigs/HVAC_Zone_Eastzone.yml", 'r') as ymlfile:
        advise_cfg = yaml.load(ymlfile)

    if cfg["Server"]:
        c = get_client(agent=cfg["Agent_IP"], entity=cfg["Entity_File"])
    else:
        c = get_client()

    dm = DataManager(cfg, advise_cfg, c, "HVAC_Zone_Eastzone")
    occ = Occupancy(dm.preprocess_occ(), 15, 4, 4, advise_cfg["Advise"]["Occupancy_Sensors"])

    for i in range(10):
        print "Intervals ahead: " + str(i)
        print occ.occ(i)
def train(): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") TAG_EMBEDDING_DIM = 64 VAL_EMBEDDING_DIM = 128 HIDDEN_DIM = 1500 NUM_EPOCHS = 2 LAYER_NUM = 1 BATCH_SIZE = 256 data_manager_train = DataManager(TRAIN) data_manager_eval = DataManager(TEST) warnings.filterwarnings("ignore") tag_to_idx, idx_to_tag = data_manager_train.get_tag_dicts() val_to_idx, idx_to_val = data_manager_train.get_val_dicts() validate_split_idx = int(len(data_manager_eval.get_data()) * 0.04) # 2000 za eval data_train = torch.Tensor([( tag_to_idx.get((tag, have_children, have_sibling), tag_to_idx["UNK"]), val_to_idx.get(val, val_to_idx["UNK"]), ) for tag, val, have_children, have_sibling in ( data_manager_train.get_data())]) data_eval = torch.Tensor([( tag_to_idx.get((tag, have_children, have_sibling), tag_to_idx["UNK"]), val_to_idx.get(val, val_to_idx["UNK"]), ) for tag, val, have_children, have_sibling in ( data_manager_eval.get_data()[:validate_split_idx])]) train_data_loader = torch.utils.data.DataLoader(Dataset(data_train), BATCH_SIZE, shuffle=True, drop_last=True, num_workers=8) eval_data_loader = torch.utils.data.DataLoader(Dataset(data_eval), BATCH_SIZE, shuffle=False, drop_last=True, num_workers=8) model_tag = nn.DataParallel( AtentionModel(len(tag_to_idx), len(val_to_idx), TAG_EMBEDDING_DIM, VAL_EMBEDDING_DIM, HIDDEN_DIM, LAYER_NUM, False)) model_val = nn.DataParallel( AtentionModel(len(tag_to_idx), len(val_to_idx), TAG_EMBEDDING_DIM, VAL_EMBEDDING_DIM, HIDDEN_DIM, LAYER_NUM, True)) #model = torch.load(f"D://data//model_attention_1.pickle") loss_function = nn.NLLLoss() optimizer_tag = optim.Adam(model_tag.parameters()) optimizer_val = optim.Adam(model_val.parameters()) # -----------putting models on GPU------------- model_tag.cuda() model_val.cuda() # --------------------------------------------- model_iter = 1 # Sluzi za Tensorboard summary_writer = SummaryWriter() for epoch in range(NUM_EPOCHS): model_tag.train() model_val.train() for i, (sentence, y) in tqdm( enumerate(train_data_loader), total=len(train_data_loader), desc=f"Epoch: {epoch}", unit="batches", ): global_step = epoch * len(train_data_loader) + i size = int(sentence.size(0)) model_tag.zero_grad() model_val.zero_grad() model_tag.train() model_val.train() unk_idx = val_to_idx["UNK"] mask_unk = y[:, 1] != unk_idx # mask for all y val that are not UNK sentence_tag = sentence.to(device) y_pred_tag = model_tag(sentence_tag) y = y.to(device) correct_tag = (y_pred_tag.argmax(dim=1) == y[:, 0]).sum().item() loss_tag = loss_function(y_pred_tag, y[:, 0].long()) summary_writer.add_scalar("model_tag: train loss", loss_tag, global_step) summary_writer.add_scalar("model_tag: accuracy", 100 * (correct_tag / size), global_step) loss_tag.backward() nn.utils.clip_grad_value_(model_tag.parameters(), 5.0) optimizer_tag.step() loss_val = 0 if mask_unk.sum() > 0: # do forward for val_model sentence_val = sentence[mask_unk, :, :].to(device) y_pred_val = model_val(sentence_val) y = y.to(device) correct_val = (y_pred_val.argmax(dim=1) == y[mask_unk, 1]).sum().item() loss_val = loss_function(y_pred_val, y[mask_unk, 1].long()) summary_writer.add_scalar("model_value: train loss", loss_val, global_step) summary_writer.add_scalar("model_value: train accuracy", 100 * (correct_val / size), global_step) loss_val.backward() nn.utils.clip_grad_value_(model_val.parameters(), 5.0) optimizer_val.step() if (i + 1) % 200 == 0: tag = f"TRAIN tag accuracy: {100 * (correct_tag / size)}, tag loss: {loss_tag}, " val = f"val accuracy: {100 * (correct_val / size)}, 
val loss: {loss_val}\n" with open(f'{DATA_ROOT}log.txt', 'a') as log: log.write(tag) log.write(val) TIME_FOR_EVAL = 2500 if (i + 1) % TIME_FOR_EVAL == 0: #evaluation torch.save( model_tag, f"D://data//models//tag//budala_{model_iter}.pickle") torch.save( model_val, f"D://data//models//val//budala_{model_iter}.pickle") model_iter += 1 model_tag.eval() model_val.eval() correct_sum_tag = 0 correct_sum_val = 0 loss_sum_tag = 0 loss_sum_val = 0 size_sum_eval = 0 with torch.no_grad(): for i_eval, (sentence_eval, y_eval) in tqdm( enumerate(eval_data_loader), total=len(eval_data_loader), desc=f"Epoch eval: {global_step//TIME_FOR_EVAL}", unit="batches", ): global_step_eval = (global_step // TIME_FOR_EVAL ) * len(eval_data_loader) + i_eval size_eval = int(sentence_eval.size(0)) size_sum_eval += size_eval sentence_eval = sentence_eval.to(device) unk_idx = val_to_idx["UNK"] mask_unk = y_eval[:, 1] != unk_idx #tag sentence_tag = sentence_eval.to(device) y_pred_tag = model_tag(sentence_tag) y_eval = y_eval.to(device) correct_tag = (y_pred_tag.argmax( dim=1) == y_eval[:, 0]).sum().item() loss_tag = loss_function(y_pred_tag, y_eval[:, 0].long()) correct_sum_tag += correct_tag loss_sum_tag += loss_tag summary_writer.add_scalar("model_tag: evaluation loss", loss_tag, global_step_eval) summary_writer.add_scalar( "model_tag: evaluation accuracy", 100 * (correct_tag / size_eval), global_step_eval) if mask_unk.sum() > 0: sentence_eval = sentence_eval[mask_unk].to(device) y_pred_val = model_val(sentence_eval) y_eval = y_eval.to(device) correct_val = (y_pred_val.argmax( dim=1) == y_eval[mask_unk, 1]).sum().item() loss_val = loss_function( y_pred_val, y_eval[mask_unk, 1].long()) correct_sum_val += correct_val loss_sum_val += loss_val summary_writer.add_scalar( "model_value: evaluation loss", loss_val, global_step_eval) summary_writer.add_scalar( "model_value: evaluation accuracy", 100 * (correct_val / size_eval), global_step_eval) summary_writer.add_scalar( "model_tag: average evaluation loss", loss_sum_tag / len(eval_data_loader), global_step // TIME_FOR_EVAL) summary_writer.add_scalar( "model_tag: average evaluation accuracy", 100 * (correct_sum_tag / size_sum_eval), global_step // TIME_FOR_EVAL) summary_writer.add_scalar( "model_value: average evaluation loss", loss_sum_val / len(eval_data_loader), global_step // TIME_FOR_EVAL) summary_writer.add_scalar( "model_value: average evaluation accuracy", 100 * (correct_sum_val / size_sum_eval), global_step // TIME_FOR_EVAL) tag = f"EVAL: tag accuracy: {100 * (correct_sum_tag / size_sum_eval)}, tag loss: {loss_sum_tag/len(eval_data_loader)}, " val = f"val accuracy: {100 * (correct_sum_val / size_sum_eval)}, val loss: {loss_sum_val/len(eval_data_loader)}\n" with open(f'{DATA_ROOT}log.txt', 'a') as log: log.write(tag) log.write(val)
from PIL import Image

# torch
import torch
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms, models
from torch import nn

# parameters loading
from AppParametersLoader import AppParametersLoader
parameters = AppParametersLoader()
parameters.print_all()

# data loading
from DataManager import DataManager
data_manager = DataManager()
data_manager.load_TrainTestValid(parameters.data_dir())

# model definition
from ModelManager import ModelManager
if parameters.arch() == 'vgg16':
    model = models.vgg16(pretrained=True)
    input_nodes = 25088
elif parameters.arch() == 'densenet121':
    model = models.densenet121(pretrained=True)
    input_nodes = 1024

classifier = nn.Sequential(
    nn.Linear(input_nodes, parameters.hidden_units()),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(parameters.hidden_units(),
parser.add_argument('--fast', type=int, choices=[0, 1], default=0)
parser.add_argument('--screen', type=int, choices=[0, 1], default=0)
parser.add_argument('--optimizer', type=str, default='ADAGRAD')
parser.add_argument('--grained', type=int, default=2)
parser.add_argument('--lr', type=float, default=0.0001)
parser.add_argument('--lr_word_vector', type=float, default=0.000007)
parser.add_argument('--epoch', type=int, default=25)
parser.add_argument('--batch', type=int, default=10)
parser.add_argument('--doc_num', type=int, default=50000)
# parser.add_argument('--reload', type=str, default=True)
parser.add_argument('--saveto', type=str, default='best_model17.pkl')
parser.add_argument('--reload_dic', type=str, default=False)
# parser.add_argument('--reload_dic', type=str, default='dic.pkl')
args, _ = parser.parse_known_args(argv)
random.seed(args.seed)

data = DataManager(args.dataset)
if args.reload_dic:
    print('reloading dictionary...')
    wordlist = data.load_word(args.reload_dic)
else:
    print('building dictionary...')
    wordlist = data.gen_word()
    print('saving dictionary...')
    pkl.dump(wordlist, open('dic.pkl', 'wb'), -1)
print('%d unique words in total' % len(wordlist))

train_data, test_data = data.gen_data(args.grained)
random.shuffle(train_data)
num = int(len(train_data) * 0.11)
dev_data = train_data[:num]
train_data_new = train_data[num:]
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, BatchNormalization, Flatten, Reshape
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras import utils

from DataManager import DataManager

print("Loading training data...")
dm = DataManager(random_state=0)
training_data, training_labels = dm.loadTrainingData()
testing_data, testing_labels = dm.loadTestingData()
validation_data, validation_labels = dm.loadValidationData()

print('Loaded shapes')
for i in training_data, training_labels, testing_data, testing_labels, validation_data, validation_labels:
    print(i.shape)

input_shape = tuple(training_data.shape[1:])
num_classes = len(np.unique(training_labels))
print("input_shape: {}".format(input_shape))
print("num_classes: {}".format(num_classes))

# Convert to categorical classes
training_labels = utils.to_categorical(training_labels, num_classes)
testing_labels = utils.to_categorical(testing_labels, num_classes)
validation_labels = utils.to_categorical(validation_labels, num_classes)

data_generator = ImageDataGenerator(featurewise_center=True,
                                    featurewise_std_normalization=True,
                                    rotation_range=20,
        skip_first = 0
        for pair, df in self.returns_dict.items():
            if skip_first == 0:
                skip_first = 1
                continue
            combined_returns = pd.concat([combined_returns, df], ignore_index=True, axis=0)
        self.combined_returns = combined_returns
        self.total_returns = 1
        for returns in combined_returns['returns'].values:
            self.total_returns = self.total_returns * (1 + returns)


if __name__ == "__main__":
    dm = DataManager()
    # This code will just do it for one sector
    # x.data = x.getOneSector(sector="Energy", fromDate="2015-01-01", toDate="2016-09-21")
    dm.getOneSector(sector="Energy", fromDate="2013-01-01", toDate="2015-01-01")
    # x.calcReturns()
    strat = CointStrategyStopLoss
    bt = Backtester(strat, dm.data)
    bt.backtest()
    bt.plot_stuff()
    # bt.strat.CA.plot_pair(['MA','V'], fromDate="2014-01-01", toDate="2018-01-01")
    print(bt.total_returns)
    plt.show()
def destination_value(self) -> int:
    if self.__destination_value == -1 and self.__destination is not None:
        self.__destination_value = self.__rate_dest[
            DataManager.transfer_airport_cod_names_to_all(self.__destination.code)]
    return self.__destination_value
print("epoch ", e, ": dev F1: ", devF1, ", test F1: ", testF1) f.write("epoch "+ str(e)+ ": dev F1: "+ str(devF1)+ ", test F1: "+ str(testF1)+ "\n") f.close() torch.save(model, "checkpoints/model_"+args.logfile+"_"+str(e)) if __name__ == "__main__": torch.manual_seed(1) if not os.path.exists('checkpoints'): os.mkdir('checkpoints') argv = sys.argv[1:] parser = Parser().getParser() args, _ = parser.parse_known_args(argv) print("Load data start...") dm = DataManager(args.datapath, args.testfile) wv = dm.vector train_data, test_data, dev_data = dm.data['train'], dm.data['test'], dm.data['dev'] print("train_data count: ", len(train_data)) print("test_data count: ", len(test_data)) print("dev_data count: ", len(dev_data)) model = Model(args.lr, args.dim, args.statedim, wv, dm.relation_count) model.cuda() if args.start != '': pretrain_model = torch.load(args.start) model_dict = model.state_dict() pretrained_dict = pretrain_model.state_dict() pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} model_dict.update(pretrained_dict)
def get_names():
    image_list = 'images.txt'
    return DataManager.get('images/{}'.format(image_list))
def update(node, row_id):
    if not request.json:
        abort(400)
    return jsonify(DataManager().edit_row(node, row_id, request.json))
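# A hedged wiring sketch: the create/update/delete handlers above look like Flask
# view functions, so they would normally be registered on an app with URL rules
# roughly like the hypothetical '/api/<node>' paths below, which are assumptions
# rather than routes taken from the original project.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/api/<node>', view_func=create, methods=['POST'])
app.add_url_rule('/api/<node>/<int:row_id>', view_func=update, methods=['PUT'])
app.add_url_rule('/api/<node>/<int:row_id>', view_func=delete, methods=['DELETE'])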
import gensim
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from concurrent.futures import ThreadPoolExecutor
from DataManager import DataManager

datamanager = DataManager()
sentences = datamanager.sentences
POSes = datamanager.parses
POS_id = datamanager.POS_id
entitypairs = datamanager.training_entitypairs
testing_entitypairs = datamanager.testing_entitypairs
relations = datamanager.relations
document = []


def check_entity_in_words(entity, words):
    if entity in words:
        return True
    elif len(entity) == 3 and entity[1:] in words:
        return True
    else:
        return False


def search_relation_sentence(entitypair):
    context = []
    context_pos = []
    e1_first_sentence = []
    e1_first_pos = []
def mainTF(options): import tensorflow as tf from CreateModel import CreateModel from DataManager import DataManager from DataSet import DataSet print "PROCESSING VALIDATION DATA" dgSig = DataGetter.DefinedVariables(options.netOp.vNames, signal=True) dgBg = DataGetter.DefinedVariables(options.netOp.vNames, background=True) validDataSig = [ (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_0_division_1_TTbarSingleLepT_validation_0.h5", ), 1), (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_0_division_1_TTbarSingleLepTbar_validation_0.h5", ), 1) ] validDataBgTTbar = [ (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_TTbarSingleLepT_validation_0.h5", ), 1), (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_TTbarSingleLepTbar_validation_0.h5", ), 1), ] validDataBgQCDMC = [ (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT100to200_validation_0.h5", ), 1), (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT200to300_validation_0.h5", ), 1), (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT300to500_validation_0.h5", ), 1), (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT500to700_validation_0.h5", ), 1), (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT700to1000_validation_0.h5", ), 1), (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT1000to1500_validation_0.h5", ), 1), (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT1500to2000_validation_0.h5", ), 1), (("/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_QCD_HT2000toInf_validation_0.h5", ), 1) ] validDataBgQCDData = [(( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_0_division_1_Data_JetHT_2016_validation_0.h5", ), 1)] print "Input Variables: ", len(dgSig.getList()) # Import data #print options.runOp.validationSamples validDataSig = getValidData(dgSig, validDataSig, options) validDataBgTTbar = getValidData(dgBg, validDataBgTTbar, options) validDataBgQCDMC = getValidData(dgBg, validDataBgQCDMC, options) validDataBgQCDData = getValidData(dgBg, validDataBgQCDData, options) validDataTTbar = combineValidationData(validDataSig, validDataBgTTbar) validDataQCDMC = combineValidationData(validDataSig, validDataBgQCDMC) validDataQCDData = combineValidationData(validDataSig, validDataBgQCDData) #get input/output sizes #print validData["data"].shape nFeatures = validDataTTbar["data"].shape[1] nLabels = validDataTTbar["labels"].shape[1] nWeights = validDataTTbar["weights"].shape[1] #Training parameters l2Reg = options.runOp.l2Reg MiniBatchSize = options.runOp.minibatchSize nEpoch = options.runOp.nepoch ReportInterval = options.runOp.reportInterval validationCount = min(options.runOp.nValidationEvents, validDataTTbar["data"].shape[0]) #scale data inputs to mean 0, stddev 1 categories = numpy.array(options.netOp.vCategories) mins = numpy.zeros(categories.shape, dtype=numpy.float32) ptps = numpy.zeros(categories.shape, dtype=numpy.float32) for i in xrange(categories.max()): selectedCategory = categories == i mins[selectedCategory] = 
validDataTTbar["data"][:, selectedCategory].mean( ) ptps[selectedCategory] = validDataTTbar["data"][:, selectedCategory].std( ) ptps[ptps < 1e-10] = 1.0 ##Create data manager, this class controls how data is fed to the network for training # DataSet(fileGlob, xsec, Nevts, kFactor, sig, prescale, rescale) signalDataSets = [ DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_*_division_0_TTbarSingleLepT_training_*.h5", 365.4, 61878989, 1.0, True, 0, 1.0, 1.0, 8), DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6p1/trainingTuple_*_division_0_TTbarSingleLepTbar_training_*.h5", 365.4, 61901450, 1.0, True, 0, 1.0, 1.0, 8), ] #pt reweighting histograms ttbarRatio = (numpy.array([ 0.7976347, 1.010679, 1.0329635, 1.0712056, 1.1147588, 1.0072196, 0.79854023, 0.7216115, 0.7717652, 0.851551, 0.8372917 ]), numpy.array([ 0., 50., 100., 150., 200., 250., 300., 350., 400., 450., 500., 1e10 ])) QCDDataRatio = (numpy.array([ 0.50125164, 0.70985824, 1.007087, 1.6701245, 2.5925348, 3.6850858, 4.924969, 6.2674766, 7.5736594, 8.406105, 7.7529635 ]), numpy.array([ 0., 50., 100., 150., 200., 250., 300., 350., 400., 450., 500., 1e10 ])) QCDMCRatio = (numpy.array([ 0.75231355, 1.0563549, 1.2571484, 1.3007764, 1.0678109, 0.83444154, 0.641499, 0.49130705, 0.36807108, 0.24333349, 0.06963781 ]), numpy.array([ 0., 50., 100., 150., 200., 250., 300., 350., 400., 450., 500., 1e10 ])) backgroundDataSets = [ DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_TTbarSingleLepT_training_*.h5", 365.4, 61878989, 1.0, False, 0, 1.0, 1.0, 8, ttbarRatio), DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_TTbarSingleLepTbar_training_*.h5", 365.4, 61901450, 1.0, False, 0, 1.0, 1.0, 8, ttbarRatio), DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_Data_JetHT_2016_training_*.h5", 1.0, 1, 1.0, False, 1, 1.0, 1.0, 8, include=False), #QCDDataRatio), DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT100to200_training_*.h5", 27990000, 80684349, 0.0, False, 2, 1.0, 1.0, 1, include=False), #QCDMCRatio), DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT200to300_training_*.h5", 1712000, 57580393, 0.0, False, 2, 1.0, 1.0, 1, include=False), #QCDMCRatio), DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT300to500_training_*.h5", 347700, 54537903, 0.0, False, 2, 1.0, 1.0, 1, include=False), #QCDMCRatio), DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT500to700_training_*.h5", 32100, 62271343, 0.0, False, 2, 1.0, 1.0, 1, include=False), #QCDMCRatio), DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT700to1000_training_*.h5", 6831, 45232316, 0.0, False, 2, 1.0, 1.0, 1, include=False), #QCDMCRatio), DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT1000to1500_training_*.h5", 1207, 15127293, 0.0, False, 2, 1.0, 1.0, 1, include=False), #QCDMCRatio), DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT1500to2000_training_*.h5", 119.9, 11826702, 0.0, False, 2, 1.0, 1.0, 1, include=False), #QCDMCRatio), 
DataSet( "/cms/data/pastika/trainData_pt20_30_40_dRPi_tightMass_deepFlavor_v6/trainingTuple_*_division_0_QCD_HT2000toInf_training_*.h5", 25.24, 6039005, 0.0, False, 2, 1.0, 1.0, 1, include=False), #QCDMCRatio), ] dm = DataManager(options.netOp.vNames, nEpoch, nFeatures, nLabels, 2, nWeights, options.runOp.ptReweight, signalDataSets, backgroundDataSets) # Build the graph denseNetwork = [nFeatures] + options.netOp.denseLayers + [nLabels] convLayers = options.netOp.convLayers rnnNodes = options.netOp.rnnNodes rnnLayers = options.netOp.rnnLayers mlp = CreateModel(options, denseNetwork, convLayers, rnnNodes, rnnLayers, dm.inputDataQueue, MiniBatchSize, mins, 1.0 / ptps) #summary writer summary_writer = tf.summary.FileWriter(options.runOp.directory + "log_graph", graph=tf.get_default_graph()) print "TRAINING NETWORK" with tf.Session(config=tf.ConfigProto( intra_op_parallelism_threads=8)) as sess: sess.run(tf.global_variables_initializer()) #start queue runners dm.launchQueueThreads(sess) print "Reporting validation loss every %i batches with %i events per batch for %i epochs" % ( ReportInterval, MiniBatchSize, nEpoch) #preload the first data into staging area sess.run([mlp.stagingOp], feed_dict={ mlp.reg: l2Reg, mlp.keep_prob: options.runOp.keepProb }) i = 0 N_TRAIN_SUMMARY = 10 #flush queue until the sample fraction is approximately equal flushctr = 200 while dm.continueTrainingLoop(): result = sess.run(dm.inputDataQueue.dequeue_many(MiniBatchSize)) signalCount = result[1][:, 0].sum() bgCount = result[1][:, 1].sum() signalFraction = signalCount / (signalCount + bgCount) #the first this fraction drops below 0.5 means we are close enough to equal signal/bg fraction if signalFraction < 0.5: flushctr -= 1 if flushctr <= 0: break try: while dm.continueTrainingLoop(): grw = 2 / (1 + exp(-i / 10000.0)) - 1 #run validation operations if i == 0 or not i % ReportInterval: #run validation operations validation_loss, accuracy, summary_vl = sess.run( [ mlp.loss_ph, mlp.accuracy, mlp.merged_valid_summary_op ], feed_dict={ mlp.x_ph: validDataTTbar["data"][:validationCount], mlp.y_ph_: validDataTTbar["labels"][:validationCount], mlp.p_ph_: validDataTTbar["domain"][:validationCount], mlp.reg: l2Reg, mlp.gradientReversalWeight: grw, mlp.wgt_ph: validDataTTbar["weights"][:validationCount] }) summary_writer.add_summary(summary_vl, i / N_TRAIN_SUMMARY) print( 'Interval %d, validation accuracy %0.6f, validation loss %0.6f' % (i / ReportInterval, accuracy, validation_loss)) validation_loss, accuracy, summary_vl_QCDMC = sess.run( [ mlp.loss_ph, mlp.accuracy, mlp.merged_valid_QCDMC_summary_op ], feed_dict={ mlp.x_ph: validDataQCDMC["data"][:validationCount], mlp.y_ph_: validDataQCDMC["labels"][:validationCount], mlp.p_ph_: validDataQCDMC["domain"][:validationCount], mlp.reg: l2Reg, mlp.gradientReversalWeight: grw, mlp.wgt_ph: validDataQCDMC["weights"][:validationCount] }) summary_writer.add_summary(summary_vl_QCDMC, i / N_TRAIN_SUMMARY) validation_loss, accuracy, summary_vl_QCDData = sess.run( [ mlp.loss_ph, mlp.accuracy, mlp.merged_valid_QCDData_summary_op ], feed_dict={ mlp.x_ph: validDataQCDData["data"][:validationCount], mlp.y_ph_: validDataQCDData["labels"][:validationCount], mlp.p_ph_: validDataQCDData["domain"][:validationCount], mlp.reg: l2Reg, mlp.gradientReversalWeight: grw, mlp.wgt_ph: validDataQCDData["weights"][:validationCount] }) summary_writer.add_summary(summary_vl_QCDData, i / N_TRAIN_SUMMARY) #run training operations if i % N_TRAIN_SUMMARY == 0: _, _, summary = sess.run( [ mlp.stagingOp, 
mlp.train_step, mlp.merged_train_summary_op ], feed_dict={ mlp.reg: l2Reg, mlp.keep_prob: options.runOp.keepProb, mlp.training: True, mlp.gradientReversalWeight: grw }) summary_writer.add_summary(summary, i / N_TRAIN_SUMMARY) else: sess.run( [mlp.stagingOp, mlp.train_step], feed_dict={ mlp.reg: l2Reg, mlp.keep_prob: options.runOp.keepProb, mlp.training: True }) i += 1 while dm.continueFlushingQueue(): sess.run(dm.inputDataQueue.dequeue_many(MiniBatchSize)) except Exception, e: # Report exceptions to the coordinator. dm.requestStop(e) finally:
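The pt-reweighting inputs above (ttbarRatio, QCDDataRatio, QCDMCRatio) are (ratio, bin-edge) pairs. A minimal sketch of how such a pair can be turned into per-event weights with numpy.digitize; the helper name reweight_pt and the commented usage line are illustrative assumptions, not taken from DataSet's implementation:

import numpy

def reweight_pt(pt, ratio_hist):
    """Look up a per-event weight from a (ratio, bin_edges) pair like ttbarRatio.

    Hypothetical helper: ratio_hist holds N ratios and N+1 bin edges; events
    outside the edges are clipped into the first/last bin.
    """
    ratios, edges = ratio_hist
    idx = numpy.digitize(pt, edges) - 1          # digitize returns 1-based bin indices
    idx = numpy.clip(idx, 0, len(ratios) - 1)
    return ratios[idx]

# e.g. weights *= reweight_pt(candidate_pt, ttbarRatio)   # candidate_pt is an assumed pt column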
class QuotesSpider(scrapy.Spider): name = "goldcar" def __init__(self, shop_name=None, *args, **kwargs): super(QuotesSpider, self).__init__(*args, **kwargs) self.database = DataManager(shop_name) self.shop_name = shop_name.lower() self.start_urls = self.database.getScrapyUrl() self.page_index = 1 # out = "Scraping data: " + self.start_urls[0] # print(out) def parse(self, response): for quote in response.xpath( ".//div[@class='column column-block block-grid-large single-item']" ): self.database.handlerStatus() time.sleep(random.randint(1, 3)) data_id = quote.xpath( ".//a[@class='img-link quickViewAction sPrimaryLink']/@data-id" ).extract()[0] + "/u/" data_img = str( quote.xpath( ".//a[@class='img-link quickViewAction sPrimaryLink']/@data-img" ).extract()[0]).split("item_L_")[-1].split( "_")[0] + "/i/?ctype=dsrch" uri = str( quote.xpath( ".//a[@class='img-link quickViewAction sPrimaryLink']/@href" ).extract()[0]).replace(data_id, data_img) if uri is not None: yield response.follow(uri, callback=self.parseHandler1) # get the next page's url (DEL:: if there is none, start over from the beginning) next_page = response.xpath( ".//li[@class='pagination-next goToPage']/a/@href").extract() if next_page is not None and len(next_page) > 0: next_page = next_page[0].replace("page=", "section=2&page=") yield response.follow(next_page, callback=self.parse) def parseHandler1(self, response): if not response.text: print("parseHandler1: empty response") return gold_shop = str( response.xpath(".//span[@class='unit-seller-link']//b//text()"). extract()[0]).lower() ean = str( response.xpath( ".//div[@id='productTrackingParams']/@data-ean").extract()[0]) url = response.xpath( ".//a[@class='show-for-medium bold-text']/@href").extract() if url is not None and len(url) > 0: yield response.follow(url[0], callback=self.parseHandler2, meta={ "ean": ean, "gold_shop": gold_shop }) def parseHandler2(self, response): infos = self.getAllPirce( response ) # get all prices and return them as {shop_name: [price, rating, fullfilled], ...} self.solutionNoon(response.meta["ean"], infos, response.meta["gold_shop"]) def getAllPirce(self, response): infos = {} rows = response.xpath(".//div[@id='condition-all']/div[@class='row']") for row in rows: price = row.xpath( ".//div[@class='field price-field']//text()").extract()[0] price = round(float(price.strip().split('\n')[-1].split("SAR")[0]), 2) shop_name = row.xpath( ".//div[@class='field seller-name']//a//text()").extract( )[0].lower() ret = row.xpath( ".//div[@class='field clearfix labels']//div[@class='fullfilled']" ) fullfilled = False rating = 100 if ret: fullfilled = True else: rating = row.xpath( ".//div[@class='field seller-rating']//a//text()").extract( ) if rating: rating = round( float(rating[0].split('%')[0].split("(")[-1]), 2) else: rating = 0 # no rating yet infos[shop_name] = [price, rating, fullfilled] return infos def solutionNoon(self, ean, infos, gold_shop, variant_name=""): if not self.database.isInWhiteList(ean, variant_name): out = "Frontend: not in whitelist " + time.strftime("%Y-%m-%d %H:%M:%S") + " " + ean + "[" + variant_name + "]\tour shop[" + str(infos[self.shop_name][0]) + "]\t" + \ "cart[" + str(infos[gold_shop][0]) + "][" + gold_shop + "]" print(out) return attr = self.database.getAttr(ean) out = time.strftime("%Y-%m-%d %H:%M:%S") + " " + ean + "[" + variant_name + "]\tour shop[" + str(infos[self.shop_name][0]) + "]\t" + \ "cart[" + str(infos[gold_shop][0]) + "][" + gold_shop + "]" self.database.spiderRecord(ean, infos[gold_shop][0], gold_shop, variant_name) if gold_shop in attr["my_shop"]: # the gold cart (buy box) is our own shop out = "Case A " + out + "\tno change" else: if
infos[self.shop_name][2]: # our product is FBN (fulfilled) diff1 = abs(infos[gold_shop][0] - infos[self.shop_name][0] ) / infos[self.shop_name][0] if infos[gold_shop][2]: # the gold cart (buy box) listing is FBN if diff1 > attr["percent"]: out = "Case B " + out + "\tno change" else: price = round( min(infos[gold_shop][0], infos[self.shop_name][0]) - attr["lowwer"], 2) if price < attr["self_least_price"]: out = "Case C " + out + "\tno change" else: self.database.needToChangePrice( ean, price, gold_shop, variant_name, 1) out = "Case C " + out + "\tprice diff ratio[" + str( round(diff1 * 100, 2)) + "%]\tchange price to[" + str(price) + "]" else: price = round(infos[self.shop_name][0] - attr["lowwer"], 2) if price < max(infos[gold_shop][0], attr["self_least_price"]): out = "Case D " + out + "\tno change" else: self.database.needToChangePrice( ean, price, gold_shop, variant_name, 1) out = "Case D " + out + "\tchange price to[" + str(price) + "]" else: least_price = 999999 for info in infos.values(): if least_price > info[0]: least_price = info[0] diff2 = abs( min(infos[gold_shop][0], least_price) - infos[self.shop_name][0]) / infos[self.shop_name][0] if diff2 > attr["percent"]: out = "Case E " + out + "\tlowest price[" + str(least_price) + "]\t" + "price diff ratio[" + \ str(round(diff2 * 100, 2)) + "%]" + "no change" else: price = round( min(infos[gold_shop][0], least_price) - attr["lowwer"], 2) if price < attr["self_least_price"]: out = "Case F " + out + "\tlowest price[" + str( least_price) + "]\t" + "no change" else: self.database.needToChangePrice( ean, price, gold_shop, variant_name, 0) out = "Case F " + out + "\tlowest price[" + str(least_price) + "]\t" + "price diff ratio[" + \ str(round(diff2 * 100, 2)) + "%]\tchange price to[" + str(price) + "]" out = "Frontend: " + out print(out)
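The repricing branches in solutionNoon are easier to audit as a pure function. A condensed, hypothetical restatement follows; decide_reprice is not part of the spider, it only reuses the attr keys seen above ("my_shop", "percent", "lowwer", "self_least_price") and returns the case label plus the new price, with None wherever the original prints "no change":

def decide_reprice(infos, my_shop, gold_shop, attr):
    """Condensed restatement of the solutionNoon branching (hypothetical helper).

    infos maps shop_name -> [price, rating, fulfilled]; attr supplies the
    thresholds used above. Returns (case, new_price); new_price is None when
    the listing price is left alone.
    """
    ours, gold = infos[my_shop][0], infos[gold_shop][0]
    if gold_shop in attr["my_shop"]:                    # Case A: we already hold the buy box
        return "A", None
    if infos[my_shop][2]:                               # our listing is fulfilled (FBN)
        diff = abs(gold - ours) / ours
        if infos[gold_shop][2]:                         # buy-box listing is also FBN
            if diff > attr["percent"]:
                return "B", None
            price = round(min(gold, ours) - attr["lowwer"], 2)
            return "C", (price if price >= attr["self_least_price"] else None)
        price = round(ours - attr["lowwer"], 2)
        return "D", (price if price >= max(gold, attr["self_least_price"]) else None)
    least = min(info[0] for info in infos.values())     # cheapest offer overall
    diff = abs(min(gold, least) - ours) / ours
    if diff > attr["percent"]:
        return "E", None
    price = round(min(gold, least) - attr["lowwer"], 2)
    return "F", (price if price >= attr["self_least_price"] else None)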
#! /usr/bin/env python import tensorflow as tf import numpy as np import os import time import datetime from DataManager import DataManager from RE_CNN import TextCNN from tensorflow.contrib import learn dataManager = DataManager() # Parameters # ================================================== # Eval Parameters tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)") tf.flags.DEFINE_string("checkpoint_dir", "./runs/1481741016/checkpoints", "Checkpoint directory from training run") tf.flags.DEFINE_boolean("eval_test", True, "Evaluate on all testing data") # Misc Parameters tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement") tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices") FLAGS = tf.flags.FLAGS FLAGS._parse_flags() print("\nParameters:") for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr.upper(), value))
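The script above only defines and parses the evaluation flags. A minimal sketch of the restore-and-predict step that typically follows in this kind of TF1 eval script, assuming the trained graph exposes tensors named "input_x", "dropout_keep_prob" and "output/predictions" (those names are assumptions about RE_CNN, not taken from the source):

checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
graph = tf.Graph()
with graph.as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=FLAGS.allow_soft_placement,
                                  log_device_placement=FLAGS.log_device_placement)
    with tf.Session(config=session_conf) as sess:
        # restore the trained TextCNN graph and its weights
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)
        input_x = graph.get_operation_by_name("input_x").outputs[0]
        dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
        predictions = graph.get_operation_by_name("output/predictions").outputs[0]
        # x_test would come from dataManager; dropout is disabled at eval time
        # preds = sess.run(predictions, {input_x: x_test, dropout_keep_prob: 1.0})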
def process_frames(infos, threads=2): print("Processing with {0} thread(s)".format(threads)) with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor: for info in infos: executor.submit(process_frame, info) if __name__ == "__main__": # process command line arguments options = process_args() # load data data_manager = DataManager() data_manager.load("../db/g2x-1479064727.db") # make svg generator generator = SVGGenerator('./overlay.svg.mustache') # build list of frames of interest, and their associated metadata frames = filter( lambda f: f.in_range(options["start"], options["end"]), map( lambda f: Frame(options["input"], options["output"], f), os.listdir(options["input"]) ) ) # process all frames
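executor.submit() above fires and forgets: if process_frame raises, the exception is stored on the returned future and never surfaces. A small variant that keeps the same interface but re-raises worker errors (process_frame is the worker function assumed by the snippet):

import concurrent.futures

def process_frames_checked(infos, threads=2):
    print("Processing with {0} thread(s)".format(threads))
    with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
        futures = [executor.submit(process_frame, info) for info in infos]
        for future in concurrent.futures.as_completed(futures):
            future.result()  # re-raises any exception raised inside process_frame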
# ========== Call NetManager ========== net_cls = NetManager() # ========== Call PathManager ========== path_cls = PathManager(tfrecord_folder=TFRECORD_FOLDER, output_rootfolder=OUT_ROOT_FOLDER, epoch_output_rootfolder=EPOCH_OUT_ROOT_FOLDER) path_cls.all_makedirs() # create the result-output folders # ========== Call DataSet ========== # load the property data df = pd.read_csv(path_cls.get_property_path()) shuf_train_ds_cls = DataManager( tfrecord_path=path_cls.get_train_ds_path(), img_root=IMAGE_ROOT_PATH, batch_size=SHUF_LEARN_BATCH_SIZE, net_cls=net_cls, data_n=df.at[0, 'total_learn_data'], suffle_buffer=SUFFLE_BUFFER_SIZE, ) train_ds_cls = DataManager( tfrecord_path=path_cls.get_train_ds_path(), img_root=IMAGE_ROOT_PATH, batch_size=LEARN_BATCH_SIZE, net_cls=net_cls, data_n=df.at[0, 'total_learn_data'], ) test_ds_cls = DataManager( tfrecord_path=path_cls.get_test_ds_path(), img_root=IMAGE_ROOT_PATH, batch_size=TEST_BATCH_SIZE, net_cls=net_cls,
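DataManager here receives a TFRecord path, a batch size and an optional shuffle buffer. A minimal sketch, assuming it wraps a standard tf.data pipeline roughly like the following (the record-parsing step is omitted because its feature keys are not visible in the snippet):

import tensorflow as tf

def make_tfrecord_dataset(tfrecord_path, batch_size, shuffle_buffer=None):
    """Sketch of the pipeline a TFRecord-backed DataManager presumably builds."""
    ds = tf.data.TFRecordDataset(tfrecord_path)
    if shuffle_buffer:
        ds = ds.shuffle(shuffle_buffer)   # only the shuffled training variant passes a buffer
    # a real pipeline would .map() a parsing/decoding function here
    return ds.batch(batch_size)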