def __init__(self):
    """Set up MongoDB access, the three work queues used by the crawler
    threads, and the logger."""
    self.mongo = MongoDB()
    self.news_url_queue = news_url_Queue()  # holds news URLs, consumed by multi-threaded crawling
    self.news_html_queue = news_url_Queue()  # holds fetched news HTML
    self.old_day_news_queue = news_url_Queue()  # holds previous-day news items
    # self.log = Logging('../helloword/static/sina').get_logging()
    self.log = Logging('../Sina/sina.txt').get_logging()
def main():
    """Parse a player CSV into two generated .sql files (data + person),
    load both into the DB, then clean up the temporary files.

    Returns 1 on success, -1 on any failure. Python 2 script (print
    statements). NOTE(review): block structure reconstructed from a
    collapsed source — confirm the else pairs with the first parsingFile if.
    """
    # Require at least the CSV path argument.
    if len(sys.argv) < 2:
        print "error! Not enough arguments\nUsage: python playerData.py file.csv [info|debug]"
        return -1
    pathOfFileToParse = sys.argv[1]
    global log
    # Optional second argument selects the log target/level.
    if len(sys.argv) == 3 and sys.argv[2] != None:
        log = Logging(sys.argv[2])
        log.logInfo('Executing: python %s %s %s' % (sys.argv[0], sys.argv[1], sys.argv[2]))
    else:
        log = Logging()
        log.logInfo('Executing: python %s %s' % (sys.argv[0], sys.argv[1]))
    log.logInfo('Execution starts')
    fileToParse = FileTreatment(pathOfFileToParse)
    log.logInfo('File to be parsed: %s' % (pathOfFileToParse))
    # Two generated .sql outputs: one for data rows, one for person rows.
    pathOfDataFileGenerated = os.path.join(os.getcwd(), generateName('.sql'))
    pathOfPersonFileGenerated = os.path.join(os.getcwd(), generateName('.sql'))
    log.logInfo('Data file generated: %s' % (pathOfDataFileGenerated))
    log.logInfo('Person file generated: %s' % (pathOfPersonFileGenerated))
    dataFileGenerated = FileTreatment(pathOfDataFileGenerated)
    if parsingFile(fileToParse, dataFileGenerated, 'data') != -1:
        if dataFileGenerated.readFile() == -1:
            log.logInfo('File %s wrongly generated' % (dataFileGenerated))
            return -1
        populateDB(dataFileGenerated.file)
        personFileGenerated = FileTreatment(pathOfPersonFileGenerated)
        if parsingFile(fileToParse, personFileGenerated, 'person') != -1:
            if personFileGenerated.readFile() == -1:
                log.logInfo('File %s wrongly generated' % (personFileGenerated))
                return -1
            populateDB(personFileGenerated.file)
        # Remove input and generated artifacts once the DB is populated.
        fileToParse.deleteFile()
        dataFileGenerated.deleteFile()
        personFileGenerated.deleteFile()
        log.logInfo('Execution ends\n\n')
        return 1
    else:
        print 'error! Wrong file'
        log.logInfo('Execution ends with failures\n\n')
        return -1
def wrapped(*args, **kwargs):
    """Log entry into the wrapped ``func``, delegate to it, and log +
    re-raise any exception it throws.

    Fixes vs. original: Python-2-only ``except Exception, e`` replaced with
    ``except Exception as e`` (valid on 2.6+ and 3.x); unused ``s_time``
    removed; ``raise Exception(e)`` replaced with a bare ``raise`` so the
    original exception type and traceback are preserved for callers.
    """
    logger = Logging(name="Decorator")
    logger.info('[Enter method: %s]', func.__name__)
    try:
        return func(*args, **kwargs)
    except Exception as e:
        # Parenthesized print works as a statement on py2 and a call on py3.
        print('Exception in %s : %s' % (func.__name__, e))
        # Bare raise keeps the original exception class and traceback.
        raise
def initialize(self):
    """Prepare a sync session: fetch the web playlist, obtain a Spotify
    auth token, resolve the playlist ID, and load its current contents.

    Returns True when every step succeeds, False otherwise.
    """
    self.logger = Logging()

    # Get list of songs from web
    if len(self.songs) <= 0:
        self.GetPlaylistFromWeb()
        if len(self.songs) > 0:
            print("Successfully retrieved playlist")
        else:
            print("Failed to get playlist from web!")
            return False

    # Get auth token
    if not self.token or self.sp is None:
        token = util.prompt_for_user_token(
            config.User_Name,
            scope,
            client_id=config.Client_ID,
            client_secret=config.Client_Secret,
            redirect_uri=redirect_uri)
        # BUG FIX: the original never stored the token on self, so the
        # success check could not reflect reality.
        self.token = token
        self.sp = spotipy.Spotify(auth=token)
        # BUG FIX: the original condition was inverted — it printed
        # "Successfully" exactly when token/client were still missing.
        if self.token and self.sp is not None:
            print("Successfully got auth token")
        else:
            print("Failed to get auth token!")
            return False

    # Get playlist ID
    if not self.playlist_id:
        self.playlist_id = self.GetSpecificPlaylist()
        if self.playlist_id:
            print("Successfully got playlist ID")
        else:
            print("Failed to get playlist ID!")
            return False

    # Load what is already in the Spotify playlist so we can diff later.
    self.songs_already_in_playlist = self.GetPlaylistContents()
    if self.songs_already_in_playlist:
        print("Successfully got playlist from spotify")
    else:
        print("Failed to get playlist from spotify!")
        return False
    return True
def unifyParseResult(data_dict, bbd_type=None, **kwargs):
    """Normalize a parse-result dict: fill default scalar and list fields,
    pin the schema version, and merge extra keyword fields.

    :param data_dict: raw parse result (never mutated — a deep copy is used)
    :param bbd_type: logger name; falls back to data_dict['bbd_type'] or
                     'UniField' when omitted
    :return: the normalized copy of data_dict
    """
    if bbd_type is None:
        # BUG FIX: dict.has_key() was removed in Python 3; .get() with a
        # default also replaces the redundant `else: bbd_type = bbd_type`.
        bbd_type = data_dict.get("bbd_type", "UniField")
    logger = Logging(name=bbd_type)
    # Deep-copy so the caller's dict is never mutated.
    data_dict = copy.deepcopy(data_dict)
    basic_keys_list1 = [
        "bbd_source", "bbd_table", "version", "bbd_html", "bbd_url",
        "bbd_params"
    ]
    UniField.addDefaultField(data_dict, basic_keys_list1)
    basic_keys_list2 = ["baxx", "bgxx", "gdxx", "fzjg", "xzcf"]
    UniField.addDefaultField(data_dict, basic_keys_list2, empty_value=[])
    data_dict["version"] = 3  # compatibility with the data-platform schema
    data_dict.update(**kwargs)
    return data_dict
def setup_game(self):
    """One-time game setup: keys, logging, calibration sequences, optional
    photos, eye-data streaming, and (optionally) DAQ reward + gig."""
    # this only happens once, at beginning
    # set up keys
    self.setup_keys()
    self.logging = Logging(self.config)
    self.sequences = CalSequences(self.config, self.base, self.logging, self.key_dict)
    # NOTE: setdefault both reads PHOTO_PATH and inserts False into the
    # config when the key is absent (intentional side effect, it seems).
    if self.config.setdefault('PHOTO_PATH', False):
        self.photos = Photos(self.config, self.base, self.logging, self.deg_per_pixel)
        self.photos.load_all_photos()
        self.call_subroutine.append(self.photos)
    # print 'call_subroutine', self.call_subroutine
    # start generating/receiving data
    self.eye_data = EyeData(self.base, self.config['FAKE_DATA'])
    self.start_eye_data()
    # start reward capabilities, if using daq
    if self.use_daq_reward:
        # print 'setup reward'
        self.start_reward_task()
    if not self.testing:
        self.start_gig()
model.cuda() optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate, weight_decay=conf.weight_decay) ############################## PREPARE DATASET ############################## print('System start to load data...') t0 = time() train_data, val_data, test_data, user_seq_dict, item_seq_dict = data_utils.load_all( ) t1 = time() print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0)) ########################### FIRST TRAINING ##################################### check_dir('%s/train_%s_abae_rs_id_x.py' % (conf.out_path, conf.data_name)) log = Logging('%s/train_%s_abae_rs_id_X.py' % (conf.out_path, conf.data_name)) train_model_path = '%s/train_%s_abae_rs_id_X.mod' % (conf.out_path, conf.data_name) # prepare data for the training stage train_dataset = data_utils.TrainData(train_data, user_seq_dict, item_seq_dict) val_dataset = data_utils.TrainData(val_data, user_seq_dict, item_seq_dict) test_dataset = data_utils.TrainData(test_data, user_seq_dict, item_seq_dict) train_batch_sampler = data.BatchSampler(data.RandomSampler(\ range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False) val_batch_sampler = data.BatchSampler(data.RandomSampler(\ range(val_dataset.length)), batch_size=conf.batch_size, drop_last=False) test_batch_sampler = data.BatchSampler(data.RandomSampler(\
# -*- coding: utf-8 -*- apiVersion = '0.9' import gluon.contrib.simplejson as simplejson from Logging import Logging logger = Logging(db) from Events import Events eventDAL = Events(db) # The following line is required for .json output. # Note: There are security implications related to generic views. # See: # https://groups.google.com/forum/?fromgroups=#!topic/web2py/Jk-TIoQhRh4 # http://comments.gmane.org/gmane.comp.python.web2py/67902 response.generic_patterns = ['json', 'jsonp'] response.headers['Cache-Control'] = "max-age=0" @auth.requires_login() def remote_login(): user = False thisResponse = "Missing Form Field" jsonData = simplejson.loads(request.body.read()) if request.body else {} if jsonData: if jsonData.get("email") and jsonData.get("password"): password = jsonData.get("password").encode('ascii', 'replace') user = auth.login_bare(jsonData.get("email"), password) if user:
lr=conf.learning_rate, weight_decay=conf.weight_decay) review_optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate) ############################## PREPARE DATASET ############################## print('System start to load data...') t0 = time() train_data, val_data, test_data, user_doc_dict, item_doc_dict = data_utils.load_all( ) t1 = time() print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0)) ########################### FIRST TRAINING ##################################### check_dir('%s/train_%s_cf_gcn_id_x.py' % (conf.out_path, conf.data_name)) log = Logging('%s/train_%s_cf_gcn_id_X.py' % (conf.out_path, conf.data_name)) train_model_path = '%s/train_%s_cf_gcn_id_X' % (conf.out_path, conf.data_name) # prepare data for the training stage train_dataset = data_utils.TrainData(train_data, user_doc_dict, item_doc_dict) train_batch_sampler = data.BatchSampler(data.RandomSampler(\ range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False) val_dataset = data_utils.TrainData(val_data, user_doc_dict, item_doc_dict) val_batch_sampler = data.BatchSampler(data.SequentialSampler(\ range(val_dataset.length)), batch_size=conf.batch_size, drop_last=False) test_dataset = data_utils.TrainData(test_data, user_doc_dict, item_doc_dict)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8) ############################## PREPARE DATASET ############################## print('System start to load data...') t0 = time() train_data, val_data, test_data, \ train_user_historical_review_dict, train_item_historical_review_dict = data_utils.load_all() t1 = time() print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0)) ########################### FIRST TRAINING ##################################### check_dir('%s/train_%s_aspect_rating_1_id_x.log' % (conf.out_path, conf.data_name)) log = Logging('%s/train_%s_aspect_rating_1_id_02.py' % (conf.out_path, conf.data_name)) train_model_path = '%s/train_%s_aspect_rating_1_id_02.mod' % ( conf.out_path, conf.data_name) # prepare data for the training stage train_dataset = data_utils.TrainData(train_data, train_user_historical_review_dict, train_item_historical_review_dict, train_data) val_dataset = data_utils.TrainData(val_data, train_user_historical_review_dict, train_item_historical_review_dict, train_data) test_dataset = data_utils.TrainData(test_data, train_user_historical_review_dict, train_item_historical_review_dict,
model.load_state_dict( torch.load( '/content/drive/My Drive/task/aspect_based_rs/out/amazon_sports/train_amazon_sports_pmf_id_adam.mod' )) model.cuda() #optimizer = torch.optim.SGD(model.parameters(), lr=conf.learning_rate, weight_decay=conf.weight_decay) optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate, weight_decay=conf.weight_decay) #import adabound #optimizer = adabound.AdaBound(model.parameters(), lr=conf.learning_rate, final_lr=0.1, weight_decay=conf.weight_decay) ########################### FIRST TRAINING ##################################### check_dir('%s/train_%s_pmf_id_adam.log' % (conf.out_path, conf.data_name)) log = Logging('%s/train_%s_pmf_id_adam.log' % (conf.out_path, conf.data_name)) train_model_path = '%s/train_%s_pmf_id_adam.mod' % (conf.out_path, conf.data_name) # prepare data for the training stage train_dataset = data_utils.TrainData(train_data) val_dataset = data_utils.TrainData(val_data) test_dataset = data_utils.TrainData(test_data) train_batch_sampler = data.BatchSampler(data.RandomSampler( range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False) val_batch_sampler = data.BatchSampler(data.RandomSampler( range(val_dataset.length)), batch_size=conf.batch_size,
def start(conf, data, model, evaluate, dir):
    """Train a TF1 ranking model and evaluate HR/NDCG each epoch.

    :param conf: configuration object (epochs, topk, num_procs, ...)
    :param data: data handle exposing train/val/test/test_eva splits
    :param model: model exposing map_dict, opt, init, saver
    :param evaluate: evaluator with evaluateRankingPerformance()
    :param dir: log file name placed under ./log/ (shadows the builtin `dir`)

    NOTE(review): indentation reconstructed from a collapsed source.
    """
    log_dir = os.path.join(os.getcwd(), 'log')
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    # define log name
    # log_path = os.path.join(os.getcwd(), 'log/%s_%s.log' % (conf.data_name, conf.model_name))
    log_path = os.path.join(os.getcwd(), 'log/%s' % dir)

    # start to prepare data for training and evaluating
    data.initializeRankingHandle()
    # test_eva and test hold the same data
    d_train, d_val, d_test, d_test_eva = data.train, data.val, data.test, data.test_eva
    print('System start to load data...')
    t0 = time()
    # builds hash_data, positive_data, negative_data (likewise below)
    d_train.initializeRankingTrain()
    d_val.initializeRankingVT()
    d_test.initializeRankingVT()
    d_test_eva.initalizeRankingEva()
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    # prepare model necessary data.
    # builds consumeItemList and socialNeighborList
    data_dict = d_train.prepareModelSupplement(model)
    model.inputSupply(data_dict)  # builds consumeItemMatrix and socialNeighborMatrix
    model.startConstructGraph()

    # standard tensorflow running environment initialize
    tf_conf = tf.ConfigProto()
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_conf)
    sess.run(model.init)

    # NOTE(review): pretrain_flag is forced to 0 immediately before the
    # == 1 check, so the restore branch below can never run — confirm
    # whether this override is intentional.
    conf.pretrain_flag = 0
    if conf.pretrain_flag == 1:
        model.saver.restore(sess, conf.pre_model)

    # set debug_flag=0, doesn't print any results
    # log = Logging(log_path)
    log = Logging(log_path)
    print()
    log.record('Following will output the evaluation of the model:')

    # Start Training !!!
    for epoch in range(1, conf.epochs + 1):
        # optimize model with training data and compute train loss
        tmp_train_loss = []
        t0 = time()
        #tmp_total_list = []
        while d_train.terminal_flag:
            # processed batch by batch (512); this mutates the while condition
            d_train.getTrainRankingBatch()
            d_train.linkedMap()
            train_feed_dict = {}
            for (key, value) in model.map_dict['train'].items():
                train_feed_dict[key] = d_train.data_dict[value]
            [sub_train_loss, _] = sess.run(\
                [model.map_dict['out']['train'], model.opt],
                feed_dict=train_feed_dict)
            tmp_train_loss.append(sub_train_loss)
        train_loss = np.mean(tmp_train_loss)
        t1 = time()

        # compute val loss and test loss
        d_val.getVTRankingOneBatch()
        d_val.linkedMap()
        val_feed_dict = {}
        for (key, value) in model.map_dict['val'].items():
            val_feed_dict[key] = d_val.data_dict[value]
        val_loss = sess.run(model.map_dict['out']['val'], feed_dict=val_feed_dict)

        d_test.getVTRankingOneBatch()
        d_test.linkedMap()
        test_feed_dict = {}
        for (key, value) in model.map_dict['test'].items():
            test_feed_dict[key] = d_test.data_dict[value]
        test_loss = sess.run(model.map_dict['out']['test'], feed_dict=test_feed_dict)
        t2 = time()

        # start evaluate model performance, hr and ndcg
        def getPositivePredictions():
            # Scores for the held-out positive items (single batch).
            d_test_eva.getEvaPositiveBatch()
            d_test_eva.linkedRankingEvaMap()
            eva_feed_dict = {}
            for (key, value) in model.map_dict['eva'].items():
                eva_feed_dict[key] = d_test_eva.data_dict[value]
            positive_predictions = sess.run(model.map_dict['out']['eva'],
                                            feed_dict=eva_feed_dict)
            return positive_predictions

        def getNegativePredictions():
            # Scores for the sampled negatives, keyed by user id.
            negative_predictions = {}
            terminal_flag = 1
            while terminal_flag:
                batch_user_list, terminal_flag = d_test_eva.getEvaRankingBatch()
                d_test_eva.linkedRankingEvaMap()
                eva_feed_dict = {}
                for (key, value) in model.map_dict['eva'].items():
                    eva_feed_dict[key] = d_test_eva.data_dict[value]
                index = 0
                # Reshape flat scores to (users_in_batch, num_evaluate).
                tmp_negative_predictions = np.reshape(
                    sess.run(model.map_dict['out']['eva'],
                             feed_dict=eva_feed_dict),
                    [-1, conf.num_evaluate])
                for u in batch_user_list:
                    negative_predictions[u] = tmp_negative_predictions[index]
                    index = index + 1
            return negative_predictions

        tt2 = time()
        index_dict = d_test_eva.eva_index_dict
        positive_predictions = getPositivePredictions()  # e.g. 18579*1, not deduplicated
        negative_predictions = getNegativePredictions()  # e.g. 10622*1000, deduplicated hence fewer
        d_test_eva.index = 0  # !!!important, prepare for new batch

        hr, ndcg = evaluate.evaluateRankingPerformance(\
            index_dict, positive_predictions, negative_predictions,
            conf.topk, conf.num_procs)
        tt3 = time()

        # print log to console and log_file
        log.record('Epoch:%d, compute loss cost:%.4fs, train loss:%.4f, val loss:%.4f, test loss:%.4f' % \
            (epoch, (t2-t0), train_loss, val_loss, test_loss))
        log.record('Evaluate cost:%.4fs, hr:%.4f, ndcg:%.4f' % ((tt3 - tt2), hr, ndcg))

        ## reset train data pointer, and generate new negative data
        d_train.generateTrainNegative()
self.client_address[0])
        # Requested light id is not a valid integer / does not exist.
        except ValueError:
            logging.append("ERROR! light " + data.rsplit('get light ')[1] + " doesn't exist!")
            self.request.send(b"false")
    else:
        # Unrecognized command: reply false.
        self.request.send(b"false")


# NOTE(review): this chunk is truncated at both ends (the handler method
# above and the final try block below continue outside this view).
if __name__ == '__main__':
    # First let's import some config files!
    configFile = configparser.ConfigParser()
    configFile.read('config.ini')
    logging = Logging(configFile['General']['LogFile'])

    # TCP control server.
    address = (configFile['Server']['IP'], int(configFile['Server']['Port']))
    server = socketserver.TCPServer(address, EchoRequestHandler)
    ip, port = server.server_address  # find out what port we were given

    t = threading.Thread(target=server.serve_forever)
    t.setDaemon(True)  # don't hang on exit
    t.start()

    # Start the webserver
    wserver = WebServer(configFile['Web']['IP'], configFile['Web']['Port'])
    wserver.start_server()
    try:
        server.serve_forever()
""" This is the Starting Point of Kijiji Web Scraping Script Environment Setup """ import sys from datetime import datetime from Logging import Logging from SetupEnvironment import SetupEnvironment """ Getting Basic Logging Options """ logger = Logging().get_logger("setup") if logger != None: # Checking Python Version python_major_version = sys.version_info.major if python_major_version < 3: logger.critical( "Setup Module : Version Issue : Please install Python Version 3 or greater to execute this script" ) else: logger.debug("Setup Module : Correct Python Version Found") logger.debug("Setup Module : Preparing Script Environment") # Staring Environment Setup setupEnvironment = SetupEnvironment(logger) setupEnvironment.installAndUnpgradeLibraries() logger.debug("Setup Module : Environment Setup Completed") else: print("Setup Module : Critical : Logging Setup Could Not Be Completed") print("Setup Module : Critical : Process will Exit") print("Setup Module : Critical : Contact Administrator For Resolution")
from HandleProperties import HandleProperties
from datetime import datetime
from datetime import date
from datetime import timedelta
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from Extract import Extract
from selenium.webdriver.chrome.options import Options
from oslo_concurrency import lockutils
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.keys import Keys

# Getting Basic Logging Options
logger = Logging().get_logger("scraping")

# Fetching Configuration From Properties File
scraping_script_path = pathlib.Path(__file__).parent.absolute()
handleProperties = HandleProperties()
configuration = handleProperties.read_properties(str(scraping_script_path) + "/Config/Scraping.properties")

# Initializing Variables using Command Line Variables
advertisment_links = set()
print(sys.argv)
total_command_line_arguments = len(sys.argv)
logger.debug("Length of Arguments : " + str(total_command_line_arguments))
# FIX: the script requires exactly five argv entries; `!= 5` replaces the
# redundant `> 5 or < 5` pair.
if total_command_line_arguments != 5:
    logger.error("Scraping Module : Incorrect No Of Arguments Passed")
    logger.error("Scraping Module : System exiting")
    sys.exit()
lr=conf.learning_rate, weight_decay=conf.weight_decay) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8) ############################## PREPARE DATASET ############################## print('System start to load data...') t0 = time() train_data, val_data, test_data = data_utils.load_all() t1 = time() print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0)) ########################### FIRST TRAINING ##################################### check_dir('%s/train_%s_mrg_id_x.log' % (conf.out_path, conf.data_name)) log = Logging('%s/train_%s_mrg_id_06.py' % (conf.out_path, conf.data_name)) train_model_path = '%s/train_%s_mrg_id_06.mod' % (conf.out_path, conf.data_name) # prepare data for the training stage train_dataset = data_utils.TrainData(train_data) val_dataset = data_utils.TrainData(val_data) test_dataset = data_utils.TrainData(test_data) train_batch_sampler = data.BatchSampler(data.RandomSampler( range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False) val_batch_sampler = data.BatchSampler(data.RandomSampler( range(val_dataset.length)), batch_size=conf.batch_size,
# Initialize the aspect-transform matrix from pretrained k-means centroids.
k_means_weight = np.load(
    '/content/drive/My Drive/task/aspect_based_rs/data/amazon_electronics/electronics.k_means.npy'
)
model_params['transform_T.weight'] = torch.FloatTensor(
    k_means_weight.transpose())  # (aspect_dimesion, word_dimension)
model.load_state_dict(model_params)
model.cuda()

optimizer = torch.optim.Adam(model.parameters(),
                             lr=conf.lr,
                             weight_decay=conf.weight_decay)

########################### FIRST TRAINING #####################################
# Log file and model checkpoint live on the mounted Google Drive.
log = Logging(
    '/content/drive/My Drive/task/aspect_based_rs/out/amazon_electronics/train_amazon_electronics_aspect_rating_id_x.log'
)
train_model_path = '/content/drive/My Drive/task/aspect_based_rs/out/amazon_electronics/train_amazon_electronics_aspect_rating_id_x.mod'

# prepare data for the training stage
train_dataset = data_utils.TrainData(train_data, train_review_embedding,
                                     train_user_historical_review_dict,
                                     train_item_historical_review_dict)
val_dataset = data_utils.TrainData(val_data, val_review_embedding,
                                   val_user_historical_review_dict,
                                   val_item_historical_review_dict)
test_dataset = data_utils.TrainData(test_data, test_review_embedding,
                                    test_user_historical_review_dict,
                                    test_item_historical_review_dict)

train_batch_sampler = data.BatchSampler(data.RandomSampler(
def scraper(self, site, user, userloc, logpath, hrefCheck, entitie):
    """
    This function scrapes the pages.
    :param site: Url from site
    :param user: User who uses the scraper
    :param userloc: Location of the current user
    :param logpath: Path of the log file.
    :param hrefCheck: Check if its the first time in the scraper.
    :param entitie: Check if the entities need to be extracted.
    :return: The filename and the SHA 256 value.
    """
    text = ""
    href = []
    # Audit-log fields (what/when/why/result) written via logging.log below.
    what = str(site + " scrapen.")
    when = time.strftime("%d/%m/%Y" + " " + "%H:%M:%S")
    why = "Extract text from the site for research."
    result = str(
        site + " gescraped. .txt file has been made with the content of the original site."
    )
    # Strip a trailing newline (URLs typically come from a file read).
    if str(site).endswith("\n"):
        site = site[:-1]
    try:
        print site
        scp = Scp()
        logging = Logging()
        HrefParser = hrefParser()
        if site.__contains__(".pdf"):
            # Download pdf and push it to the server
            if site.__contains__("www."):
                domain = site.split("www.")
            else:
                domain = site.split("://")
            tld = str(domain[1])
            tld = tld.replace("/", "-")  # slashes are illegal in the filename
            filename = "sites/" + tld
            # scraper = Scraper.scraper()
            self.download_file(site, filename)
            hex_dig = self.get_hashes(filename)
            scp.run(filename)
            # Write logging to .csv file.
            logging.log(user, userloc, when, what, why, result, hex_dig, logpath)
            return (filename, hex_dig)
        else:
            # Download the page
            page = requests.get(site)
            soup = BeautifulSoup(page.content, 'html.parser')
            # Extract all P tags
            # NOTE(review): soup.find_all('p') is re-run every iteration and
            # `y` is unused — candidate cleanup.
            for x, y in enumerate(soup.find_all('p')):
                text = text + soup.find_all('p')[x].get_text()
            # Extract all href's
            for a in soup.find_all('a', href=True):
                href.append(a['href'])
            # Parse text to unicode.
            unitext = unidecode(text)
            if site.__contains__("www."):
                domain = site.split("www.")
            else:
                domain = site.split("://")
            # Write text to .txt file
            filename = self.get_filname(domain, unitext)
            hex_dig = self.get_hashes(filename)
            if entitie:
                self.get_entities(filename, domain)
            scp.run(filename)
            logging.log(user, userloc, when, what, why, result, hex_dig, logpath)
            # Check if its the first scan.
            if hrefCheck == True and entitie == False:
                HrefParser.parser(href, str(domain[1]))
            print "SHA 256 : " + hex_dig + "\n"
            # NOTE(review): this branch returns None, unlike the pdf branch
            # which returns (filename, hex_dig) — confirm callers expect that.
            return
    except ExceptionHandling.WrongStatusCode as e:
        Logging.error_log("Menu", e.message)
        print "\033[93m" + e.message + "\033[0m"
        pass
def start(conf, data, model_name):
    """Grid-search driver: for each (reg, lr) pair, build the named model,
    train it with TF1, plot losses/metrics to Visdom, and checkpoint the
    best run by NDCG@20 with early stopping.

    NOTE(review): indentation reconstructed from a collapsed source —
    confirm block boundaries against the original file.
    """
    # Per-dataset Visdom port.
    # NOTE(review): vis_port stays undefined for any other data_name and
    # would raise NameError when Visdom(...) is created below.
    if conf.data_name in ['beibei']:
        vis_port = 1496
    elif conf.data_name in ['BeiBei2']:
        vis_port = 1469
    elif conf.data_name in ['BeiBei']:
        vis_port = 9641
    log_dir = os.path.join(os.getcwd(), 'log')
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    # Grid search over every (regularization, learning-rate) combination.
    for reg, lr in product(conf.reg, conf.learning_rate):
        print('reg: {}, lr: {}---------------------------'.format(reg, lr))
        # NOTE(review): eval() resolves the model class from its name —
        # model_name must come from trusted configuration only.
        model = eval(model_name)
        model = model(conf, reg, lr)
        data.initializeRankingHandle()
        d_train, d_val, d_test, d_test_eva = data.train, data.val, data.test, data.test_eva
        print('System start to load data...')
        t0 = time()
        d_train.initializeRankingTrain()
        d_val.initializeRankingVT(d_train)
        d_test.initializeRankingVT(d_train)
        d_test_eva.initalizeRankingEva()
        t1 = time()
        print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))
        # prepare model necessary data.
        data_dict = d_train.prepareModelSupplement()
        model.inputSupply(data_dict)
        model.startConstructGraph()
        # standard tensorflow running environment initialize
        tf_conf = tfv1.ConfigProto()
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
        tf_conf.gpu_options.allow_growth = True
        # tf_conf.gpu_options.per_process_gpu_memory_fraction = 0.6
        sess = tfv1.Session(config=tf_conf)
        sess.run(model.init)
        restore_epoch = 0
        # Optionally warm-start from pretrained checkpoints.
        if conf.premodel_flag == 1:
            if conf.model_name == 'neumf' and (conf.test == 0):
                # NeuMF restores its GMF and MLP sub-models separately.
                hhh = conf.pre_model.split('/')
                checkpoint_gmf = os.path.join('pretrain', conf.data_name, hhh[0])
                model.saver_GMF.restore(sess, checkpoint_gmf)
                print('restore gmf done')
                checkpoint_mlp = os.path.join('pretrain', conf.data_name, hhh[1])
                model.saver_mlp.restore(sess, checkpoint_mlp)
                print('restore mlp done')
            else:
                checkpoint = os.path.join('pretrain', conf.data_name, conf.pre_model)
                model.saver.restore(sess, checkpoint)
                print('restore model done')
        log_path = os.path.join(
            os.getcwd(),
            'log/{}_{}_reg{}_lr{}_neg{}_epoch{}+{}_dim{}_{}.log'.format(
                conf.data_name, conf.model_name, reg, lr, conf.num_negatives,
                restore_epoch, conf.epochs, conf.dimension, conf.test_name))
        # log_path = os.path.join(os.getcwd(), '{}_log/{}/dim{}_{}.log'.format(conf.data_name, conf.model_name, conf.dimension, conf.test_name))
        log = Logging(log_path)
        log.record('Following will output the evaluation of the model:')
        # Sliding window of the last 30 NDCG values, used for early stopping.
        ndcg_item = deque([0] * 30, 30)
        best_ndcg20 = 0.0
        if conf.model_name in ['gcncsr']:
            name = conf.model_name + 'A'
        else:
            name = conf.model_name
        if not conf.test:
            vis = Visdom(port=vis_port,
                         env='{}_reg{}lr{}-{}-dim{}'.format(
                             name, reg, lr, conf.test_name, conf.dimension))
        # Start Training !!!
        if conf.test == 2:
            After_Metric = {'Recall': [], 'NDCG': [], 'MRR': []}
        for epoch in range(1, conf.epochs + 1):
            if conf.test != 1:
                # optimize model with training data and compute train loss
                tmp_train_loss = []
                tmp_train_social_loss = []
                t0 = time()
                while d_train.terminal_flag:
                    d_train.getTrainRankingBatch()
                    d_train.linkedMap()
                    train_feed_dict = {}
                    for (key, value) in model.map_dict['train'].items():
                        train_feed_dict[key] = d_train.data_dict[value]
                    # Model-specific extra feeds (dropout rates, social idx).
                    if conf.model_name in ['ngcf']:
                        train_feed_dict[model.training] = True
                    if conf.model_name in ['gcncsr']:
                        train_feed_dict[model.dp_ui] = conf.dp_ui
                    if conf.model_name in ['lightgcn']:
                        train_feed_dict[model.dp] = conf.dp
                    if conf.model_name in ['sorec']:
                        train_feed_dict[
                            model.user_social_loss_idx] = d_train.user_social_loss_idx
                    [sub_train_loss, _] = sess.run(\
                        [model.map_dict['out']['train'], model.opt],
                        feed_dict=train_feed_dict)
                    tmp_train_loss.append(sub_train_loss)
                train_loss = np.mean(tmp_train_loss, 0)
                # Extra social-loss optimization pass for gcncsr.
                if conf.model_name in ['gcncsr'] and conf.social_loss:
                    d_train.terminal_flag = 1
                    while d_train.terminal_flag:
                        d_train.getTrainRankingBatch()
                        d_train.linkedMap()
                        [sub_train_social_loss, _] = sess.run(\
                            [model.social_loss, model.social_opt],
                            feed_dict={model.user_social_loss_idx: d_train.user_social_loss_idx})
                        tmp_train_social_loss.append(sub_train_social_loss)
                    social_loss = np.mean(tmp_train_social_loss)
                # Recompute social attention edge-chunk by edge-chunk and
                # refresh the social adjacency matrix.
                if conf.model_name in ['hgcn', 'gcncsr'] and conf.att:
                    social_att = []
                    for i in range(len(d_train.split_idx) - 1):
                        start, end = d_train.split_idx[i], d_train.split_idx[i + 1]
                        att = sess.run(
                            model.social_att,
                            feed_dict={
                                model.u0: d_train.social_edges_user0[start:end],
                                model.u1: d_train.social_edges_user1[start:end]
                            })
                        social_att.extend(list(att))
                    social_att = np.array(social_att)
                    model.update_social_matrix(social_att)
                # Separate social pass for the disentangled models.
                if conf.model_name in ['disgcn', 'disbpr'] and conf.social_loss:
                    d_train.terminal_flag = 1
                    while d_train.terminal_flag:
                        d_train.getTrainRankingBatch()
                        d_train.linkedMap()
                        train_feed_dict = {}
                        for (key, value) in model.map_dict['train_social'].items():
                            train_feed_dict[key] = d_train.data_dict[value]
                        [sub_train_social_loss, _] = sess.run(\
                            [model.social_loss, model.social_opt],
                            feed_dict=train_feed_dict)
                        tmp_train_social_loss.append(sub_train_social_loss)
                    social_loss = np.mean(tmp_train_social_loss)
                    if conf.model_name in ['disgcn']:
                        ufi_att_list = [
                            sess.run(model.ufi_att_list[k])
                            for k in range(conf.num_layers)
                        ]
                        model.update_ufi_att(ufi_att_list)
                        if conf.att:
                            int_att_list = [
                                sess.run(model.int_att_list[k])
                                for k in range(conf.num_layers)
                            ]
                            social_att_list = [
                                sess.run(model.social_att_list[k])
                                for k in range(conf.num_layers)
                            ]
                            model.update_att(int_att_list, social_att_list)
                # Fresh negatives for the next epoch; reset the batch pointer.
                d_train.generateTrainNegative()
                d_train.terminal_flag = 1
                t2 = time()
                if conf.model_name in ['disbpr', 'disgcn', 'gcncsr'] and conf.social_loss:
                    log.record('Epoch:%d, compute loss cost:%.4fs, train loss:%.4f, social loss: %.4f' % \
                        (epoch, (t2-t0), train_loss, social_loss))
                else:
                    log.record('Epoch:%d, compute loss cost:%.4fs, train loss:%.4f' % \
                        (epoch, (t2-t0), train_loss))
                if not conf.test:
                    # Visdom loss plots: create windows on epoch 1, append after.
                    X = [epoch]
                    if epoch == 1:
                        if conf.model_name in ['disgcn', 'disbpr', 'gcncsr'] and conf.social_loss:
                            vis.line([train_loss], X, win='train loss',
                                     opts={'title': 'train loss'})
                            vis.line([social_loss], X, win='social loss',
                                     opts={'title': 'social loss'})
                        else:
                            vis.line([train_loss], X, win='train loss',
                                     opts={'title': 'train loss'})
                    else:
                        if conf.model_name in ['disgcn', 'disbpr', 'gcncsr'] and conf.social_loss:
                            vis.line([train_loss], X, win='train loss',
                                     update='append', opts={'title': 'train loss'})
                            vis.line([social_loss], X, win='social loss',
                                     update='append', opts={'title': 'social loss'})
                        else:
                            vis.line([train_loss], X, win='train loss',
                                     update='append', opts={'title': 'train loss'})
            # Evaluate every 5 epochs (every epoch in test modes).
            if epoch % 5 == 0 or conf.test:
                metrics = test(model, conf, d_test_eva, sess, d_train, log)
                for i, k in enumerate(conf.topk):
                    log.record('Recall@{}: {}, NDCG@{}: {}, MRR@{}: {}'.format(
                        k, metrics['Recall'][i], k, metrics['NDCG'][i], k,
                        metrics['MRR'][i]))
                if conf.test == 1:
                    print('test done')
                    exit()
                if conf.test == 2:
                    # Accumulate metrics and report the mean at the last epoch.
                    for k in After_Metric.keys():
                        After_Metric[k].append(metrics[k])
                    if epoch == conf.epochs:
                        for k in After_Metric.keys():
                            After_Metric[k] = np.mean(After_Metric[k], 0)
                        for i, k in enumerate(conf.topk):
                            log.record(
                                'Recall@{}: {}, NDCG@{}: {}, MRR@{}: {}'.
                                format(k, After_Metric['Recall'][i], k,
                                       After_Metric['NDCG'][i], k,
                                       After_Metric['MRR'][i]))
                    continue
                # Metric plots: first evaluation happens at epoch 5.
                if epoch == 5:
                    vis.line([metrics['NDCG'][1]], X, win='NDCG@20',
                             opts={'title': 'NDCG@20'})
                    vis.line([metrics['Recall'][1]], X, win='Recall@20',
                             opts={'title': 'Recall@20'})
                    vis.line([metrics['MRR'][1]], X, win='MRR@20',
                             opts={'title': 'MRR@20'})
                else:
                    vis.line([metrics['NDCG'][1]], X, win='NDCG@20',
                             update='append', opts={'title': 'NDCG@20'})
                    vis.line([metrics['Recall'][1]], X, win='Recall@20',
                             update='append', opts={'title': 'Recall@20'})
                    vis.line([metrics['MRR'][1]], X, win='MRR@20',
                             update='append', opts={'title': 'MRR@20'})
                # Checkpoint whenever NDCG@20 improves.
                if metrics['NDCG'][1] > best_ndcg20:
                    best_ndcg20 = metrics['NDCG'][1]
                    save_path = './pretrain/{}/{}_{}_reg{}_lr{}_epoch{}+{}_dim{}_{}.ckpt'.format(
                        conf.data_name, conf.data_name, conf.model_name, reg,
                        lr, restore_epoch, conf.epochs, conf.dimension,
                        conf.test_name)
                    save_path = model.saver.save(sess, save_path,
                                                 write_meta_graph=False)
                    log.record('Model saved in ' + save_path)
                    if conf.model_name in ['gcncsr']:
                        # Also persist the learned social attention values.
                        save_path = './pretrain/{}/{}_{}_reg{}_lr{}_epoch{}+{}_dim{}_{}_att.npy'.format(
                            conf.data_name, conf.data_name, conf.model_name,
                            reg, lr, restore_epoch, conf.epochs,
                            conf.dimension, conf.test_name)
                        np.save(
                            save_path,
                            np.expand_dims(
                                sess.run(model.social_neighbors_sparse_matrix.
                                         _values), 1))
                        log.record('save att values')
                    log.record('test metric')
                    metrics_test = test(model, conf, d_test, sess, d_train, log)
                    for i, k in enumerate(conf.topk):
                        log.record(
                            'Recall@{}: {}, NDCG@{}: {}, MRR@{}: {}'.format(
                                k, metrics_test['Recall'][i], k,
                                metrics_test['NDCG'][i], k,
                                metrics_test['MRR'][i]))
                # if epoch%50 == 0:
                #     save_path = './pretrain/{}/{}_{}_reg{}_lr{}_epoch{}+{}_dim{}_{}_{}.ckpt'.format(conf.data_name, conf.data_name, conf.model_name, reg, lr, restore_epoch, conf.epochs, conf.dimension, conf.test_name, epoch)
                #     save_path = model.saver.save(sess, save_path, write_meta_graph=False)
                #     log.record('Model saved in ' + save_path)
                # Early stopping: break when the 30-epoch NDCG mean stops improving.
                ndcg_item.append(metrics['NDCG'][2])
                if np.mean(ndcg_item) > metrics['NDCG'][2] or epoch == conf.epochs:
                    print('ndcg@20 dose not change, early stopping ...')
                    break
# --- FM (factorization machine) training-setup fragment; the tail of the
# val_batch_sampler call is outside this chunk ---

# Build the model and move it to the GPU.
from fm import fm
model = fm()
#model.load_state_dict(torch.load('/content/drive/My Drive/task/aspect_based_rs/out/model/train_amazon_clothing_fm_id_2.mod'))
model.cuda()

# Adam with L2 regularisation; LR decays by 0.8 every 5 scheduler steps.
optimizer = torch.optim.Adam(model.parameters(), lr=conf.lr, weight_decay=conf.weight_decay)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)

########################### FIRST TRAINING #####################################
# NOTE(review): check_dir() is given "..._fm_id_x.log" but the log file actually
# opened is "..._fm_id_02.py" (a .py suffix for a log file) — looks like a
# copy/paste slip; confirm the intended path.
check_dir('%s/train_%s_fm_id_x.log' % (conf.out_path, conf.data_name))
log = Logging('%s/train_%s_fm_id_02.py' % (conf.out_path, conf.data_name))
train_model_path = '%s/train_%s_fm_id_02.mod' % (conf.out_path, conf.data_name)

#import pdb; pdb.set_trace()

# prepare data for the training stage
train_dataset = data_utils.TrainData(train_data)
val_dataset = data_utils.TrainData(val_data)
test_dataset = data_utils.TrainData(test_data)

# Shuffled mini-batches; keep the final short batch (drop_last=False).
train_batch_sampler = data.BatchSampler(data.RandomSampler(
    range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)
val_batch_sampler = data.BatchSampler(data.RandomSampler(
# --- joint review/rating model training-setup fragment; truncated at the end ---

# Two optimizers: the review decoder trains without weight decay, the rating
# encoder with L2 regularisation.
optimizer_review = torch.optim.Adam(model.decoder.parameters(), lr=conf.learning_rate)
optimizer_rating = torch.optim.Adam(model.encoder.parameters(), lr=conf.learning_rate, weight_decay=conf.weight_decay)

############################## PREPARE DATASET ##############################
print('System start to load data...')
t0 = time()
train_data, val_data, test_data = data_utils.load_all()
t1 = time()
print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

########################### FIRST TRAINING #####################################
# NOTE(review): check_dir() prepares "..._lm_mf_id_x.log" but the log actually
# written is "..._lm_mf_id_X.py" — mismatched name and a .py suffix for a log
# file; confirm which path is intended.
check_dir('%s/train_%s_lm_mf_id_x.log' % (conf.out_path, conf.data_name))
log = Logging('%s/train_%s_lm_mf_id_X.py' % (conf.out_path, conf.data_name))
train_model_path = '%s/train_%s_lm_mf_id_X.mod' % (conf.out_path, conf.data_name)

# prepare data for the training stage
train_dataset = data_utils.TrainData(train_data)
val_dataset = data_utils.TrainData(val_data)
test_dataset = data_utils.TrainData(test_data)

train_batch_sampler = data.BatchSampler(data.RandomSampler(
    range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)
val_batch_sampler = data.BatchSampler(data.RandomSampler(
    range(val_dataset.length)), batch_size=conf.batch_size,
# -*- coding: utf-8 -*-
"""Smoke-test script for the project's Logging wrapper.

Emits one message at each of the debug/info/error levels through a
'BBDSpider'-named logger so the output formatting can be inspected.
"""
import sys
# Python 2 default-encoding hack: restore setdefaultencoding and force UTF-8
# before the Logging module is imported.
reload(sys)
sys.setdefaultencoding('utf-8')
from Logging import Logging


if __name__ == '__main__':
    logger = Logging('BBDSpider').get_logging()
    # Exercise each severity level once with a fixed sample message.
    for emit, text in ((logger.debug, 'this is debug message'),
                       (logger.info, 'this is info message'),
                       (logger.error, 'this is error message')):
        emit(text)
# --- abae_rs training-setup fragment; data loading started in the previous
# chunk, and the end is truncated mid-call ---
t1 = time()
print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

############################## CREATE MODEL ##############################
from abae_rs import abae_rs
model = abae_rs()
#model.load_state_dict(torch.load('/content/drive/My Drive/task/aspect_based_rs/out/amazon_clothing/train_amazon_clothing_pmf_id_X1.mod'))
model.cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate, weight_decay=conf.weight_decay)

########################### FIRST TRAINING #####################################
# NOTE(review): check_dir() prepares "..._ncf_id_x.log" but the log written is
# "..._ncf_id_X1.log" — the names disagree; confirm the intended file.
check_dir('%s/train_%s_ncf_id_x.log' % (conf.out_path, conf.data_name))
log = Logging('%s/train_%s_ncf_id_X1.log' % (conf.out_path, conf.data_name))
train_model_path = '%s/train_%s_ncf_id_X1.mod' % (conf.out_path, conf.data_name)

# prepare data for the training stage; datasets also receive the per-user and
# per-item sequence dicts (presumably review/interaction histories — confirm).
train_dataset = data_utils.TrainData(train_data, user_seq_dict, item_seq_dict)
val_dataset = data_utils.TrainData(val_data, user_seq_dict, item_seq_dict)
test_dataset = data_utils.TrainData(test_data, user_seq_dict, item_seq_dict)

train_batch_sampler = data.BatchSampler(data.RandomSampler(\
    range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)
val_batch_sampler = data.BatchSampler(data.RandomSampler(\
    range(val_dataset.length)), batch_size=conf.batch_size, drop_last=False)
test_batch_sampler = data.BatchSampler(data.RandomSampler(\
# --- ABAE (amazon_movies_tv) init fragment; the first line below is the tail
# of a model_params['word_embedding.weight'] assignment whose head is in the
# previous chunk, and the end is truncated mid-call ---
    word_embedding.wv[word_embedding.wv.index2entity[idx - 3]])

# Initialise the aspect-transform matrix from precomputed k-means centroids.
k_means_weight = np.load(
    '/content/drive/My Drive/task/aspect_based_rs/data/amazon_movies_tv/amazon_movies_tv.k_means.npy'
)
model_params['transform_T.weight'] = torch.FloatTensor(
    k_means_weight.transpose())  # (aspect_dimension, word_dimension)
model.load_state_dict(model_params)
model.cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate)

########################### FIRST TRAINING #####################################
log = Logging(
    '/content/drive/My Drive/task/aspect_based_rs/out/amazon_movies_tv/train_amazon_movies_tv_abae_id_x.log'
)
train_model_path = '/content/drive/My Drive/task/aspect_based_rs/out/amazon_movies_tv/train_amazon_movies_tv_abae_id_x.mod'

# prepare data for the training stage (each dataset pairs ratings with
# precomputed review embeddings)
train_dataset = data_utils.TrainData(train_data, train_review_embedding)
val_dataset = data_utils.TrainData(val_data, val_review_embedding)
test_dataset = data_utils.TrainData(test_data, test_review_embedding)

train_batch_sampler = data.BatchSampler(data.RandomSampler(
    range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)
val_batch_sampler = data.BatchSampler(data.RandomSampler(
    range(val_dataset.length)), batch_size=conf.batch_size,
model.load_state_dict(model_params)
'''
# NOTE(review): the ''' above closes a triple-quoted (disabled) block opened in
# the previous chunk; the line preceding it is part of that disabled text, not
# live code.  The live path instead restores a saved AdaBound checkpoint:
model.load_state_dict(
    torch.load('%s/train_%s_abae_id_adabound.mod' % (conf.model_path, conf.data_name)))
model.cuda()

#optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate)
# Third-party AdaBound optimizer (Adam that anneals toward SGD at final_lr).
import adabound
optimizer = adabound.AdaBound(model.parameters(), lr=conf.learning_rate, final_lr=0.1)

########################### FIRST TRAINING #####################################
# NOTE(review): check_dir() prepares "..._abae_id_x.log" but the log written is
# "..._abae_id_adabound.log" — confirm which path is intended.
check_dir('%s/train_%s_abae_id_x.log' % (conf.out_path, conf.data_name))
log = Logging('%s/train_%s_abae_id_adabound.log' % (conf.out_path, conf.data_name))
train_model_path = '%s/train_%s_abae_id_adabound.mod' % (conf.out_path, conf.data_name)

# prepare data for the training stage
train_dataset = data_utils.TrainData(train_data, train_review_embedding)
val_dataset = data_utils.TrainData(val_data, val_review_embedding)
test_dataset = data_utils.TrainData(test_data, test_review_embedding)

train_batch_sampler = data.BatchSampler(data.RandomSampler(
    range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)
val_batch_sampler = data.BatchSampler(data.RandomSampler(
    range(val_dataset.length)), batch_size=conf.batch_size,
def unifyRequestResult(data_dict, bbd_type):
    """Unify a crawled company record into the standard storage format.

    Deep-copies ``data_dict``, merges its ``rowkey_dict`` entries back into
    the top level together with fresh ``uptime``/``dotime`` timestamps,
    generates the ``_id`` and ``rowkey`` identifiers, and maps the crawl
    source key (``bbd_type``) to its Chinese province/region name under
    ``type``.

    :param data_dict: (dict) crawled company info; must contain "rowkey_dict"
    :param bbd_type: (str) crawl source key, e.g. "guangdong"
    :return: (dict) the unified record, or None if processing failed
    """
    # Crawl-source key -> Chinese province/region display name.
    # Fixed: "hebei" previously mapped to u"湖北" (Hubei), duplicating the
    # "hubei" entry; the correct name for Hebei is u"河北".  "henan" is now a
    # unicode literal like every other value.
    qyxx_dict = {
        "guangdong": u"广东", "hubei": u"湖北", "hunan": u"湖南",
        "henan": u"河南", "heilongjiang": u"黑龙江", "hebei": u"河北",
        "hainan": u"海南", "guizhou": u"贵州", "guangxi": u"广西",
        "fujian": u"福建", "chongqing": u"重庆", "beijing": u"北京",
        "anhui": u"安徽", "jiangsu": u"江苏", "gansu": u"甘肃",
        "xinjiang": u"新疆", "tianjin": u"天津", "sichuan": u"四川",
        "shanxixian": u"陕西", "shanxitaiyuan": u"山西", "shandong": u"山东",
        "shanghai": u"上海", "qinghai": u"青海", "ningxia": u"宁夏",
        "neimenggu": u"内蒙古", "liaoning": u"辽宁", "jilin": u"吉林",
        "jiangxi": u"江西", "xizang": u"西藏", "zhejiang": u"浙江",
        "yunnan": u"云南", "zongju": u"总局"
    }
    # Created before the try block so the except clause can never raise a
    # NameError (previously the logger was constructed inside the try).
    logger = Logging(name=bbd_type)
    try:
        # `in` replaces the Python-2-only dict.has_key().
        if "rowkey_dict" not in data_dict:
            raise Exception(
                "Company data dict don't has rowkey values, wrong data")
        # Work on copies so the caller's dict is never mutated.
        data_dict = copy.deepcopy(data_dict)
        rowkey_dict = copy.deepcopy(data_dict["rowkey_dict"])
        uptime = TimeUtil.timeStamp()
        dotime = TimeUtil.doTime()
        rowkey_dict["uptime"] = uptime
        rowkey_dict["dotime"] = dotime
        rowkey_dict["bbd_type"] = bbd_type
        data_dict.update(rowkey_dict)

        # _id is derived from these columns; rowkey is derived from _id.
        id_column_list = ["company_name", "company_zch", "dotime"]
        _id = UniField.getId(data_dict, id_column_list)
        # attach rowkey and _id
        rowkey = UniField.getRowkey(bbd_type, _id)
        logger.info(u"统一字段(网页原文) 产生rowkey 为:[%s]", rowkey)
        data_dict["rowkey"] = rowkey
        data_dict["_id"] = _id

        if bbd_type in qyxx_dict:
            data_dict["type"] = qyxx_dict[bbd_type]
        # Drop a stray None key left over from upstream parsing, if present.
        if None in data_dict:
            del data_dict[None]
        return data_dict
    except Exception as e:
        # Log at error level (was info) and keep the original best-effort
        # contract: swallow the exception and return None.
        logger.error(str(e))
        return None
#model.encoder.user_embedding.weight.requires_grad = False #model.encoder.item_embedding.weight.requires_grad = False optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate) ############################## PREPARE DATASET ############################## print('System start to load data...') t0 = time() train_data, val_data, test_data = data_utils.load_all() t1 = time() print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0)) ########################### FIRST TRAINING ##################################### check_dir('%s/train_%s_lm_id_x.py' % (conf.out_path, conf.data_name)) log = Logging('%s/train_%s_att2seq_id_X.py' % (conf.out_path, conf.data_name)) train_model_path = '%s/train_%s_att2seq_id_X' % (conf.out_path, conf.data_name) # prepare data for the training stage train_dataset = data_utils.TrainData(train_data) train_batch_sampler = data.BatchSampler(data.RandomSampler(\ range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False) review_val_dataset = data_utils.TestData(val_data) review_val_sampler = data.BatchSampler(data.RandomSampler(\ range(review_val_dataset.length)), batch_size=conf.batch_size, drop_last=False) review_test_dataset = data_utils.TestData(test_data) review_test_sampler = data.BatchSampler(data.RandomSampler(\ range(review_test_dataset.length)), batch_size=conf.batch_size, drop_last=False)
# load word embedding from pretrained word2vec model model_params = model.state_dict() word_embedding = Word2Vec.load('/content/drive/My Drive/task/aspect_based_rs/data/amazon_clothing/amazon_clothing.wv.model') for idx in range(3, conf.vocab_sz): model_params['word_embedding.weight'][idx] = torch.FloatTensor(word_embedding.wv[word_embedding.wv.index2entity[idx-3]]) model.load_state_dict(model_params) #model.load_state_dict(torch.load('/content/drive/My Drive/task/aspect_based_rs/out/amazon_clothing/train_amazon_clothing_language_model_id_0X.mod')) model.cuda() optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate) ########################### FIRST TRAINING ##################################### check_dir('%s/train_%s_language_model_id_x.log' % (conf.out_path, conf.data_name)) log = Logging('%s/train_%s_language_model_id_0X.py' % (conf.out_path, conf.data_name)) train_model_path = '%s/train_%s_language_model_id_0X.mod' % (conf.out_path, conf.data_name) # prepare data for the training stage train_dataset = data_utils.TrainData(train_data) val_dataset = data_utils.TrainData(val_data) test_dataset = data_utils.TrainData(test_data) train_batch_sampler = data.BatchSampler(data.RandomSampler(range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False) val_batch_sampler = data.BatchSampler(data.RandomSampler(range(val_dataset.length)), batch_size=conf.batch_size, drop_last=False) test_batch_sampler = data.BatchSampler(data.RandomSampler(range(test_dataset.length)), batch_size=conf.batch_size, drop_last=False) # Start Training !!! min_loss = 0 for epoch in range(1, conf.train_epochs+1): t0 = time()
# --- NRMS news-recommendation training-setup fragment; the epoch loop body
# continues outside this chunk ---
from nrms import nrms
model = nrms()
model.cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate)

############################## PREPARE DATASET ##############################
print('System start to load data...')
t0 = time()
train_data, val_data = data_utils.load_all()
t1 = time()
print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

########################### TRAINING STAGE ##################################
check_dir('%s/train_log' % conf.out_path)
log = Logging('%s/train_%s_nrms.log' % (conf.out_path, conf.data_name))
train_model_path = '%s/train_%s_nrms.mod' % (conf.out_path, conf.data_name)

# prepare data for the training stage
train_dataset = data_utils.TrainData(train_data)
val_dataset = data_utils.TestData(val_data)

# Training batches are shuffled; validation iterates sequentially and drops the
# final short batch (drop_last=True), so a few tail samples are never scored.
train_batch_sampler = data.BatchSampler(data.RandomSampler(
    range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)
val_batch_sampler = data.BatchSampler(data.SequentialSampler(
    range(val_dataset.length)), batch_size=conf.batch_size, drop_last=True)

# Start Training !!!
# Best validation AUC seen so far; presumably updated inside the epoch loop.
max_auc = 0
for epoch in range(1, conf.train_epochs+1):
    t0 = time()
# --- ExpansionNet training-setup fragment; the first line below is the tail of
# an optimizer constructor begun in the previous chunk, and the end is
# truncated mid-call ---
    weight_decay=conf.weight_decay)
# LR decays by 0.8 every 5 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)

############################## PREPARE DATASET ##############################
print('System start to load data...')
t0 = time()
train_data, val_data, test_data = data_utils.load_all()
t1 = time()
print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

########################### FIRST TRAINING #####################################
# NOTE(review): check_dir() prepares "..._id_x.log" but the log written is
# "..._id_X.py" — mismatched name with a .py suffix for a log file; confirm.
check_dir('%s/train_%s_expansion_net_id_x.log' % (conf.out_path, conf.data_name))
log = Logging('%s/train_%s_expansion_net_id_X.py' % (conf.out_path, conf.data_name))
train_model_path = '%s/train_%s_expansion_net_id_X.mod' % (conf.out_path, conf.data_name)

# prepare data for the training stage
train_dataset = data_utils.TrainData(train_data)
val_dataset = data_utils.TrainData(val_data)
test_dataset = data_utils.TrainData(test_data)

train_batch_sampler = data.BatchSampler(data.RandomSampler(
    range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)
val_batch_sampler = data.BatchSampler(data.RandomSampler(
    range(val_dataset.length)), batch_size=conf.batch_size,