Example #1
 def __init__(self):
     self.mongo = MongoDB()
     self.news_url_queue = news_url_Queue()  # stores news URLs for multithreaded crawling
     self.news_html_queue = news_url_Queue()  # stores news HTML
     self.old_day_news_queue = news_url_Queue()
     # self.log = Logging('../helloword/static/sina').get_logging()
     self.log = Logging('../Sina/sina.txt').get_logging()
Example #2
def main():
    if len(sys.argv) < 2:
        print "error! Not enough arguments\nUsage: python playerData.py file.csv [info|debug]"
        return -1
    pathOfFileToParse = sys.argv[1]
    global log
    if len(sys.argv) == 3 and sys.argv[2] != None:
        log = Logging(sys.argv[2])
        log.logInfo('Executing: python %s %s %s' %
                    (sys.argv[0], sys.argv[1], sys.argv[2]))
    else:
        log = Logging()
        log.logInfo('Executing: python %s %s' % (sys.argv[0], sys.argv[1]))
    log.logInfo('Execution starts')
    fileToParse = FileTreatment(pathOfFileToParse)
    log.logInfo('File to be parsed: %s' % (pathOfFileToParse))
    pathOfDataFileGenerated = os.path.join(os.getcwd(), generateName('.sql'))
    pathOfPersonFileGenerated = os.path.join(os.getcwd(), generateName('.sql'))
    log.logInfo('Data file generated: %s' % (pathOfDataFileGenerated))
    log.logInfo('Person file generated: %s' % (pathOfPersonFileGenerated))
    dataFileGenerated = FileTreatment(pathOfDataFileGenerated)
    if parsingFile(fileToParse, dataFileGenerated, 'data') != -1:
        if dataFileGenerated.readFile() == -1:
            log.logInfo('File %s wrongly generated' % (dataFileGenerated))
            return -1
        populateDB(dataFileGenerated.file)
        personFileGenerated = FileTreatment(pathOfPersonFileGenerated)
        if parsingFile(fileToParse, personFileGenerated, 'person') != -1:
            if personFileGenerated.readFile() == -1:
                log.logInfo('File %s wrongly generated' %
                            (personFileGenerated))
                return -1
            populateDB(personFileGenerated.file)
            fileToParse.deleteFile()
            dataFileGenerated.deleteFile()
            personFileGenerated.deleteFile()
        log.logInfo('Execution ends\n\n')
        return 1
    else:
        print 'error! Wrong file'
        log.logInfo('Execution ends with failures\n\n')
    return -1
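
The Logging class constructed in main() is project-local and its source is not part of this listing; the sketch below is only an assumption about its shape, consistent with the Logging(), Logging(sys.argv[2]) and log.logInfo(...) calls above (the log-file path is made up).

import datetime

class Logging(object):
    """Minimal stand-in: optional level ('info' or 'debug') and a logInfo
    method that appends timestamped lines to an assumed log file."""

    def __init__(self, level='info', path='playerData.log'):
        self.level = level
        self.path = path

    def logInfo(self, message):
        stamp = datetime.datetime.now().isoformat()
        with open(self.path, 'a') as f:
            f.write('%s INFO %s\n' % (stamp, message))
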
Example #3
    def wrapped(*args, **kwargs):
        # try: log.info
        logger = Logging(name = "Decorator")
        s_time=time.time()
        logger.info('[Enter method: %s]',func.__name__)
        try:

            return func(*args, **kwargs)
        except Exception, e:
            print 'Exception in %s : %s' % (func.__name__, e)
            raise Exception(e)
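
The decorator that produces this wrapped() function is not included in the snippet. A minimal, self-contained sketch of the usual shape follows, with the stdlib logging module standing in for the project's Logging class; the exit-time log is an assumption (the original only records s_time).

import functools
import logging
import time

logger = logging.getLogger("Decorator")

def log_calls(func):
    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        s_time = time.time()
        logger.info('[Enter method: %s]', func.__name__)
        try:
            return func(*args, **kwargs)
        except Exception as e:
            print('Exception in %s : %s' % (func.__name__, e))
            raise
        finally:
            # assumed use of s_time: log elapsed time on the way out
            logger.info('[Exit method: %s, cost %.3fs]',
                        func.__name__, time.time() - s_time)
    return wrapped

@log_calls
def add(a, b):
    return a + b
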
Example #4
    def initialize(self):
        self.logger = Logging()

        #Get list of songs from web
        if len(self.songs) <= 0:
            self.GetPlaylistFromWeb()
            if len(self.songs) > 0:
                print("Successfully retrieved playlist")
            else:
                print("Failed to get playlist from web!")
                return False

        #Get auth token
        if not self.token or self.sp is None:
            token = util.prompt_for_user_token(
                config.User_Name,
                scope,
                client_id=config.Client_ID,
                client_secret=config.Client_Secret,
                redirect_uri=redirect_uri)
            self.token = token
            self.sp = spotipy.Spotify(auth=token)

            # verify that both the token and the client were actually created
            if self.token and self.sp is not None:
                print("Successfully got auth token")
            else:
                print("Failed to get auth token!")
                return False

        #Get playlist ID
        if not self.playlist_id:
            self.playlist_id = self.GetSpecificPlaylist()
            if self.playlist_id:
                print("Successfully got playlist ID")
            else:
                print("Failed to get playlist ID!")
                return False

        self.songs_already_in_playlist = self.GetPlaylistContents()
        if self.songs_already_in_playlist:
            print("Successfully got playlist from spotify")
        else:
            print("Failed to get playlist from spotify!")
            return False

        return True
Example #5
    def unifyParseResult(data_dict, bbd_type=None, **kwargs):

        if bbd_type is None:
            bbd_type = data_dict.get("bbd_type", "UniField")
        logger = Logging(name=bbd_type)
        data_dict = copy.deepcopy(data_dict)
        basic_keys_list1 = [
            "bbd_source", "bbd_table", "version", "bbd_html", "bbd_url",
            "bbd_params"
        ]
        UniField.addDefaultField(data_dict, basic_keys_list1)

        basic_keys_list2 = ["baxx", "bgxx", "gdxx", "fzjg", "xzcf"]
        UniField.addDefaultField(data_dict, basic_keys_list2, empty_value=[])
        data_dict["version"] = 3  # 兼容 数据平台字段
        data_dict.update(**kwargs)
        return data_dict
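
UniField.addDefaultField is not shown in this example; the following is a hypothetical sketch of the behaviour implied by the two calls above (fill in any missing key with a default value); the real helper may differ.

def addDefaultField(data_dict, key_list, empty_value=""):
    # Insert each key only if it is missing, leaving existing values untouched.
    for key in key_list:
        data_dict.setdefault(key, empty_value)
    return data_dict
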
Example #6
 def setup_game(self):
     # this only happens once, at beginning
     # set up keys
     self.setup_keys()
     self.logging = Logging(self.config)
     self.sequences = CalSequences(self.config, self.base, self.logging,
                                   self.key_dict)
     if self.config.setdefault('PHOTO_PATH', False):
         self.photos = Photos(self.config, self.base, self.logging,
                              self.deg_per_pixel)
         self.photos.load_all_photos()
         self.call_subroutine.append(self.photos)
         # print 'call_subroutine', self.call_subroutine
     # start generating/receiving data
     self.eye_data = EyeData(self.base, self.config['FAKE_DATA'])
     self.start_eye_data()
     # start reward capabilities, if using daq
     if self.use_daq_reward:
         # print 'setup reward'
         self.start_reward_task()
     if not self.testing:
         self.start_gig()
Example #7
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=conf.learning_rate,
                                 weight_decay=conf.weight_decay)

    ############################## PREPARE DATASET ##############################
    print('System start to load data...')
    t0 = time()
    train_data, val_data, test_data, user_seq_dict, item_seq_dict = data_utils.load_all(
    )
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_abae_rs_id_x.py' % (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_abae_rs_id_X.py' %
                  (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_abae_rs_id_X.mod' % (conf.out_path,
                                                         conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data, user_seq_dict,
                                         item_seq_dict)
    val_dataset = data_utils.TrainData(val_data, user_seq_dict, item_seq_dict)
    test_dataset = data_utils.TrainData(test_data, user_seq_dict,
                                        item_seq_dict)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(\
        range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)
    val_batch_sampler = data.BatchSampler(data.RandomSampler(\
        range(val_dataset.length)), batch_size=conf.batch_size, drop_last=False)
    test_batch_sampler = data.BatchSampler(data.RandomSampler(\
Example #8
# -*- coding: utf-8 -*-

apiVersion = '0.9'

import gluon.contrib.simplejson as simplejson
from Logging import Logging
logger = Logging(db)

from Events import Events
eventDAL = Events(db)

# The following line is required for .json output.
# Note: There are security implications related to generic views.
# See:
#   https://groups.google.com/forum/?fromgroups=#!topic/web2py/Jk-TIoQhRh4
#   http://comments.gmane.org/gmane.comp.python.web2py/67902
response.generic_patterns = ['json', 'jsonp']

response.headers['Cache-Control'] = "max-age=0"


@auth.requires_login()
def remote_login():
    user = False
    thisResponse = "Missing Form Field"
    jsonData = simplejson.loads(request.body.read()) if request.body else {}
    if jsonData:
        if jsonData.get("email") and jsonData.get("password"):
            password = jsonData.get("password").encode('ascii', 'replace')
            user = auth.login_bare(jsonData.get("email"), password)
            if user:
Example #9
                                        lr=conf.learning_rate,
                                        weight_decay=conf.weight_decay)
    review_optimizer = torch.optim.Adam(model.parameters(),
                                        lr=conf.learning_rate)

    ############################## PREPARE DATASET ##############################
    print('System start to load data...')
    t0 = time()
    train_data, val_data, test_data, user_doc_dict, item_doc_dict = data_utils.load_all(
    )
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_cf_gcn_id_x.py' % (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_cf_gcn_id_X.py' %
                  (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_cf_gcn_id_X' % (conf.out_path,
                                                    conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data, user_doc_dict,
                                         item_doc_dict)
    train_batch_sampler = data.BatchSampler(data.RandomSampler(\
        range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)

    val_dataset = data_utils.TrainData(val_data, user_doc_dict, item_doc_dict)
    val_batch_sampler = data.BatchSampler(data.SequentialSampler(\
        range(val_dataset.length)), batch_size=conf.batch_size, drop_last=False)

    test_dataset = data_utils.TrainData(test_data, user_doc_dict,
                                        item_doc_dict)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=5,
                                                gamma=0.8)

    ############################## PREPARE DATASET ##############################
    print('System start to load data...')
    t0 = time()
    train_data, val_data, test_data, \
        train_user_historical_review_dict, train_item_historical_review_dict = data_utils.load_all()
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_aspect_rating_1_id_x.log' %
              (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_aspect_rating_1_id_02.py' %
                  (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_aspect_rating_1_id_02.mod' % (
        conf.out_path, conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data,
                                         train_user_historical_review_dict,
                                         train_item_historical_review_dict,
                                         train_data)
    val_dataset = data_utils.TrainData(val_data,
                                       train_user_historical_review_dict,
                                       train_item_historical_review_dict,
                                       train_data)
    test_dataset = data_utils.TrainData(test_data,
                                        train_user_historical_review_dict,
                                        train_item_historical_review_dict,
Example #11
    model.load_state_dict(
        torch.load(
            '/content/drive/My Drive/task/aspect_based_rs/out/amazon_sports/train_amazon_sports_pmf_id_adam.mod'
        ))
    model.cuda()
    #optimizer = torch.optim.SGD(model.parameters(), lr=conf.learning_rate, weight_decay=conf.weight_decay)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=conf.learning_rate,
                                 weight_decay=conf.weight_decay)

    #import adabound
    #optimizer = adabound.AdaBound(model.parameters(), lr=conf.learning_rate, final_lr=0.1, weight_decay=conf.weight_decay)

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_pmf_id_adam.log' % (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_pmf_id_adam.log' %
                  (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_pmf_id_adam.mod' % (conf.out_path,
                                                        conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data)
    val_dataset = data_utils.TrainData(val_data)
    test_dataset = data_utils.TrainData(test_data)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(train_dataset.length)),
                                            batch_size=conf.batch_size,
                                            drop_last=False)
    val_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(val_dataset.length)),
                                          batch_size=conf.batch_size,
Example #12
def start(conf, data, model, evaluate, dir):
    log_dir = os.path.join(os.getcwd(), 'log')
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    # define log name
    # log_path = os.path.join(os.getcwd(), 'log/%s_%s.log' % (conf.data_name, conf.model_name))
    log_path = os.path.join(os.getcwd(), 'log/%s' % dir)
    # start to prepare data for training and evaluating
    data.initializeRankingHandle()

    d_train, d_val, d_test, d_test_eva = data.train, data.val, data.test, data.test_eva  # test_eva and test hold the same data

    print('System start to load data...')
    t0 = time()
    d_train.initializeRankingTrain()  # builds hash_data, positive_data, negative_data; the calls below do likewise
    d_val.initializeRankingVT()
    d_test.initializeRankingVT()
    d_test_eva.initalizeRankingEva()
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    # prepare model necessary data.
    data_dict = d_train.prepareModelSupplement(model)  # builds consumeItemList and socialNeighborList
    model.inputSupply(data_dict)  # builds consumeItemMatrix and socialNeighborMatrix
    model.startConstructGraph()

    # standard tensorflow running environment initialize
    tf_conf = tf.ConfigProto()
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf_conf.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_conf)

    sess.run(model.init)

    conf.pretrain_flag = 0
    if conf.pretrain_flag == 1:
        model.saver.restore(sess, conf.pre_model)

    # set debug_flag=0, doesn't print any results
    # log = Logging(log_path)
    log = Logging(log_path)
    print()
    log.record('Following will output the evaluation of the model:')

    # Start Training !!!
    for epoch in range(1, conf.epochs + 1):
        # optimize model with training data and compute train loss
        tmp_train_loss = []
        t0 = time()

        #tmp_total_list = []
        while d_train.terminal_flag:
            d_train.getTrainRankingBatch()  # process one batch (512) at a time; this updates the while condition
            d_train.linkedMap()

            train_feed_dict = {}
            for (key, value) in model.map_dict['train'].items():
                train_feed_dict[key] = d_train.data_dict[value]

            [sub_train_loss, _] = sess.run(\
                [model.map_dict['out']['train'], model.opt], feed_dict=train_feed_dict)
            tmp_train_loss.append(sub_train_loss)
        train_loss = np.mean(tmp_train_loss)
        t1 = time()

        # compute val loss and test loss
        d_val.getVTRankingOneBatch()
        d_val.linkedMap()
        val_feed_dict = {}
        for (key, value) in model.map_dict['val'].items():
            val_feed_dict[key] = d_val.data_dict[value]
        val_loss = sess.run(model.map_dict['out']['val'],
                            feed_dict=val_feed_dict)

        d_test.getVTRankingOneBatch()
        d_test.linkedMap()
        test_feed_dict = {}
        for (key, value) in model.map_dict['test'].items():
            test_feed_dict[key] = d_test.data_dict[value]
        test_loss = sess.run(model.map_dict['out']['test'],
                             feed_dict=test_feed_dict)
        t2 = time()

        # start evaluate model performance, hr and ndcg
        def getPositivePredictions():
            d_test_eva.getEvaPositiveBatch()
            d_test_eva.linkedRankingEvaMap()
            eva_feed_dict = {}
            for (key, value) in model.map_dict['eva'].items():
                eva_feed_dict[key] = d_test_eva.data_dict[value]
            positive_predictions = sess.run(model.map_dict['out']['eva'],
                                            feed_dict=eva_feed_dict)
            return positive_predictions

        def getNegativePredictions():
            negative_predictions = {}
            terminal_flag = 1
            while terminal_flag:
                batch_user_list, terminal_flag = d_test_eva.getEvaRankingBatch(
                )
                d_test_eva.linkedRankingEvaMap()
                eva_feed_dict = {}
                for (key, value) in model.map_dict['eva'].items():
                    eva_feed_dict[key] = d_test_eva.data_dict[value]
                index = 0
                tmp_negative_predictions = np.reshape(
                    sess.run(model.map_dict['out']['eva'],
                             feed_dict=eva_feed_dict), [-1, conf.num_evaluate])
                for u in batch_user_list:
                    negative_predictions[u] = tmp_negative_predictions[index]
                    index = index + 1
            return negative_predictions

        tt2 = time()

        index_dict = d_test_eva.eva_index_dict
        positive_predictions = getPositivePredictions()  # 18579*1, not deduplicated
        negative_predictions = getNegativePredictions()  # 10622*1000, deduplicated, hence fewer

        d_test_eva.index = 0  # !!!important, prepare for new batch
        hr, ndcg = evaluate.evaluateRankingPerformance(\
            index_dict, positive_predictions, negative_predictions, conf.topk, conf.num_procs)
        tt3 = time()

        # print log to console and log_file
        log.record('Epoch:%d, compute loss cost:%.4fs, train loss:%.4f, val loss:%.4f, test loss:%.4f' % \
            (epoch, (t2-t0), train_loss, val_loss, test_loss))
        log.record('Evaluate cost:%.4fs, hr:%.4f, ndcg:%.4f' %
                   ((tt3 - tt2), hr, ndcg))

        ## reset train data pointer, and generate new negative data
        d_train.generateTrainNegative()
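
Logging in this script is a project-local helper rather than the stdlib logging module, and its source is not shown. Below is a minimal sketch consistent with how record() is used above (echo to the console and append to the log file); the real implementation may do more.

class Logging(object):
    def __init__(self, log_path):
        self.log_path = log_path

    def record(self, message):
        # Print to stdout and append the same line to the log file.
        print(message)
        with open(self.log_path, 'a') as f:
            f.write(message + '\n')
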
Example #13
                               self.client_address[0])
            except ValueError:
                logging.append("ERROR! light " + data.rsplit('get light ')[1] +
                               " doesn't exist!")
                self.request.send(b"false")
        else:
            self.request.send(b"false")


if __name__ == '__main__':

    # First let's import some config files!
    configFile = configparser.ConfigParser()
    configFile.read('config.ini')

    logging = Logging(configFile['General']['LogFile'])

    address = (configFile['Server']['IP'], int(configFile['Server']['Port']))
    server = socketserver.TCPServer(address, EchoRequestHandler)
    ip, port = server.server_address  # find out what port we were given

    t = threading.Thread(target=server.serve_forever)
    t.setDaemon(True)  # don't hang on exit
    t.start()

    # Start the webserver
    wserver = WebServer(configFile['Web']['IP'], configFile['Web']['Port'])
    wserver.start_server()

    try:
        server.serve_forever()
Example #14
""" This is the Starting Point of Kijiji Web Scraping Script Environment Setup """

import sys
from datetime import datetime
from Logging import Logging
from SetupEnvironment import SetupEnvironment
""" Getting Basic Logging Options """

logger = Logging().get_logger("setup")
if logger != None:

    # Checking Python Version
    python_major_version = sys.version_info.major

    if python_major_version < 3:
        logger.critical(
            "Setup Module : Version Issue : Please install Python Version 3 or greater to execute this script"
        )
    else:
        logger.debug("Setup Module : Correct Python Version Found")
        logger.debug("Setup Module : Preparing Script Environment")

        # Starting Environment Setup
        setupEnvironment = SetupEnvironment(logger)
        setupEnvironment.installAndUnpgradeLibraries()

        logger.debug("Setup Module : Environment Setup Completed")
else:
    print("Setup Module : Critical : Logging Setup Could Not Be Completed")
    print("Setup Module : Critical : Process will Exit")
    print("Setup Module : Critical : Contact Administrator For Resolution")
import pathlib  # used below to resolve the script's directory
from HandleProperties import HandleProperties
from datetime import datetime
from datetime import date
from datetime import timedelta
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from Extract import Extract
from selenium.webdriver.chrome.options import Options
from oslo_concurrency import lockutils
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.keys import Keys

# Getting Basic Logging Options
logger = Logging().get_logger("scraping")

# Fetching Configuration From Properties File
scraping_script_path = pathlib.Path(__file__).parent.absolute()
handleProperties = HandleProperties()
configuration = handleProperties.read_properties(str(scraping_script_path) + "/Config/Scraping.properties")

# Initializing Variables using Command Line Variables
advertisment_links = set()
print(sys.argv)
total_command_line_arguments = len(sys.argv)
logger.debug("Length of Arguments : " + str(total_command_line_arguments))
if total_command_line_arguments != 5:
    logger.error("Scraping Module : Incorrect No Of Arguments Passed")
    logger.error("Scraping Module : System exiting")
    sys.exit()
Example #16
                                 lr=conf.learning_rate,
                                 weight_decay=conf.weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=5,
                                                gamma=0.8)

    ############################## PREPARE DATASET ##############################
    print('System start to load data...')
    t0 = time()
    train_data, val_data, test_data = data_utils.load_all()
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_mrg_id_x.log' % (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_mrg_id_06.py' % (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_mrg_id_06.mod' % (conf.out_path,
                                                      conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data)
    val_dataset = data_utils.TrainData(val_data)
    test_dataset = data_utils.TrainData(test_data)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(train_dataset.length)),
                                            batch_size=conf.batch_size,
                                            drop_last=False)
    val_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(val_dataset.length)),
                                          batch_size=conf.batch_size,
    k_means_weight = np.load(
        '/content/drive/My Drive/task/aspect_based_rs/data/amazon_electronics/electronics.k_means.npy'
    )
    model_params['transform_T.weight'] = torch.FloatTensor(
        k_means_weight.transpose())  # (aspect_dimension, word_dimension)

    model.load_state_dict(model_params)

    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=conf.lr,
                                 weight_decay=conf.weight_decay)

    ########################### FIRST TRAINING #####################################
    log = Logging(
        '/content/drive/My Drive/task/aspect_based_rs/out/amazon_electronics/train_amazon_electronics_aspect_rating_id_x.log'
    )
    train_model_path = '/content/drive/My Drive/task/aspect_based_rs/out/amazon_electronics/train_amazon_electronics_aspect_rating_id_x.mod'

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data, train_review_embedding,
                                         train_user_historical_review_dict,
                                         train_item_historical_review_dict)
    val_dataset = data_utils.TrainData(val_data, val_review_embedding,
                                       val_user_historical_review_dict,
                                       val_item_historical_review_dict)
    test_dataset = data_utils.TrainData(test_data, test_review_embedding,
                                        test_user_historical_review_dict,
                                        test_item_historical_review_dict)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(
Example #18
    def scraper(self, site, user, userloc, logpath, hrefCheck, entitie):
        """
        This function scrapes the pages.
        :param site: Url from site
        :param user: User who uses the scraper
        :param userloc: Location of the current user
        :param logpath: Path of the log file.
        :param hrefCheck: Check if it's the first time in the scraper.
        :param entitie: Check if the entities need to be extracted.
        :return: The filename and the SHA 256 value.
        """
        text = ""
        href = []
        what = str(site + " scraped.")
        when = time.strftime("%d/%m/%Y" + " " + "%H:%M:%S")
        why = "Extract text from the site for research."
        result = str(
            site +
            " scraped. A .txt file has been made with the content of the original site."
        )
        if str(site).endswith("\n"):
            site = site[:-1]
        try:
            print site
            scp = Scp()
            logging = Logging()
            HrefParser = hrefParser()
            if site.__contains__(".pdf"):

                # Download pdf and push it to the server
                if site.__contains__("www."):
                    domain = site.split("www.")
                else:
                    domain = site.split("://")
                tld = str(domain[1])
                tld = tld.replace("/", "-")
                filename = "sites/" + tld
                # scraper = Scraper.scraper()
                self.download_file(site, filename)
                hex_dig = self.get_hashes(filename)
                scp.run(filename)
                # Write logging to .csv file.

                logging.log(user, userloc, when, what, why, result, hex_dig,
                            logpath)
                return (filename, hex_dig)

            else:

                # Download the page
                page = requests.get(site)
                soup = BeautifulSoup(page.content, 'html.parser')

                # Extract all P tags
                for p in soup.find_all('p'):
                    text = text + p.get_text()
                # Extract all href's
                for a in soup.find_all('a', href=True):
                    href.append(a['href'])

                # Parse text to unicode.
                unitext = unidecode(text)
                if site.__contains__("www."):
                    domain = site.split("www.")
                else:
                    domain = site.split("://")

                # Write text to .txt file
                filename = self.get_filname(domain, unitext)
                hex_dig = self.get_hashes(filename)
                if entitie:
                    self.get_entities(filename, domain)
                scp.run(filename)
                logging.log(user, userloc, when, what, why, result, hex_dig,
                            logpath)

                # Check if it's the first scan.
                if hrefCheck and not entitie:
                    HrefParser.parser(href, str(domain[1]))
                print "SHA 256 : " + hex_dig + "\n"

                return

        except ExceptionHandling.WrongStatusCode as e:
            Logging.error_log("Menu", e.message)
            print "\033[93m" + e.message + "\033[0m"
            pass
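
In the PDF branch above, the filename is derived from the URL by splitting off the scheme (or the leading "www.") and replacing slashes with dashes; a standalone sketch of that derivation with an assumed example URL:

site = "https://example.com/reports/2020.pdf"      # assumed example URL
domain = site.split("www.") if "www." in site else site.split("://")
tld = str(domain[1]).replace("/", "-")
filename = "sites/" + tld
print(filename)   # -> sites/example.com-reports-2020.pdf
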
Example #19
def start(conf, data, model_name):
    if conf.data_name in ['beibei']:
        vis_port = 1496
    elif conf.data_name in ['BeiBei2']:
        vis_port = 1469
    elif conf.data_name in ['BeiBei']:
        vis_port = 9641
    log_dir = os.path.join(os.getcwd(), 'log')
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    for reg, lr in product(conf.reg, conf.learning_rate):
        print('reg: {}, lr: {}---------------------------'.format(reg, lr))
        model = eval(model_name)
        model = model(conf, reg, lr)

        data.initializeRankingHandle()

        d_train, d_val, d_test, d_test_eva = data.train, data.val, data.test, data.test_eva

        print('System start to load data...')
        t0 = time()
        d_train.initializeRankingTrain()
        d_val.initializeRankingVT(d_train)
        d_test.initializeRankingVT(d_train)
        d_test_eva.initalizeRankingEva()
        t1 = time()
        print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

        # prepare model necessary data.
        data_dict = d_train.prepareModelSupplement()
        model.inputSupply(data_dict)
        model.startConstructGraph()

        # standard tensorflow running environment initialize
        tf_conf = tfv1.ConfigProto()
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
        tf_conf.gpu_options.allow_growth = True
        #     tf_conf.gpu_options.per_process_gpu_memory_fraction = 0.6
        sess = tfv1.Session(config=tf_conf)
        sess.run(model.init)

        restore_epoch = 0
        if conf.premodel_flag == 1:
            if conf.model_name == 'neumf' and (conf.test == 0):
                hhh = conf.pre_model.split('/')
                checkpoint_gmf = os.path.join('pretrain', conf.data_name,
                                              hhh[0])
                model.saver_GMF.restore(sess, checkpoint_gmf)
                print('restore gmf done')
                checkpoint_mlp = os.path.join('pretrain', conf.data_name,
                                              hhh[1])
                model.saver_mlp.restore(sess, checkpoint_mlp)
                print('restore mlp done')
            else:
                checkpoint = os.path.join('pretrain', conf.data_name,
                                          conf.pre_model)
                model.saver.restore(sess, checkpoint)
                print('restore model done')
        log_path = os.path.join(
            os.getcwd(),
            'log/{}_{}_reg{}_lr{}_neg{}_epoch{}+{}_dim{}_{}.log'.format(
                conf.data_name, conf.model_name, reg, lr, conf.num_negatives,
                restore_epoch, conf.epochs, conf.dimension, conf.test_name))
        # log_path = os.path.join(os.getcwd(), '{}_log/{}/dim{}_{}.log'.format(conf.data_name, conf.model_name, conf.dimension, conf.test_name))
        log = Logging(log_path)
        log.record('Following will output the evaluation of the model:')

        ndcg_item = deque([0] * 30, 30)
        best_ndcg20 = 0.0
        if conf.model_name in ['gcncsr']:
            name = conf.model_name + 'A'
        else:
            name = conf.model_name
        if not conf.test:
            vis = Visdom(port=vis_port,
                         env='{}_reg{}lr{}-{}-dim{}'.format(
                             name, reg, lr, conf.test_name, conf.dimension))
        # Start Training !!!
        if conf.test == 2:
            After_Metric = {'Recall': [], 'NDCG': [], 'MRR': []}
        for epoch in range(1, conf.epochs + 1):
            if conf.test != 1:
                # optimize model with training data and compute train loss
                tmp_train_loss = []
                tmp_train_social_loss = []
                t0 = time()
                while d_train.terminal_flag:
                    d_train.getTrainRankingBatch()
                    d_train.linkedMap()
                    train_feed_dict = {}
                    for (key, value) in model.map_dict['train'].items():
                        train_feed_dict[key] = d_train.data_dict[value]
                    if conf.model_name in ['ngcf']:
                        train_feed_dict[model.training] = True
                    if conf.model_name in ['gcncsr']:
                        train_feed_dict[model.dp_ui] = conf.dp_ui
                    if conf.model_name in ['lightgcn']:
                        train_feed_dict[model.dp] = conf.dp
                    if conf.model_name in ['sorec']:
                        train_feed_dict[
                            model.
                            user_social_loss_idx] = d_train.user_social_loss_idx
                    [sub_train_loss, _] = sess.run(\
                        [model.map_dict['out']['train'], model.opt], feed_dict=train_feed_dict)
                    tmp_train_loss.append(sub_train_loss)
                train_loss = np.mean(tmp_train_loss, 0)
                if conf.model_name in ['gcncsr'] and conf.social_loss:
                    d_train.terminal_flag = 1
                    while d_train.terminal_flag:
                        d_train.getTrainRankingBatch()
                        d_train.linkedMap()
                        [sub_train_social_loss, _] = sess.run(\
                            [model.social_loss, model.social_opt], feed_dict={model.user_social_loss_idx: d_train.user_social_loss_idx})
                        tmp_train_social_loss.append(sub_train_social_loss)
                    social_loss = np.mean(tmp_train_social_loss)
                    if conf.model_name in ['hgcn', 'gcncsr'] and conf.att:
                        social_att = []
                        for i in range(len(d_train.split_idx) - 1):
                            start, end = d_train.split_idx[
                                i], d_train.split_idx[i + 1]
                            att = sess.run(
                                model.social_att,
                                feed_dict={
                                    model.u0:
                                    d_train.social_edges_user0[start:end],
                                    model.u1:
                                    d_train.social_edges_user1[start:end]
                                })
                            social_att.extend(list(att))
                        social_att = np.array(social_att)
                        model.update_social_matrix(social_att)
                if conf.model_name in ['disgcn', 'disbpr'
                                       ] and conf.social_loss:
                    d_train.terminal_flag = 1
                    while d_train.terminal_flag:
                        d_train.getTrainRankingBatch()
                        d_train.linkedMap()
                        train_feed_dict = {}
                        for (key,
                             value) in model.map_dict['train_social'].items():
                            train_feed_dict[key] = d_train.data_dict[value]
                        [sub_train_social_loss, _] = sess.run(\
                            [model.social_loss, model.social_opt], feed_dict=train_feed_dict)
                        tmp_train_social_loss.append(sub_train_social_loss)
                    social_loss = np.mean(tmp_train_social_loss)
                    if conf.model_name in ['disgcn']:
                        ufi_att_list = [
                            sess.run(model.ufi_att_list[k])
                            for k in range(conf.num_layers)
                        ]
                        model.update_ufi_att(ufi_att_list)
                        if conf.att:
                            int_att_list = [
                                sess.run(model.int_att_list[k])
                                for k in range(conf.num_layers)
                            ]
                            social_att_list = [
                                sess.run(model.social_att_list[k])
                                for k in range(conf.num_layers)
                            ]
                            model.update_att(int_att_list, social_att_list)

                d_train.generateTrainNegative()
                d_train.terminal_flag = 1

                t2 = time()
                if conf.model_name in ['disbpr', 'disgcn', 'gcncsr'
                                       ] and conf.social_loss:
                    log.record('Epoch:%d, compute loss cost:%.4fs, train loss:%.4f, social loss: %.4f' % \
                    (epoch, (t2-t0), train_loss, social_loss))
                else:
                    log.record('Epoch:%d, compute loss cost:%.4fs, train loss:%.4f' % \
                    (epoch, (t2-t0), train_loss))
                if not conf.test:
                    X = [epoch]
                    if epoch == 1:
                        if conf.model_name in ['disgcn', 'disbpr', 'gcncsr'
                                               ] and conf.social_loss:
                            vis.line([train_loss],
                                     X,
                                     win='train loss',
                                     opts={'title': 'train loss'})
                            vis.line([social_loss],
                                     X,
                                     win='social loss',
                                     opts={'title': 'social loss'})
                        else:
                            vis.line([train_loss],
                                     X,
                                     win='train loss',
                                     opts={'title': 'train loss'})
                    else:
                        if conf.model_name in ['disgcn', 'disbpr', 'gcncsr'
                                               ] and conf.social_loss:
                            vis.line([train_loss],
                                     X,
                                     win='train loss',
                                     update='append',
                                     opts={'title': 'train loss'})
                            vis.line([social_loss],
                                     X,
                                     win='social loss',
                                     update='append',
                                     opts={'title': 'social loss'})
                        else:
                            vis.line([train_loss],
                                     X,
                                     win='train loss',
                                     update='append',
                                     opts={'title': 'train loss'})

            if epoch % 5 == 0 or conf.test:
                metrics = test(model, conf, d_test_eva, sess, d_train, log)
                for i, k in enumerate(conf.topk):
                    log.record('Recall@{}: {}, NDCG@{}: {}, MRR@{}: {}'.format(
                        k, metrics['Recall'][i], k, metrics['NDCG'][i], k,
                        metrics['MRR'][i]))
                if conf.test == 1:
                    print('test done')
                    exit()
                if conf.test == 2:
                    for k in After_Metric.keys():
                        After_Metric[k].append(metrics[k])
                    if epoch == conf.epochs:
                        for k in After_Metric.keys():
                            After_Metric[k] = np.mean(After_Metric[k], 0)
                        for i, k in enumerate(conf.topk):
                            log.record(
                                'Recall@{}: {}, NDCG@{}: {}, MRR@{}: {}'.
                                format(k, After_Metric['Recall'][i], k,
                                       After_Metric['NDCG'][i], k,
                                       After_Metric['MRR'][i]))
                    continue
                if epoch == 5:
                    vis.line([metrics['NDCG'][1]],
                             X,
                             win='NDCG@20',
                             opts={'title': 'NDCG@20'})
                    vis.line([metrics['Recall'][1]],
                             X,
                             win='Recall@20',
                             opts={'title': 'Recall@20'})
                    vis.line([metrics['MRR'][1]],
                             X,
                             win='MRR@20',
                             opts={'title': 'MRR@20'})
                else:
                    vis.line([metrics['NDCG'][1]],
                             X,
                             win='NDCG@20',
                             update='append',
                             opts={'title': 'NDCG@20'})
                    vis.line([metrics['Recall'][1]],
                             X,
                             win='Recall@20',
                             update='append',
                             opts={'title': 'Recall@20'})
                    vis.line([metrics['MRR'][1]],
                             X,
                             win='MRR@20',
                             update='append',
                             opts={'title': 'MRR@20'})
                if metrics['NDCG'][1] > best_ndcg20:
                    best_ndcg20 = metrics['NDCG'][1]
                    save_path = './pretrain/{}/{}_{}_reg{}_lr{}_epoch{}+{}_dim{}_{}.ckpt'.format(
                        conf.data_name, conf.data_name, conf.model_name, reg,
                        lr, restore_epoch, conf.epochs, conf.dimension,
                        conf.test_name)
                    save_path = model.saver.save(sess,
                                                 save_path,
                                                 write_meta_graph=False)
                    log.record('Model saved in ' + save_path)
                    if conf.model_name in ['gcncsr']:
                        save_path = './pretrain/{}/{}_{}_reg{}_lr{}_epoch{}+{}_dim{}_{}_att.npy'.format(
                            conf.data_name, conf.data_name, conf.model_name,
                            reg, lr, restore_epoch, conf.epochs,
                            conf.dimension, conf.test_name)
                        np.save(
                            save_path,
                            np.expand_dims(
                                sess.run(model.social_neighbors_sparse_matrix.
                                         _values), 1))
                        log.record('save att values')
                    log.record('test metric')
                    metrics_test = test(model, conf, d_test, sess, d_train,
                                        log)
                    for i, k in enumerate(conf.topk):
                        log.record(
                            'Recall@{}: {}, NDCG@{}: {}, MRR@{}: {}'.format(
                                k, metrics_test['Recall'][i], k,
                                metrics_test['NDCG'][i], k,
                                metrics_test['MRR'][i]))
                # if epoch%50 == 0:
                #     save_path = './pretrain/{}/{}_{}_reg{}_lr{}_epoch{}+{}_dim{}_{}_{}.ckpt'.format(conf.data_name, conf.data_name, conf.model_name, reg, lr, restore_epoch, conf.epochs, conf.dimension, conf.test_name, epoch)
                #     save_path = model.saver.save(sess, save_path, write_meta_graph=False)
                #     log.record('Model saved in ' + save_path)
                ndcg_item.append(metrics['NDCG'][2])
                if np.mean(ndcg_item
                           ) > metrics['NDCG'][2] or epoch == conf.epochs:
                    print('ndcg@20 does not change, early stopping ...')
                    break
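
The early-stopping rule at the bottom of the loop keeps the most recent NDCG scores in a fixed-size deque and stops once their rolling mean is no longer below the newest value. A standalone sketch of that pattern with made-up scores, and the window shortened from 30 to 5 for illustration:

from collections import deque

import numpy as np

ndcg_item = deque([0] * 5, 5)            # the script above uses a window of 30
for epoch, ndcg20 in enumerate([0.10, 0.12, 0.13, 0.13, 0.12, 0.11], start=1):
    ndcg_item.append(ndcg20)
    if np.mean(ndcg_item) > ndcg20:
        print('ndcg@20 does not improve, early stopping at epoch %d' % epoch)
        break
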
Example #20
    from fm import fm
    model = fm()

    #model.load_state_dict(torch.load('/content/drive/My Drive/task/aspect_based_rs/out/model/train_amazon_clothing_fm_id_2.mod'))

    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=conf.lr,
                                 weight_decay=conf.weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=5,
                                                gamma=0.8)

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_fm_id_x.log' % (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_fm_id_02.py' % (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_fm_id_02.mod' % (conf.out_path,
                                                     conf.data_name)

    #import pdb; pdb.set_trace()

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data)
    val_dataset = data_utils.TrainData(val_data)
    test_dataset = data_utils.TrainData(test_data)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(train_dataset.length)),
                                            batch_size=conf.batch_size,
                                            drop_last=False)
    val_batch_sampler = data.BatchSampler(data.RandomSampler(
Example #21
    optimizer_review = torch.optim.Adam(model.decoder.parameters(),
                                        lr=conf.learning_rate)
    optimizer_rating = torch.optim.Adam(model.encoder.parameters(),
                                        lr=conf.learning_rate,
                                        weight_decay=conf.weight_decay)

    ############################## PREPARE DATASET ##############################
    print('System start to load data...')
    t0 = time()
    train_data, val_data, test_data = data_utils.load_all()
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_lm_mf_id_x.log' % (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_lm_mf_id_X.py' %
                  (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_lm_mf_id_X.mod' % (conf.out_path,
                                                       conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data)
    val_dataset = data_utils.TrainData(val_data)
    test_dataset = data_utils.TrainData(test_data)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(train_dataset.length)),
                                            batch_size=conf.batch_size,
                                            drop_last=False)
    val_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(val_dataset.length)),
                                          batch_size=conf.batch_size,
Example #22
# -*- coding: utf-8 -*-

import sys

reload(sys)
sys.setdefaultencoding('utf-8')

from Logging import Logging

if __name__ == '__main__':
    log = Logging('BBDSpider').get_logging()
    log.debug('this is debug message')
    log.info('this is info message')
    log.error('this is error message')
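
A minimal sketch of what the get_logging() wrapper used here might look like when built on the stdlib logging module; the actual BBDSpider implementation is not shown and may add file handlers, formats, or log rotation.

import logging
import sys

class Logging(object):
    def __init__(self, name):
        self._logger = logging.getLogger(name)
        self._logger.setLevel(logging.DEBUG)
        if not self._logger.handlers:
            handler = logging.StreamHandler(sys.stdout)
            handler.setFormatter(logging.Formatter(
                '%(asctime)s %(name)s [%(levelname)s] %(message)s'))
            self._logger.addHandler(handler)

    def get_logging(self):
        return self._logger
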
Example #23
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    ############################## CREATE MODEL ##############################
    from abae_rs import abae_rs
    model = abae_rs()

    #model.load_state_dict(torch.load('/content/drive/My Drive/task/aspect_based_rs/out/amazon_clothing/train_amazon_clothing_pmf_id_X1.mod'))
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=conf.learning_rate,
                                 weight_decay=conf.weight_decay)

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_ncf_id_x.log' % (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_ncf_id_X1.log' %
                  (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_ncf_id_X1.mod' % (conf.out_path,
                                                      conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data, user_seq_dict,
                                         item_seq_dict)
    val_dataset = data_utils.TrainData(val_data, user_seq_dict, item_seq_dict)
    test_dataset = data_utils.TrainData(test_data, user_seq_dict,
                                        item_seq_dict)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(\
        range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)
    val_batch_sampler = data.BatchSampler(data.RandomSampler(\
        range(val_dataset.length)), batch_size=conf.batch_size, drop_last=False)
    test_batch_sampler = data.BatchSampler(data.RandomSampler(\
Example #24
            word_embedding.wv[word_embedding.wv.index2entity[idx - 3]])

    k_means_weight = np.load(
        '/content/drive/My Drive/task/aspect_based_rs/data/amazon_movies_tv/amazon_movies_tv.k_means.npy'
    )
    model_params['transform_T.weight'] = torch.FloatTensor(
        k_means_weight.transpose())  # (aspect_dimension, word_dimension)

    model.load_state_dict(model_params)

    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate)

    ########################### FIRST TRAINING #####################################
    log = Logging(
        '/content/drive/My Drive/task/aspect_based_rs/out/amazon_movies_tv/train_amazon_movies_tv_abae_id_x.log'
    )
    train_model_path = '/content/drive/My Drive/task/aspect_based_rs/out/amazon_movies_tv/train_amazon_movies_tv_abae_id_x.mod'

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data, train_review_embedding)
    val_dataset = data_utils.TrainData(val_data, val_review_embedding)
    test_dataset = data_utils.TrainData(test_data, test_review_embedding)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(train_dataset.length)),
                                            batch_size=conf.batch_size,
                                            drop_last=False)
    val_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(val_dataset.length)),
                                          batch_size=conf.batch_size,
Example #25
    model.load_state_dict(model_params)
    '''
    model.load_state_dict(
        torch.load('%s/train_%s_abae_id_adabound.mod' %
                   (conf.model_path, conf.data_name)))

    model.cuda()
    #optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate)
    import adabound
    optimizer = adabound.AdaBound(model.parameters(),
                                  lr=conf.learning_rate,
                                  final_lr=0.1)

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_abae_id_x.log' % (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_abae_id_adabound.log' %
                  (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_abae_id_adabound.mod' % (conf.out_path,
                                                             conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data, train_review_embedding)
    val_dataset = data_utils.TrainData(val_data, val_review_embedding)
    test_dataset = data_utils.TrainData(test_data, test_review_embedding)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(train_dataset.length)),
                                            batch_size=conf.batch_size,
                                            drop_last=False)
    val_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(val_dataset.length)),
                                          batch_size=conf.batch_size,
Example #26
    def unifyRequestResult(data_dict, bbd_type):
        """
        同理,整理抓取后的字段
        :param company_name:  (unicode) 公司名
        :param data_dict:  (dict)  公司信息
        :return:  (bool) 是否成功存储 -> true / false
        """
        try:
            qyxx_dict = {
                "guangdong": u"广东",
                "hubei": u"湖北",
                "hunan": u"湖南",
                "henan": "河南",
                "heilongjiang": u"黑龙江",
                "hebei": u"湖北",
                "hainan": u"海南",
                "guizhou": u"贵州",
                "guangxi": u"广西",
                "fujian": u"福建",
                "chongqing": u"重庆",
                "beijing": u"北京",
                "anhui": u"安徽",
                "jiangsu": u"江苏",
                "gansu": u"甘肃",
                "xinjiang": u"新疆",
                "tianjin": u"天津",
                "sichuan": u"四川",
                "shanxixian": u"陕西",
                "shanxitaiyuan": u"山西",
                "shandong": u"山东",
                "shanghai": u"上海",
                "qinghai": u"青海",
                "ningxia": u"宁夏",
                "neimenggu": u"内蒙古",
                "liaoning": u"辽宁",
                "jilin": u"吉林",
                "jiangxi": u"江西",
                "xizang": u"西藏",
                "zhejiang": u"浙江",
                "yunnan": u"云南",
                "zongju": u"总局"
            }
            logger = Logging(name=bbd_type)
            if not data_dict.has_key("rowkey_dict"):
                raise Exception(
                    "Company data dict don't has rowkey values, wrong data")
            else:

                data_dict = copy.deepcopy(data_dict)
                rowkey_dict = copy.deepcopy(data_dict["rowkey_dict"])
                uptime = TimeUtil.timeStamp()
                dotime = TimeUtil.doTime()

                rowkey_dict["uptime"] = uptime
                rowkey_dict["dotime"] = dotime
                rowkey_dict["bbd_type"] = bbd_type

                data_dict.update(rowkey_dict)

                id_column_list = ["company_name", "company_zch", "dotime"]
                _id = UniField.getId(data_dict, id_column_list)

                # add the rowkey and _id
                # rk_column_list=["company_name","bbd_type","dotime"]
                rowkey = UniField.getRowkey(bbd_type, _id)
                logger.info(u"统一字段(网页原文) 产生rowkey 为:[%s]", rowkey)
                data_dict["rowkey"] = rowkey
                data_dict["_id"] = _id

                if bbd_type in qyxx_dict.keys():
                    data_dict["type"] = qyxx_dict[bbd_type]

                if None in data_dict:
                    del data_dict[None]

                return data_dict
        except Exception as e:
            logger.info(str(e))
Example #27
    #model.encoder.user_embedding.weight.requires_grad = False
    #model.encoder.item_embedding.weight.requires_grad = False

    optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate)

    ############################## PREPARE DATASET ##############################
    print('System start to load data...')
    t0 = time()
    train_data, val_data, test_data = data_utils.load_all()
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_lm_id_x.py' % (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_att2seq_id_X.py' %
                  (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_att2seq_id_X' % (conf.out_path,
                                                     conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data)
    train_batch_sampler = data.BatchSampler(data.RandomSampler(\
        range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)

    review_val_dataset = data_utils.TestData(val_data)
    review_val_sampler = data.BatchSampler(data.RandomSampler(\
        range(review_val_dataset.length)), batch_size=conf.batch_size, drop_last=False)

    review_test_dataset = data_utils.TestData(test_data)
    review_test_sampler = data.BatchSampler(data.RandomSampler(\
        range(review_test_dataset.length)), batch_size=conf.batch_size, drop_last=False)
Example #28
    # load word embedding from pretrained word2vec model
    model_params = model.state_dict()
    word_embedding = Word2Vec.load('/content/drive/My Drive/task/aspect_based_rs/data/amazon_clothing/amazon_clothing.wv.model')
    for idx in range(3, conf.vocab_sz):
        model_params['word_embedding.weight'][idx] = torch.FloatTensor(word_embedding.wv[word_embedding.wv.index2entity[idx-3]])
    model.load_state_dict(model_params)

    #model.load_state_dict(torch.load('/content/drive/My Drive/task/aspect_based_rs/out/amazon_clothing/train_amazon_clothing_language_model_id_0X.mod'))
    model.cuda()
    
    optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate)

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_language_model_id_x.log' % (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_language_model_id_0X.py' % (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_language_model_id_0X.mod' % (conf.out_path, conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data)
    val_dataset = data_utils.TrainData(val_data)
    test_dataset = data_utils.TrainData(test_data)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)
    val_batch_sampler = data.BatchSampler(data.RandomSampler(range(val_dataset.length)), batch_size=conf.batch_size, drop_last=False)
    test_batch_sampler = data.BatchSampler(data.RandomSampler(range(test_dataset.length)), batch_size=conf.batch_size, drop_last=False)

    # Start Training !!!
    min_loss = 0
    for epoch in range(1, conf.train_epochs+1):
        t0 = time()
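
The BatchSampler/RandomSampler pairing used in this script and in most of the training examples above simply shuffles dataset indices and yields them in fixed-size lists; a tiny standalone sketch with assumed sizes (the real scripts use train_dataset.length and conf.batch_size):

from torch.utils import data

dataset_length = 10      # assumed; stands in for train_dataset.length
batch_size = 4           # assumed; stands in for conf.batch_size

train_batch_sampler = data.BatchSampler(data.RandomSampler(range(dataset_length)),
                                        batch_size=batch_size, drop_last=False)
for batch_idx_list in train_batch_sampler:
    print(batch_idx_list)   # e.g. [7, 2, 9, 0], indices into the dataset
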
Example #29
    from nrms import nrms
    model = nrms()
    
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate)

    ############################## PREPARE DATASET ##############################
    print('System start to load data...')
    t0 = time()
    train_data, val_data = data_utils.load_all()
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    ########################### TRAINING STAGE ##################################
    check_dir('%s/train_log' % conf.out_path)
    log = Logging('%s/train_%s_nrms.log' % (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_nrms.mod' % (conf.out_path, conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data)
    val_dataset = data_utils.TestData(val_data)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(train_dataset.length)), batch_size=conf.batch_size, drop_last=False)
    val_batch_sampler = data.BatchSampler(data.SequentialSampler(
        range(val_dataset.length)), batch_size=conf.batch_size, drop_last=True)

    # Start Training !!!
    max_auc = 0
    for epoch in range(1, conf.train_epochs+1):
        t0 = time()
                                 weight_decay=conf.weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=5,
                                                gamma=0.8)

    ############################## PREPARE DATASET ##############################
    print('System start to load data...')
    t0 = time()
    train_data, val_data, test_data = data_utils.load_all()
    t1 = time()
    print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

    ########################### FIRST TRAINING #####################################
    check_dir('%s/train_%s_expansion_net_id_x.log' %
              (conf.out_path, conf.data_name))
    log = Logging('%s/train_%s_expansion_net_id_X.py' %
                  (conf.out_path, conf.data_name))
    train_model_path = '%s/train_%s_expansion_net_id_X.mod' % (conf.out_path,
                                                               conf.data_name)

    # prepare data for the training stage
    train_dataset = data_utils.TrainData(train_data)
    val_dataset = data_utils.TrainData(val_data)
    test_dataset = data_utils.TrainData(test_data)

    train_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(train_dataset.length)),
                                            batch_size=conf.batch_size,
                                            drop_last=False)
    val_batch_sampler = data.BatchSampler(data.RandomSampler(
        range(val_dataset.length)),
                                          batch_size=conf.batch_size,