Example #1
    def test_adapt_url(self):
        user_request = 'audifonos inalambricos'
        country = 'mx'
        amazon_url = Amazon.adapt_url(Amazon, user_request, country)

        self.assertEqual(
            amazon_url, 'https://www.amazon.com.mx/s?k=audifonos+inalambricos')
Example #2
    def test_there_is_soup(self):
        user_request = 'audifonos inalambricos'
        country = 'mx'
        amazon_url = Amazon.adapt_url(Amazon, user_request, country)

        amz_soup = extract_soup(amazon_url, 1, just_soup=True)

        self.assertIsNotNone(amz_soup)
Example #3
    def test_connection_status(self):
        user_request = 'audifonos inalambricos'
        country = 'mx'
        amazon_url = Amazon.adapt_url(Amazon, user_request, country)

        amz_status = extract_soup(amazon_url, 0, just_status=True)

        self.assertEqual(amz_status, 200)
Example #4
    def test_get_brute_info_including_Nones(self):
        user_request = 'audifonos inalambricos'
        country = 'mx'
        amazon_url = Amazon.adapt_url(Amazon, user_request, country)
        amz_soup = extract_soup(amazon_url, 1, just_soup=True)

        #New test
        amz_boxes = search_boxes(amz_soup, Amazon.boxes)
        self.assertEqual(len(amz_boxes), 60)
Example #5
    def test_get_brute_info_without_losses(self):
        user_request = 'audifonos inalambricos'
        country = 'mx'
        amazon_url = Amazon.adapt_url(Amazon, user_request, country)
        amazon_soup = extract_soup(amazon_url, 1, just_soup=True)
        amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)

        #New test
        amazon_string_stars = get_brute_info(amazon_boxes, Amazon.stars)
        self.assertEqual(len(amazon_boxes), len(amazon_string_stars))
Example #6
    def test_products_info_getters(self):
        user_request = 'audifonos inalambricos'
        country = 'mx'
        amazon_url = Amazon.adapt_url(Amazon, user_request, country)
        amazon_soup = extract_soup(amazon_url, 1, just_soup=True)
        amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)

        amazon_names = len(get_names(amazon_boxes, Amazon.name_and_images))
        amazon_images = len(get_images(amazon_boxes, Amazon))
        amazon_urls = len(get_products_urls(amazon_boxes, Amazon))
        amazon_price = len(get_price(country, amazon_boxes, Amazon.price))
        amazon_reviews = len(get_reviews(country, amazon_boxes,
                                         Amazon.reviews))
        amazon_stars = len(get_stars(country, amazon_boxes, Amazon.stars))

        trials = [
            amazon_names, amazon_images, amazon_urls, amazon_price,
            amazon_reviews, amazon_stars
        ]
        for test in trials:
            self.assertEqual(len(amazon_boxes), test)
def scraper(user_request, country):
    # Adapt the search URL to the requested country
    amazon_url = Amazon.adapt_url(Amazon, user_request, country)

    # Full HTML of the results page
    amazon_soup = extract_soup(amazon_url, 1, just_soup=True)

    # HTML split by product, each stored as an element of a list
    amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)

    # From this point on, this could be improved after the four scrapers are built
    # From the boxes, obtain the prices
    amazon_prices = get_price(country, amazon_boxes, Amazon.price)

    # Find the cheapest price, then build the cheapest product as a dictionary
    amazon_cheapest_idx, amazon_cheapest_price = cheapest(
        amazon_prices, position_and_price=True)
    cheapest_amazon_product_dictionary = get_cheapest(amazon_cheapest_idx,
                                                      amazon_boxes,
                                                      amazon_cheapest_price,
                                                      country, Amazon)

    return cheapest_amazon_product_dictionary
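For orientation, here is a minimal usage sketch of the scraper function above. It is a hedged example, not part of the project: it assumes the function and its helpers are importable, and it simply reuses the query string from the tests.

# Hedged usage sketch: call the scraper with the same query the tests use.
cheapest_product = scraper('audifonos inalambricos', 'mx')
print(cheapest_product)  # expected: a dict describing the cheapest matching product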


if __name__ == "__main__":

    user_request = 'audifonos inalambricos'
    country = 'mx'
    amazon_url = Amazon.adapt_url(Amazon, user_request, country)

    # Full HTML of the results page
    amazon_soup = extract_soup(amazon_url, 1, just_soup=True)

    # HTML split by product, each stored as an element of a list
    amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)
    amazon_products = {}

    amazon_products['name'] = get_names(amazon_boxes, Amazon.name_and_images)
    # Image source (link) for each Amazon product
    amazon_products['image'] = get_images(amazon_boxes, Amazon)

    amazon_products['url'] = get_products_urls(amazon_boxes, Amazon)
    # The 'url' field stores only the Amazon product id; a full link is generated as
    # Amazon base URL + domain + "/dp/" + product_id
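To make the comment above concrete, here is a hedged sketch of how a full product link could be assembled from a stored product id. The helper name, the default domain, and the example id are illustrative assumptions, not part of the project.

def build_product_url(product_id, domain='com.mx'):
    # Base URL + domain + "/dp/" + product id, following the pattern described above
    return f'https://www.amazon.{domain}/dp/{product_id}'

print(build_product_url('B0EXAMPLE'))  # -> https://www.amazon.com.mx/dp/B0EXAMPLE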
Example #9
    parser.add_argument('--T', type=int, default=3)

    # train arguments
    parser.add_argument('--n_iter', type=int, default=100)
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--batch_size', type=int, default=2048)
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    parser.add_argument('--l2', type=float, default=0)

    # model dependent arguments
    parser.add_argument('--d', type=int, default=50)

    config = parser.parse_args()

    from data import Amazon
    data_set = Amazon.Beauty()  # Books, CDs, LastFM
    train_set, test_set, num_users, num_items, kg_map = data_set.generate_dataset(
        index_shift=1)

    maxlen = 0
    for inter in train_set:
        if len(inter) > maxlen:
            maxlen = len(inter)

    train = Interactions(train_set, num_users, num_items)
    train.to_newsequence(config.L, config.T)

    logger.info(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    logger.info(config)

    train_kerl(train, test_set, config, kg_map)
Example #10
    parser.add_argument('--model', type=str, default='xHAM')
    parser.add_argument('--d', type=int, default=50)

    config = parser.parse_args()

    # The commented-out code below specifies the directories used to store the results
    #resultsName = 'all_results'
    #logName = resultsName+'/'+config.model+'/'+config.setting+'/'+config.data+'/'+config.data+'_'+str(config.d)+'_'+str(config.L)+'_'+str(config.T)+'_'+str(config.P)+'_'+str(config.l2)+'_'+str(config.order)+'_'+config.abla+'.'+config.setting

    ##logging.basicConfig(filename=logName, level=logging.DEBUG)
    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger(__name__)

    if config.data == 'CDs':
        from data import Amazon
        data_set = Amazon.CDs()
    elif config.data == 'Books':
        from data import Amazon
        data_set = Amazon.Books()
    elif config.data == 'Children':
        from data import GoodReads
        data_set = GoodReads.Children()
    elif config.data == 'Comics':
        from data import GoodReads
        data_set = GoodReads.Comics()
    elif config.data == 'ML20M':
        from data import MovieLens
        data_set = MovieLens.ML20M()
    elif config.data == 'ML1M':
        from data import MovieLens
        data_set = MovieLens.ML1M()
Example #11
File: run.py Project: mindis/UBS
    parser.add_argument('--L', type=int, default=5)
    parser.add_argument('--T', type=int, default=3)

    # train arguments
    parser.add_argument('--n_iter', type=int, default=1000)
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--batch_size', type=int, default=4096)
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    parser.add_argument('--l2', type=float, default=1e-3)
    parser.add_argument('--neg_samples', type=int, default=3)
    parser.add_argument('--sets_of_neg_samples', type=int, default=50)

    # model dependent arguments
    parser.add_argument('--d', type=int, default=50)

    config = parser.parse_args()

    from data import Amazon
    data_set = Amazon.MI()
    #data_set = Amazon.CDs()  # MovieLens.ML20M()  # Books, CDs, Electronics
    # item_id=0 for sequence padding
    train_set, val_set, train_val_set, test_set, num_users, num_items = data_set.generate_dataset(
        index_shift=1)
    train = Interactions(train_val_set, num_users, num_items)
    train.to_sequence(config.L, config.T)

    logger.info(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    logger.info(config)
    train_model(train, test_set, config)
    np.save('NDCG_10_B.npy', NDCGS)
Example #12
    # data arguments
    parser.add_argument('--L', type=int, default=5)
    parser.add_argument('--T', type=int, default=3)

    # train arguments
    parser.add_argument('--n_iter', type=int, default=200)
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--batch_size', type=int, default=4096)
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    parser.add_argument('--l2', type=float, default=1e-3)
    parser.add_argument('--neg_samples', type=int, default=3)
    parser.add_argument('--sets_of_neg_samples', type=int, default=50)

    # model dependent arguments
    parser.add_argument('--d', type=int, default=50)

    config = parser.parse_args()

    from data import Amazon, MovieLens
    data_set = Amazon.CDs()  # MovieLens.ML20M()  # Books, CDs, Electronics
    # item_id=0 for sequence padding
    train_set, val_set, train_val_set, test_set, num_users, num_items = data_set.generate_dataset(
        index_shift=1)
    train = Interactions(train_val_set, num_users, num_items)
    train.to_sequence(config.L, config.T)

    logger.info(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    logger.info(config)
    train_model(train, test_set, config)
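Several of these runners note that index_shift=1 leaves item_id=0 free for sequence padding. Below is a minimal sketch of that convention; the left_pad helper and the left-padding choice are illustrative assumptions, not the project's code.

import numpy as np

def left_pad(seq, L, pad_id=0):
    # With ids shifted by 1, real items are 1..num_items, so 0 can pad short sequences.
    seq = list(seq)[-L:]  # keep at most the last L interactions
    return np.pad(seq, (L - len(seq), 0), constant_values=pad_id)

print(left_pad([3, 7, 2], L=5))  # -> [0 0 3 7 2]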
Example #13
    # data arguments
    parser.add_argument('--L', type=int, default=5)
    parser.add_argument('--T', type=int, default=3)

    # train arguments
    parser.add_argument('--n_iter', type=int, default=200)
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--batch_size', type=int, default=4096)
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    parser.add_argument('--l2', type=float, default=1e-3)
    parser.add_argument('--neg_samples', type=int, default=3)
    parser.add_argument('--sets_of_neg_samples', type=int, default=50)

    # model dependent arguments
    parser.add_argument('--d', type=int, default=50)

    config = parser.parse_args()

    from data import Amazon
    data_set = Amazon.Books()
    #data_set = Amazon.CDs()  # MovieLens.ML20M()  # Books, CDs, Electronics
    # item_id=0 for sequence padding
    train_set, val_set, train_val_set, test_set, num_users, num_items = data_set.generate_dataset(
        index_shift=1)
    train = Interactions(train_val_set, num_users, num_items)
    train.to_sequence(config.L, config.T)

    logger.info(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    logger.info(config)
    train_model(train, test_set, config)