import datetime
import hashlib


def wrapped(self, asin, is_cache=True):
    # Cache the product page keyed by the MD5 of the ASIN, and refresh it
    # once it is older than Amazon.CACHE_EXPIRED_DAYS.
    from amazon import Amazon
    md5 = hashlib.md5(asin)
    key = '%s.gz' % md5.hexdigest()
    product_info = Amazon.load_cache(key)
    if 'modified_date' in product_info and (
            datetime.datetime.now() - product_info['modified_date']
    ).days < Amazon.CACHE_EXPIRED_DAYS:
        pass
    else:
        self.load('https://www.amazon.com/dp/%s' % asin)
        page = self.lr.body
        image_urls = []
        for ele in self.lr.xpaths('//div[@class="imgTagWrapper"]/img'):
            if 'data-old-hires' in ele.attrib:
                image_urls.append(ele.attrib['data-old-hires'])
        product_info['page'] = page
        product_info['image_urls'] = image_urls
        product_info['modified_date'] = datetime.datetime.now()
        Amazon.save_cache(key, product_info)
    return detail(self, asin, is_cache=is_cache, page_info=product_info)
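# A minimal, self-contained sketch (not from the source) of how a wrapper like
# `wrapped` above is usually attached as a decorator, so calls to `detail` go
# through the cache first. The names `cache_detail`, `Scraper`, and the
# in-memory dict cache are hypothetical stand-ins for the project's gzip cache.
import functools

_CACHE = {}


def cache_detail(detail):
    @functools.wraps(detail)
    def wrapped(self, asin, is_cache=True):
        if is_cache and asin in _CACHE:
            page_info = _CACHE[asin]
        else:
            page_info = {'page': '<html>stub page for %s</html>' % asin}
            _CACHE[asin] = page_info
        return detail(self, asin, is_cache=is_cache, page_info=page_info)
    return wrapped


class Scraper(object):
    @cache_detail
    def detail(self, asin, is_cache=True, page_info=None):
        return page_info


print(Scraper().detail('B000000000'))  # a second call with the same ASIN hits the cache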
def amazon_info(request):
    title = request.params["title"]
    author = request.params["author"]
    keys = AmazonKeys()
    amazon = Amazon(keys.access_key, keys.secret_access_key, keys.associate_tag)
    amazon.setProxy("proxy.ome.toshiba.co.jp")
    result = {}
    try:
        # Search the Books index for the requested title and author
        result_xml = amazon.itemSearch("Books", Title=title, Author=author,
                                       ResponseGroup="Medium")
        dom = xml.dom.minidom.parseString(result_xml)
        items = dom.getElementsByTagName("Item")
        if len(items) != 0:
            item = items[0]
            result["image"] = getValue(item, "MediumImage", "URL")
            result["url"] = getValue(item, "DetailPageURL")
            result["title"] = title
            print getValue(item, "ASIN"), getValue(item, "LargeImage", "URL"), getValue(
                item, "DetailPageURL"), getValue(item, "Author"), getValue(item, "Title")
        dom.unlink()
    except HTTPError, e:
        print e.code
def get_asin(request):
    output = {}
    if request.method == "GET":
        if request.GET.get("url"):
            amazon = Amazon()
            url = request.GET.get("url")
            asin = amazon.get_asin_from_url(url)
            output["asin"] = asin
    return HttpResponse(json.dumps(output), content_type="application/json")
def save(self):
    if not self.id:
        a = Amazon()
        a.get_document(self.ISBN)
        self.title = a.title
        self.author = a.author
        self.summary = a.summary
        self.cover_image = a.cover_image
        self.book_image = settings.MEDIA_URL + '/media/book_' + str(random.randint(1, 9)) + '.png'
        self.asin = a.asin
    super(Book, self).save()
def checkout_for_list(request, list):
    lists = get_object_or_404(List, id=list)
    output = {}
    amazon = Amazon()
    amazon.get_items(list=list)
    cart = amazon.get_cart()
    output["url"] = "%s" % (cart.Cart.PurchaseURL)
    output["subtotal"] = "%s" % (cart.Cart.SubTotal.FormattedPrice)
    print cart.Cart.PurchaseURL
    return HttpResponse(json.dumps(output), content_type="application/json")
def get_item_for_asin(request, asin):
    output = {}
    if request.method == "GET":
        amazon = Amazon()
        item = amazon.get_item_by_asin(asin)
        output["asin"] = str(asin)
        output["url"] = str(item.Items.Item.DetailPageURL)
        output["manufacturer"] = str(item.Items.Item.ItemAttributes.Manufacturer)
        output["product_group"] = str(item.Items.Item.ItemAttributes.ProductGroup)
        output["title"] = str(item.Items.Item.ItemAttributes.Title)
        output["description"] = str(item.Items.Item.ItemLinks.ItemLink.Description)
        output["short_url"] = "http://amzn.com/%s" % asin
    return HttpResponse(json.dumps(output), content_type="application/json")
def con_search(keyword):
    amazon = Amazon("", "")
    xml = amazon.itemSearch("Books", Keywords=keyword, ItemPage="1")
    # Pull the item data out of the XML
    from BeautifulSoup import BeautifulStoneSoup
    soup = BeautifulStoneSoup(xml)
    # Store the ten returned items in the contents table
    for item in soup.findAll('item'):
        isbn10 = item.asin.contents[0]
        # isbn13 (not used here)
        # isbn13 = item.ean.contents[0]
        isbn13 = ''
        # author may be missing, so guard against the lookup failing
        try:
            author = item.author.contents[0]
        except AttributeError:
            author = ""
        title = item.title.contents[0]
        # url
        url = item.detailpageurl.contents[0]
        # image_url may also be missing
        try:
            image_url = item.largeimage.url.contents[0]
        except (NameError, AttributeError):
            image_url = ""
        arasuji = HTMLParser(url)
        # Convert character references
        arasuji = htmlentity2utf8(arasuji)
        # Strip unwanted characters and symbols from the synopsis
        # print arasuji
        con_tag = extractKeyword(arasuji)
        con_tag = ','.join(con_tag)
        # print title, isbn10
        # Store the record in the DB
        dbupdate(isbn10, isbn13, title, author, image_url, arasuji, con_tag)
    return
def getAmazon(isbn):
    result = []
    amazon = Amazon(access_key, access_secret, associate_tag)
    xml = amazon.itemLookup(isbn, SearchIndex='Books', IdType='ISBN',
                            ReviewPage='1', ResponseGroup='Reviews')
    soup = BeautifulSoup(xml)
    if soup.find('hasreviews').getText() == 'false':
        return result
    url = soup.find('iframeurl').contents[0]
    # The URL comes back HTML-escaped, so unescape the ampersands before fetching
    iframe = urllib.urlopen(url.replace('&amp;', '&'))
    soup = BeautifulSoup(iframe)
    for div in soup.findAll('div', {'style': 'margin-left:0.5em;'}):
        for i in div.findAll('div'):
            i.replaceWith('')
        result.append(div.getText())
    return result
        print(f'wall: {time.time() - start}')
        f.write(f'wall: {time.time() - start}')


if __name__ == '__main__':
    args = config()
    devices = list(map(int, args.gpu.split(',')))
    n_gpus = len(devices)
    # For sampling-based GCMC, every node must have its own features.
    # Otherwise (using the node id as the feature) the model cannot scale.
    dataset = Amazon(args.data_name, 'cpu',
                     mix_cpu_gpu=args.mix_cpu_gpu,
                     use_one_hot_fea=args.use_one_hot_fea,
                     symm=args.gcn_agg_norm_symm,
                     test_ratio=args.data_test_ratio,
                     valid_ratio=args.data_valid_ratio)
    print("Loading data finished ...\n")

    args.src_in_units = dataset.user_feature_shape[1]
    args.dst_in_units = dataset.item_feature_shape[1]
    args.rating_vals = dataset.possible_rating_values

    # cpu
    if devices[0] == -1:
        run(0, 0, args, ['cpu'], dataset)
    # gpu
    elif n_gpus == 1:
        run(0, n_gpus, args, devices, dataset)
from amazon import Amazon
from pesquisa import Pesquisa

pesquisa = Pesquisa()
produto = 'iphone'
conteudo = pesquisa.procurarProduto(produto)

amazon = Amazon()
amazon.recuperarProdutos(conteudo)
amazon.recuperarPrecos(conteudo)
amazon.gravarDados('produto2')

pesquisa.finalizar()
import sys

from database import DataBase
from config import Config
from amazon import Amazon

if __name__ == '__main__':
    config = Config()
    dbData = config.data['database']
    awsData = config.data['aws']

    # Connect to MySQL DB
    db = DataBase(dbData['host'], dbData['user'], dbData['pass'], dbData['name'])

    # Connect to AWS
    amazon = Amazon(awsData['key_name'], awsData['secret_key'])

    if hasattr(config, 'domains') and config.domains == ['all']:
        config.domains = []
        domains_list = db.get_all(db.cursor, 'domains')
        if domains_list == []:
            sys.exit('No domains records on database.')
        for d in domains_list:
            config.domains.append(d['name'])

    if config.action == 'check':
        for domain_name in config.domains:
            domain = db.get(db.cursor, 'domains', 'name', domain_name)
            if not domain:
                print "[ERROR] The domain doesn't exist"
                sys.exit(0)
    elif options.loglevel == "INFO":
        logLevelInt = 3
    elif options.loglevel == "WARN":
        logLevelInt = 4
    else:
        LOG.warning("Unknown Log level %s given. Defaulting to INFO." % options.loglevel)

    widevineAdapter.SetLogLevel(logLevelInt)

    if options.deleteDumpDir:
        os.system("rm {0}/*".format(constants.DUMP_DIRECTORY))

    if not os.path.isdir(constants.DUMP_DIRECTORY):
        os.mkdir(constants.DUMP_DIRECTORY)

    az = Amazon()
    if URL_REGEX.match(amazonUrlOrAsin):
        LOG.info("Using amazon url: %s" % amazonUrlOrAsin)

        # login is needed, else e.g. no asin can be retrieved
        az.logIn()

        titleAmazonMovieEntryMap = az.getVideoPageDetailsMapping(amazonUrlOrAsin)
        for key, val in titleAmazonMovieEntryMap.items():
            LOG.info("%s: %s" % (key, val))
        LOG.info("")

        if {} == titleAmazonMovieEntryMap:
            LOG.warning("No video data could be found! Mapping was empty.")
        else:
            LOG.info("Launching rendering script for ASIN %s" % amazonUrlOrAsin)
def main(secondsToAudit, configFile, gracePeriod, historyFile, logFile, auditPayments, auditRefunds):
    global _config
    global _civiDB
    global _awsLink
    global _stompLink
    global _logFile

    startTime = datetime.fromtimestamp(int(time.time()) - int(secondsToAudit), pytz.utc)
    endTime = datetime.fromtimestamp(int(time.time()) - int(gracePeriod), pytz.utc)
    print("AWS audit requested from %s to %s" % (startTime.isoformat(), endTime.isoformat()))

    # === Initialize the configuration file ===
    localdir = os.path.dirname(os.path.abspath(__file__))
    _config = SafeConfigParser()
    fileList = ["%s/amazon-audit.cfg" % localdir]
    if configFile is not None:
        fileList.append(configFile)
    _config.read(fileList)

    # === Open up ze STOMP ===
    _stompLink = DistStomp(_config.get('Stomp', 'server'), _config.getint('Stomp', 'port'))
    _stompLink.connect()

    # === Connection to Amazon ===
    _awsLink = Amazon(
        awsEndpoint=_config.get('AwsConfig', 'endpoint'),
        awsAccessKey=_config.get('AwsConfig', 'accessKey'),
        awsSecret=_config.get('AwsConfig', 'secretKey')
    )

    # === Connection to MySQL ===
    _civiDB = MySQL.connect(
        _config.get('MySQL', 'host'),
        _config.get('MySQL', 'user'),
        _config.get('MySQL', 'password'),
        _config.get('MySQL', 'schema')
    )

    # === Open up the history and log files ===
    # If the history file exists, it will modify the start time of this script to be the end time of the
    # history file.
    hfile = None
    historyStart = startTime
    if historyFile and os.path.exists(historyFile):
        hfile = open(historyFile, 'r')
        if hfile.readline().strip() == AWS_HISTORY_FILE_VERSTR:
            historyStart = dateutil.parser.parse(hfile.readline().strip())
            historyEnd = dateutil.parser.parse(hfile.readline().strip())
            startTime = historyEnd
            print("History file modified search period, now %s to %s" %
                  (startTime.isoformat(), endTime.isoformat()))
        else:
            print('Not starting with a valid history file.')

    if logFile:
        _logFile = open(logFile, 'a')
        _logFile.write("!!! Starting run for dates %s -> %s\n" %
                       (startTime.isoformat(), endTime.isoformat()))

    # === Sanity checks ===
    if endTime < startTime:
        startTime = endTime

    # === Main Application ===
    awsTransactions = []

    # --- Process all previously pending transactions from the history file. If the transaction is still in some form
    # of pending, add it back to the history list.
    historyCount = 0
    historyList = []
    historyStats = {
        'Success': 0,
        'Pending': 0,
        'Failed': 0,
        'Ignored': 0
    }
    if hfile:
        print("Processing history file")
        for txn in hfile:
            historyCount += 1
            awsTransactions.append(json.loads(txn))
        hfile.close()

    # --- Obtain AWS history ---
    if auditPayments:
        print("Obtaining AWS payment transactions for the period %s -> %s" %
              (startTime.isoformat(), endTime.isoformat()))
        awsTransactions += _awsLink.getAccountActivity(startTime, endDate=endTime, fpsOperation='Pay')
        print("Obtained %d transactions" % len(awsTransactions))

    if auditRefunds:
        print("Obtaining AWS refund transactions for the period %s -> %s" %
              (startTime.isoformat(), endTime.isoformat()))
        awsTransactions += _awsLink.getAccountActivity(startTime, endDate=endTime, fpsOperation='Refund')
        print("Obtained %d transactions" % len(awsTransactions))

    # --- Main loop: checks each aws transaction against the Civi database; adding it if it doesn't exist ---
    txncount = 0
    for txn in awsTransactions:
        txncount += 1
        result = dispatchTransaction(txn, auditPayments, auditRefunds)
        historyStats[result] += 1
        if result == 'Pending':
            historyList.append(txn)

    print("\n--- Finished processing of messages. ---\n")

    # --- Write the history file ---
    if historyFile:
        print("Rewriting history file with %d transactions" % len(historyList))
        hfile = open(historyFile, 'w')
        hfile.write("%s\n%s\n%s\n" % (AWS_HISTORY_FILE_VERSTR, historyStart.isoformat(), endTime.isoformat()))
        for txn in historyList:
            hfile.write("%s\n" % json.dumps(txn))
        print("Flushing history file in preparation for main loop")
        hfile.flush()

    # --- Final statistics
    print("Processed %d AWS messages" % txncount)
    print(" ... of which %d messages were from history" % historyCount)
    print("This resulted in the following:")
    for entry in historyStats.items():
        print(" %s Messages: %d" % entry)

    # === Final Application Cleanup ===
    print("\nCleaning up.")
    _civiDB.close()
    _stompLink.disconnect()

    if hfile:
        hfile.close()
    if _logFile:
        _logFile.close()

    time.sleep(1)  # Let the STOMP library catch up
def train(args):
    print(args)
    if args.data_name == 'electronic':
        dataset = Amazon(args.data_name, args.device,
                         use_one_hot_fea=args.use_one_hot_fea,
                         symm=args.gcn_agg_norm_symm,
                         test_ratio=args.data_test_ratio,
                         valid_ratio=args.data_valid_ratio)
    else:
        dataset = MovieLens(args.data_name, args.device,
                            use_one_hot_fea=args.use_one_hot_fea,
                            symm=args.gcn_agg_norm_symm,
                            test_ratio=args.data_test_ratio,
                            valid_ratio=args.data_valid_ratio)
    print("Loading data finished ...\n")

    args.src_in_units = dataset.user_feature_shape[1]
    args.dst_in_units = dataset.item_feature_shape[1]
    args.rating_vals = dataset.possible_rating_values

    start = time.time()
    ### build the net
    net = Net(args=args)
    net = net.to(args.device)
    nd_possible_rating_values = th.FloatTensor(dataset.possible_rating_values).to(args.device)
    rating_loss_net = nn.CrossEntropyLoss()
    learning_rate = args.train_lr
    optimizer = get_optimizer(args.train_optimizer)(net.parameters(), lr=learning_rate)
    print("Loading network finished ...\n")

    ### prepare training data
    train_gt_labels = dataset.train_labels
    train_gt_ratings = dataset.train_truths

    ### prepare the logger
    train_loss_logger = MetricLogger(['iter', 'loss', 'rmse'], ['%d', '%.4f', '%.4f'],
                                     os.path.join(args.save_dir, 'train_loss%d.csv' % args.save_id))
    valid_loss_logger = MetricLogger(['iter', 'rmse'], ['%d', '%.4f'],
                                     os.path.join(args.save_dir, 'valid_loss%d.csv' % args.save_id))
    test_loss_logger = MetricLogger(['iter', 'rmse'], ['%d', '%.4f'],
                                    os.path.join(args.save_dir, 'test_loss%d.csv' % args.save_id))

    ### declare the loss information
    best_valid_rmse = np.inf
    best_test_rmse = np.inf
    no_better_valid = 0
    best_iter = -1
    count_rmse = 0
    count_num = 0
    count_loss = 0

    dataset.train_enc_graph = dataset.train_enc_graph.int().to(args.device)
    dataset.train_dec_graph = dataset.train_dec_graph.int().to(args.device)
    dataset.valid_enc_graph = dataset.train_enc_graph
    dataset.valid_dec_graph = dataset.valid_dec_graph.int().to(args.device)
    dataset.test_enc_graph = dataset.test_enc_graph.int().to(args.device)
    dataset.test_dec_graph = dataset.test_dec_graph.int().to(args.device)

    print("Start training ...")
    dur = []
    for iter_idx in range(1, args.train_max_iter):
        if iter_idx > 3:
            t0 = time.time()
        net.train()
        pred_ratings = net(dataset.train_enc_graph, dataset.train_dec_graph,
                           dataset.user_feature, dataset.item_feature)
        loss = rating_loss_net(pred_ratings, train_gt_labels).mean()
        count_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(net.parameters(), args.train_grad_clip)
        optimizer.step()
        if iter_idx > 3:
            dur.append(time.time() - t0)

        if iter_idx == 1:
            print("Total #Param of net: %d" % (torch_total_param_num(net)))
            print(torch_net_info(net, save_path=os.path.join(args.save_dir, 'net%d.txt' % args.save_id)))

        real_pred_ratings = (th.softmax(pred_ratings, dim=1) *
                             nd_possible_rating_values.view(1, -1)).sum(dim=1)
        rmse = ((real_pred_ratings - train_gt_ratings) ** 2).sum()
        count_rmse += rmse.item()
        count_num += pred_ratings.shape[0]

        if iter_idx % args.train_log_interval == 0:
            train_loss_logger.log(iter=iter_idx,
                                  loss=count_loss / (iter_idx + 1),
                                  rmse=count_rmse / count_num)
            logging_str = "Iter={}, loss={:.4f}, rmse={:.4f}, time={:.4f}".format(
                iter_idx, count_loss / iter_idx, count_rmse / count_num, np.average(dur))
            count_rmse = 0
            count_num = 0

        if iter_idx % args.train_valid_interval == 0:
            valid_rmse = evaluate(args=args, net=net, dataset=dataset, segment='valid')
            valid_loss_logger.log(iter=iter_idx, rmse=valid_rmse)
            logging_str += ',\tVal RMSE={:.4f}'.format(valid_rmse)

            if valid_rmse < best_valid_rmse:
                best_valid_rmse = valid_rmse
                no_better_valid = 0
                best_iter = iter_idx
                test_rmse = evaluate(args=args, net=net, dataset=dataset, segment='test')
                best_test_rmse = test_rmse
                test_loss_logger.log(iter=iter_idx, rmse=test_rmse)
                logging_str += ', Test RMSE={:.4f}'.format(test_rmse)
            else:
                no_better_valid += 1
                if no_better_valid > args.train_early_stopping_patience \
                        and learning_rate <= args.train_min_lr:
                    logging.info("Early stopping threshold reached. Stop training.")
                    break
                if no_better_valid > args.train_decay_patience:
                    new_lr = max(learning_rate * args.train_lr_decay_factor, args.train_min_lr)
                    if new_lr < learning_rate:
                        learning_rate = new_lr
                        logging.info("\tChange the LR to %g" % new_lr)
                        for p in optimizer.param_groups:
                            p['lr'] = learning_rate
                        no_better_valid = 0

        if iter_idx % args.train_log_interval == 0:
            print(logging_str)

    print('Best Iter Idx={}, Best Valid RMSE={:.4f}, Best Test RMSE={:.4f}'.format(
        best_iter, best_valid_rmse, best_test_rmse))
    train_loss_logger.close()
    valid_loss_logger.close()
    test_loss_logger.close()

    with open(os.path.join(args.save_dir, f'duration_{args.save_id:d}.txt'), 'a') as f:
        print(f'wall: {time.time() - start}')
        f.write(f'wall: {time.time() - start}')
def __init__(self):
    self.amazon = Amazon(api_key.amazon_key, api_key.amazon_secret)
    self.price_file = "pricebg.csv"
class AmazonGet():
    # Puts items into an Amazon shopping cart

    def __init__(self):
        self.amazon = Amazon(api_key.amazon_key, api_key.amazon_secret)
        self.price_file = "pricebg.csv"

    def create_cart(self, asin):
        # Create a cart.
        # The returned cartid and hmac are needed for every later call;
        # to buy, the user visits the purchaseurl.
        res = self.amazon.cartCreate(str(asin), 1)
        soup = BeautifulSoup(res)
        ans = []
        try:
            ans.append(soup.find('cartid').text.encode('sjis'))
            ans.append(soup.find('hmac').text.encode('sjis'))
            ans.append(soup.find('urlencodedhmac').text.encode('sjis'))
            ans.append(soup.find('purchaseurl').text)
        except UnicodeEncodeError:
            return []
        except AttributeError:
            return []
        return ans

    def cart_add(self, asin, cartid, hmac):
        # Add one unit of the given item to the cart
        res = self.amazon.cartAdd(asin, 1, cartid, hmac)

    def cart_get(self, cartid, hmac):
        # Fetch the cart contents (for debugging)
        res = self.amazon.cartGet(cartid, hmac)
        soup = BeautifulSoup(res)
        print soup.prettify()
        ans = []
        try:
            ans.append(soup.find('subtotal').text.encode('sjis'))
        except UnicodeEncodeError:
            return []
        except AttributeError:
            return []
        return ans

    def solv_nap(self, nap_size, item_list):
        """
        Solve a knapsack problem.
        Every item's value is 1 and its size is its actual price,
        which lets us pack items up to the specified amount of money.
        """
        gold = [-1] * (nap_size + 1)
        gold[0] = 0
        for item in item_list:
            for i in range(nap_size + 1):
                if gold[i] != -1:
                    if i + item.size <= nap_size:
                        if gold[i + item.size] < gold[i] + item.price:
                            gold[i + item.size] = gold[i] + item.price
        # Take the reachable total closest to the target amount
        for i in range(nap_size, 0, -1):
            if gold[i] != -1:
                x = i
                break
        # Work out which items were bought
        ans = []
        item_list.reverse()
        for item in item_list:
            if 0 <= x - item.size:
                if gold[x] != gold[x - item.size]:
                    ans.append((str(item.size), str(item.asin)))
                    x -= item.size
        return ans

    def get_amazon_url(self, money):
        # Buy items whose prices add up exactly to the given amount of money
        # and return the URL used to purchase them.
        reader = csv.reader(open(self.price_file, 'r'))
        # We only need the price and the ASIN, so pull those out
        prices = set([])
        item_list = []
        for row in reader:
            if int(row[1]) not in prices:
                item_list.append(Item(1, int(row[1]), 0, row[0], row[2]))
                prices.add(int(row[1]))
        items = self.solv_nap(money, item_list)
        if items:
            id = self.create_cart(items[0][1])
            for item in items[1:]:
                self.cart_add(item[1], id[0], id[1])
            return id[3]
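# Hedged illustration (not the project's code) of what solv_nap above computes:
# choose items whose prices sum as close as possible to the budget without
# exceeding it. For a handful of items this can be checked by brute force.
from itertools import combinations


def best_fit(budget, items):
    # items is a list of (price, asin) tuples; return the best subset found
    best = []
    for r in range(1, len(items) + 1):
        for combo in combinations(items, r):
            total = sum(price for price, _ in combo)
            if total <= budget and total > sum(p for p, _ in best):
                best = list(combo)
    return best


print(best_fit(1000, [(500, 'ASIN1'), (300, 'ASIN2'), (200, 'ASIN3'), (400, 'ASIN4')]))
# -> [(500, 'ASIN1'), (300, 'ASIN2'), (200, 'ASIN3')], which sums to exactly 1000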
def get_amazon_data(self, query='Mobile', total_reviews=5):
    from amazon import Amazon
    obj = Amazon()
    review, rating = obj.search(query=query, total_reviews=total_reviews)
    return [review, rating]
import binascii
import os
import struct
import sys

# reset working directory to scripts directory
dir_path = os.path.dirname(os.path.realpath(__file__))
os.chdir(dir_path)
sys.stderr.write("[license_req.py] Using working directory: %s\n" % os.getcwd())

from amazon import Amazon

if __name__ == "__main__":
    az = Amazon()
    az.logIn()

    if len(sys.argv) != 3:
        raise Exception("Usage: python %s resId challenge" % sys.argv[0])

    resId = sys.argv[1]
    challenge = sys.argv[2]
    sys.stderr.write("[license_req.py] challenge (%d): %s\n" % (len(challenge), challenge))

    serverResponse = az.getWidevine2License(resId, challenge)
    assert ("widevine2License" in serverResponse)
    assert ("license" in serverResponse["widevine2License"])
    assert ("metadata" in serverResponse["widevine2License"])
    assert ("keyMetadata" in serverResponse["widevine2License"]["metadata"])

    license = serverResponse["widevine2License"]["license"]
from flipkart import Flipkart
from amazon import Amazon
from olx import Olx

product_name = input("Enter product name:- ")

# Fetch data from Flipkart and store it in flipkart.csv
f = Flipkart(product_name)
f.get_details()
f.store_data()

# Fetch data from Amazon and store it in amazon.csv
a = Amazon(product_name)
a.get_details()
a.store_data()

# Fetch data from Olx and store it in olx.csv
o = Olx(product_name)
o.get_details()
o.store_data()


# Read data from the CSV files and show it on a bar plot
def get_data():
    with open('amazon.csv', 'r') as d:
def main(args):
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        # datasets[split] = BGoogle(
        #     data_dir=args.data_dir,
        #     split=split,
        #     create_data=args.create_data,
        #     batch_size=args.batch_size,
        #     max_sequence_length=args.max_sequence_length,
        #     min_occ=args.min_occ
        # )
        datasets[split] = Amazon(data_dir=args.data_dir,
                                 split=split,
                                 create_data=args.create_data,
                                 batch_size=args.batch_size,
                                 max_sequence_length=args.max_sequence_length,
                                 min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    tokenizer = TweetTokenizer(preserve_case=False)
    vocab_file = "amazon.vocab.json"
    with open(os.path.join(args.data_dir, vocab_file), 'r') as file:
        vocab = json.load(file)
    w2i, i2w = vocab['w2i'], vocab['i2w']

    if args.tensorboard_logging:
        writer = SummaryWriter(os.path.join(args.logdir, expierment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    # save_model_path = os.path.join(args.save_model_path, ts)
    save_model_path = args.save_model_path
    if not os.path.exists(save_model_path):
        os.makedirs(save_model_path)

    def kl_anneal_function(anneal_function, step, k, x0):
        if anneal_function == 'logistic':
            return float(1 / (1 + np.exp(-k * (step - x0))))
        elif anneal_function == 'linear':
            return min(1, step / x0)

    NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):
        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).data].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        NLL_loss = NLL(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step, k, x0)

        return NLL_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    save_mode = True
    last_ELBO = 1e32
    for epoch in range(args.epochs):
        print("+" * 20)
        # f_test_example(model, tokenizer, w2i, i2w)
        for split in splits:
            # data_loader = DataLoader(
            #     dataset=datasets[split],
            #     batch_size=args.batch_size,
            #     shuffle=split=='train',
            #     num_workers=cpu_count(),
            #     pin_memory=torch.cuda.is_available()
            # )
            batch_size = args.batch_size

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            # for iteration, batch in enumerate(data_loader):
            iteration = 0
            iteration_total = datasets[split].batch_num
            print("batch_num", iteration_total)
            for input_batch_tensor, target_batch_tensor, length_batch_tensor in datasets[split]:
                if torch.is_tensor(input_batch_tensor):
                    input_batch_tensor = to_var(input_batch_tensor)
                if torch.is_tensor(target_batch_tensor):
                    target_batch_tensor = to_var(target_batch_tensor)
                if torch.is_tensor(length_batch_tensor):
                    length_batch_tensor = to_var(length_batch_tensor)

                # batch_size = batch['input'].size(0)
                # for k, v in batch.items():
                #     if torch.is_tensor(v):
                #         batch[k] = to_var(v)

                # Forward pass
                # logp, mean, logv, z = model(batch['input'], batch['length'])
                logp, mean, logv, z = model(input_batch_tensor, length_batch_tensor)

                # loss calculation
                NLL_loss, KL_loss, KL_weight = loss_fn(logp, target_batch_tensor,
                                                       length_batch_tensor, mean, logv,
                                                       args.anneal_function, step,
                                                       args.k, args.x0)

                loss = (NLL_loss + KL_weight * KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1
                iteration += 1

                # bookkeeping
                # print("elbo", tracker['ELBO'])
                # print("loss", loss)
                if iteration == 0:
                    tracker['ELBO'] = loss.data
                    tracker['ELBO'] = tracker['ELBO'].view(1)
                else:
                    tracker['ELBO'] = torch.cat((tracker['ELBO'], loss.view(1)))

                if args.tensorboard_logging:
                    # print(loss.data)
                    writer.add_scalar("%s/ELBO" % split.upper(), loss.data.item(),
                                      epoch * iteration_total + iteration)
                    writer.add_scalar("%s/NLL Loss" % split.upper(),
                                      NLL_loss.data.item() / batch_size,
                                      epoch * iteration_total + iteration)
                    writer.add_scalar("%s/KL Loss" % split.upper(),
                                      KL_loss.data.item() / batch_size,
                                      epoch * iteration_total + iteration)
                    writer.add_scalar("%s/KL Weight" % split.upper(), KL_weight,
                                      epoch * iteration_total + iteration)

                if iteration % args.print_every == 0 or iteration + 1 == iteration_total:
                    print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                          % (split.upper(), iteration, iteration_total - 1, loss.data.item(),
                             NLL_loss.data.item() / batch_size, KL_loss.data.item() / batch_size,
                             KL_weight))

                # if split == 'valid':
                #     if 'target_sents' not in tracker:
                #         tracker['target_sents'] = list()
                #     tracker['target_sents'] += idx2word(batch['target'].data, i2w=datasets['train'].get_i2w(), pad_idx=datasets['train'].pad_idx)
                #     # print("z", tracker['z'], z)
                #     tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)
                # break

            print("%s Epoch %02d/%i, Mean ELBO %9.4f" % (split.upper(), epoch, args.epochs,
                                                         torch.mean(tracker['ELBO'])))
            cur_ELBO = torch.mean(tracker['ELBO'])
            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/ELBO" % split.upper(), cur_ELBO, epoch)

            if split == "valid":
                if cur_ELBO < last_ELBO:
                    save_mode = True
                else:
                    save_mode = False
                last_ELBO = cur_ELBO

            # save a dump of all sentences and the encoded latent space
            # if split == 'valid':
            #     dump = {'target_sents': tracker['target_sents'], 'z': tracker['z'].tolist()}
            #     if not os.path.exists(os.path.join('dumps', ts)):
            #         os.makedirs('dumps/' + ts)
            #     with open(os.path.join('dumps/' + ts + '/valid_E%i.json' % epoch), 'w') as dump_file:
            #         json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                # checkpoint_path = os.path.join(save_model_path, "E%i.pytorch" % (epoch))
                checkpoint_path = os.path.join(save_model_path, "best.pytorch")
                if save_mode == True:
                    torch.save(model.state_dict(), checkpoint_path)
                    print("Model saved at %s" % checkpoint_path)
app.add_url_rule('/', view_func=Main.as_view('main'), methods=["GET"])
app.add_url_rule('/<page>/', view_func=Main.as_view('page'), methods=["GET"])
app.add_url_rule('/login/', view_func=Login.as_view('login'), methods=["GET", "POST"])
app.add_url_rule('/register/', view_func=Register.as_view('register'), methods=["GET", "POST"])
app.add_url_rule('/amazon/', view_func=Amazon.as_view('amazon'), methods=["GET", "POST"])


@app.errorhandler(404)
def page_not_found(error):
    return flask.render_template('404.html'), 404


# Close DB
@app.teardown_appcontext
def close_connection(exception):
    db = getattr(g, '_database', None)
    if db is not None:
        db.close()


app.debug = True
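# Hedged sketch (not from the source) of the kind of MethodView class that the
# Amazon.as_view('amazon') registration above expects; the template name and
# handler bodies are hypothetical stubs.
import flask
from flask.views import MethodView


class Amazon(MethodView):
    def get(self):
        # Render the product page on GET
        return flask.render_template('amazon.html')

    def post(self):
        # Handle the submitted form, then send the user back to the GET view
        return flask.redirect(flask.url_for('amazon'))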
def main(args):
    data_name = args.data_name

    with open(args.data_dir + data_name + '.vocab.json', 'r') as file:
        vocab = json.load(file)
    w2i, i2w = vocab['w2i'], vocab['i2w']

    model = SentenceVAE(
        vocab_size=len(w2i),
        sos_idx=w2i['<sos>'],
        eos_idx=w2i['<eos>'],
        pad_idx=w2i['<pad>'],
        unk_idx=w2i['<unk>'],
        max_sequence_length=args.max_sequence_length,
        embedding_size=args.embedding_size,
        rnn_type=args.rnn_type,
        hidden_size=args.hidden_size,
        word_dropout=args.word_dropout,
        embedding_dropout=args.embedding_dropout,
        latent_size=args.latent_size,
        num_layers=args.num_layers,
        bidirectional=args.bidirectional
    )

    if not os.path.exists(args.load_checkpoint):
        raise FileNotFoundError(args.load_checkpoint)

    model.load_state_dict(torch.load(args.load_checkpoint))
    print("Model loaded from %s" % (args.load_checkpoint))

    if torch.cuda.is_available():
        model = model.cuda()

    model.eval()

    samples, z = model.inference(n=args.num_samples)
    print('----------SAMPLES----------')
    print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')

    z1 = torch.randn([args.latent_size]).numpy()
    z2 = torch.randn([args.latent_size]).numpy()
    z = to_var(torch.from_numpy(interpolate(start=z1, end=z2, steps=8)).float())
    samples, _ = model.inference(z=z)
    print('-------INTERPOLATION-------')
    print(*idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')

    print('-------Encode ... Decode-------')
    datasets = Amazon(
        data_dir=args.data_dir,
        split="valid",
        create_data=False,
        batch_size=10,
        max_sequence_length=args.max_sequence_length,
        min_occ=3
    )
    iteration = 0
    for input_batch_tensor, target_batch_tensor, length_batch_tensor in datasets:
        if torch.is_tensor(input_batch_tensor):
            input_batch_tensor = to_var(input_batch_tensor)
        if torch.is_tensor(target_batch_tensor):
            target_batch_tensor = to_var(target_batch_tensor)
        if torch.is_tensor(length_batch_tensor):
            length_batch_tensor = to_var(length_batch_tensor)

        print("*" * 10)
        print("->" * 10, *idx2word(input_batch_tensor, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')
        logp, mean, logv, z = model(input_batch_tensor, length_batch_tensor)
        samples, z = model.inference(z=z)
        print("<-" * 10, *idx2word(samples, i2w=i2w, pad_idx=w2i['<pad>']), sep='\n')
        # print("+" * 10)
        if iteration == 0:
            break
        iteration += 1
def main():
    # === Extract options ===
    parser = OptionParser(usage="usage: %prog [options] <# of seconds to audit>")
    parser.add_option("-c", "--config", dest='configFile', default=None,
                      help='Path to configuration file')
    parser.add_option("-g", "--gracePeriod", dest='gracePeriod', default=0,
                      help='Number of seconds from now backwards to ignore')
    parser.add_option("-i", "--historyFile", dest='historyFile', default=None,
                      help='Stores any pending transactions and the last run time')
    parser.add_option('-l', "--logFile", dest='logFile', default=None,
                      help='Saves a log of all Amazon transactions')
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.print_usage()
        exit()

    startTime = datetime.fromtimestamp(int(time.time()) - int(args[0]), pytz.utc)
    endTime = datetime.fromtimestamp(int(time.time()) - int(options.gracePeriod), pytz.utc)
    print("AWS refund audit requested from %s to %s" % (startTime.isoformat(), endTime.isoformat()))

    # === Get the configuration options ===
    config = SafeConfigParser()
    fileList = ['./amazon-config.cfg']
    if options.configFile is not None:
        fileList.append(options.configFile)
    config.read(fileList)

    # === Open up ze STOMP ===
    sc = DistStomp(config.get('Stomp', 'server'), config.getint('Stomp', 'port'))
    sc.connect()

    # === Connection to Amazon ===
    aws = Amazon(
        awsEndpoint=config.get('AwsConfig', 'endpoint'),
        awsAccessKey=config.get('AwsConfig', 'accessKey'),
        awsSecret=config.get('AwsConfig', 'secretKey')
    )

    # === Connection to MySQL ===
    dbcon = MySQL.connect(
        config.get('MySQL', 'host'),
        config.get('MySQL', 'user'),
        config.get('MySQL', 'password'),
        config.get('MySQL', 'schema')
    )

    # === Open up the history and log files ===
    # If the history file exists, it will modify the start time of this script to be the end time of the
    # history file.
    hfile = None
    historyStart = startTime
    historyEnd = endTime
    if options.historyFile and os.path.exists(options.historyFile):
        hfile = open(options.historyFile, 'r')
        if hfile.readline().strip() == AWS_HISTORY_FILE_VERSTR:
            historyStart = dateutil.parser.parse(hfile.readline().strip())
            historyEnd = dateutil.parser.parse(hfile.readline().strip())
            startTime = historyEnd
            print("History file modified search period, now %s to %s" %
                  (startTime.isoformat(), endTime.isoformat()))
        else:
            print('Not starting with a valid history file.')

    sfile = None
    if options.logFile:
        sfile = open(options.logFile, 'a')
        sfile.write("!!! Starting run for dates %s -> %s\n" %
                    (startTime.isoformat(), endTime.isoformat()))

    # === Sanity checks ===
    if endTime < startTime:
        startTime = endTime

    # === Main Application ===

    # --- Process all previously pending transactions from the history file. If the transaction is still in some form
    # of pending, add it back to the history list.
    historyCount = 0
    historyList = []
    historyStats = {
        'Success': 0,
        'Pending': 0,
        'Failed': 0,
        'Ignored': 0
    }
    if hfile:
        print("Processing history file")
        for txn in hfile:
            historyCount += 1
            txn = json.loads(txn)
            result = processTransaction(txn, dbcon, aws, sc, sfile, config)
            historyStats[result] += 1
            if result == 'Pending':
                historyList.append(txn)
        hfile.close()

    # --- Obtain AWS history ---
    print("Obtaining AWS transactions for the period %s -> %s" %
          (startTime.isoformat(), endTime.isoformat()))
    awsTransactions = aws.getAccountActivity(startTime, endDate=endTime, fpsOperation='Pay')
    print("Obtained %d transactions" % len(awsTransactions))

    # --- Main loop: checks each aws transaction against the Civi database; adding it if it doesn't exist ---
    txncount = 0
    for txn in awsTransactions:
        txncount += 1
        result = processTransaction(txn, dbcon, aws, sc, sfile, config)
        historyStats[result] += 1
        if result == 'Pending':
            historyList.append(txn)

    print("\n--- Finished processing of messages. ---\n")

    # --- Prepare the history file for write ---
    if options.historyFile:
        print("Rewriting history file with %d transactions" % len(historyList))
        hfile = open(options.historyFile, 'w')
        hfile.write("%s\n%s\n%s\n" % (AWS_HISTORY_FILE_VERSTR, historyStart.isoformat(), endTime.isoformat()))
        for txn in historyList:
            hfile.write("%s\n" % json.dumps(txn))
        print("Flushing history file in preparation for main loop")
        hfile.flush()

    # --- Final statistics ---
    print("%d new AWS messages" % txncount)
    print(" Additionally %d messages were processed from history" % historyCount)
    print("This resulted in the following:")
    for entry in historyStats.items():
        print(" %s Messages: %d" % entry)

    # === Final Application Cleanup ===
    print("\nCleaning up.")
    dbcon.close()
    sc.disconnect()

    if hfile:
        hfile.close()
    if sfile:
        sfile.close()

    time.sleep(1)  # Let the STOMP library catch up